# FT LINEAR REGRESSION
The aim of this project is to introduce you to the basic concept behind machine learning.
For this project, you will have to create a program that predicts the price of a car by
using a linear function trained with a gradient descent algorithm.
We will work on a precise example for the project, but once you’re done you will be
able to use the algorithm with any other dataset.

![image.png](attachment:image.png)

<a href='#IMPORTS'>IMPORTS</a> <br />
<a href='#DATA'>DATA</a> <br />
<a href='#GRADIENT DESCENT'>GRADIENT DESCENT</a> <br />
<a href='#THETAS'>THETAS</a> <br />
<a href='#COST FUNCTION'>COST FUNCTION</a> <br />
<a href='#ERROR STATS'>ERROR STATS</a> <br />
<a href='#REGRESSION ANIMATION'>REGRESSION ANIMATION</a> <br />
<a href='#PREDICTION'>PREDICTION</a> <br />

## IMPORTS
<a id='IMPORTS'></a>

In [None]:
import numpy as np 
import pandas as pd
from IPython.display import Image
import io
import base64
from IPython.display import HTML

from subprocess import check_output
# List the files in the current working directory.
directory_listing = check_output(["ls", "."])
print(directory_listing.decode("utf8"))

In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Load the dataset from the CSV file next to the notebook.
data = pd.read_csv(filepath_or_buffer='./data.csv')

## DATA
<a id='DATA'></a>

In [None]:
# Target (price) and raw feature (mileage) columns of the dataset.
y = data['price']
x = data['km']

# Keep the statistics of the raw mileage under their own names so that
# other cells can reuse them after x has been replaced below.
tempmean, tempstd, tempvar = x.mean(), x.std(), x.var()

# Standardise the mileage and put a column of ones in front of it, so that
# theta[0] multiplies the constant column and theta[1] the scaled mileage.
x = np.column_stack((np.ones(x.shape[0]), (x - tempmean) / tempstd))

print("\033[1m" + "Our data sample looks like this" + "\033[0m")
print(data)

## GRADIENT DESCENT
<a id='GRADIENT DESCENT'></a>

In [None]:
# Settings of the training run.
alpha, iterations = 0.01, 1000  # learning rate, number of update steps
m = y.size                      # number of elements in y
np.random.seed(123)             # seed NumPy's global random generator
theta = [0, 0]                  # starting values for [theta0, theta1]

#GRADIENT DESCENT
def gradient_descent(x, y, theta, iterations, alpha):
    """Fit a linear model with batch gradient descent.

    Parameters
    ----------
    x : array of shape (n_samples, n_features)
        Design matrix. Include a column of ones to get an intercept.
    y : array-like of length n_samples
        Target values.
    theta : sequence of n_features numbers
        Starting parameters.
    iterations : int
        Number of update steps to perform (0 is allowed).
    alpha : float
        Learning rate.

    Returns
    -------
    past_thetas : list
        The starting parameters followed by the parameters obtained after
        every step (``iterations + 1`` entries).
    past_costs : list
        Half mean squared error measured before each step
        (``iterations`` entries).
    error
        Residuals ``x @ theta - y`` evaluated at the final parameters,
        i.e. at ``past_thetas[-1]``.
    """
    # Use the size of the data actually passed in rather than a global.
    sample_count = len(y)
    # A float array keeps every entry of the history the same type.
    theta = np.asarray(theta, dtype=float)

    past_costs = []
    past_thetas = [theta]
    for _ in range(iterations):
        error = np.dot(x, theta) - y
        cost = 1/(2*sample_count) * np.dot(error.T, error)
        past_costs.append(cost)
        theta = theta - (alpha * (1/sample_count) * np.dot(x.T, error))
        past_thetas.append(theta)

    # Recompute the residuals after the last update so that they describe
    # the returned parameters (and exist even when iterations == 0).
    error = np.dot(x, theta) - y
    return past_thetas, past_costs, error

# Run the training and keep the parameters reached after the last step.
past_thetas, past_costs, error = gradient_descent(
    x, y, theta, iterations, alpha
)
theta = past_thetas[-1]

print("\033[1m" + "Computing thetas finished!" + "\033[0m")
print("Gradient Descent: {:.2f}, {:.2f}".format(*theta))

## THETAS
<a id='THETAS'></a>

In [None]:
# Plot the recorded parameter values against the iteration number.
plt.plot(past_thetas)
plt.ylabel('Thetas')
plt.xlabel('No. of iterations')
plt.title('Theta0 and Theta1 evolution')
plt.show()

## COST FUNCTION
<a id='COST FUNCTION'></a>

In [None]:
# Plot the recorded cost against the iteration number.
plt.plot(past_costs)
plt.ylabel('Cost')
plt.xlabel('No. of iterations')
plt.title('Cost Function J')
plt.show()

## ERROR STATS
<a id='ERROR STATS'></a>

In [None]:
# Summarise the residuals returned by gradient descent.
AE = np.sum(np.abs(error))
MAE = AE/error.size
SE = np.sum(np.square(error))
MSE = SE/error.size
RMSE = np.sqrt(MSE)
# Relative squared error: residual sum of squares divided by the total sum
# of squares of the target y (the prices). tempvar is the variance of the
# mileage, so dividing by it compared unrelated quantities and produced a
# meaningless R squared.
rMSE = SE / np.sum(np.square(y - np.mean(y)))
R2 = 1 - rMSE
print("\033[1m" + "Computing Error stats finished!" + "\033[0m")
print("Car Sample size                     ", error.size)
print("Absolute Error AE is                ", AE)
print("Mean Absolute Error MAE is          ", MAE)
print("Squared Error SE is                 ", SE)
print("Mean Squared Error MSE is           ", MSE)
print("Root Mean Squared Error RMSE is     ", RMSE)
print("Relative Mean Squared Error rMSE is ", rMSE)
print("R squared is                        ", R2)

## REGRESSION ANIMATION
<a id='REGRESSION ANIMATION'></a>

In [None]:
# Static part of the figure: the training points plus the empty line and
# text artists that the animation callbacks fill in for every frame.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Car Price vs Mileage')
ax.set_xlabel('Mileage in km (normalised)')
ax.set_ylabel('Car Price (€)')
ax.scatter(x[:, 1], y, color='red')
line, = ax.plot([], [], lw=2)

# Three text slots stacked at x = 1.5 (cost, theta0, theta1).
annotation1 = ax.text(1.5, 8000, '')
annotation2 = ax.text(1.5, 7500, '')
annotation3 = ax.text(1.5, 7000, '')
annotation1.set_animated(True)
annotation2.set_animated(True)
annotation3.set_animated(True)

# Close the current figure; presumably so the notebook does not also show
# it as a static plot -- confirm.
plt.close()

def init():
    """Reset all animated artists to an empty state.

    Returns every artist the animation touches, which FuncAnimation
    requires from ``init_func`` when ``blit=True``.
    """
    line.set_data([], [])
    for annotation in (annotation1, annotation2, annotation3):
        annotation.set_text('')
    return line, annotation1, annotation2, annotation3

def animate(i):
    """Draw the regression line and its statistics for training step ``i``.

    Reads ``past_thetas[i]`` and ``past_costs[i]``, updates the line and the
    three text annotations, and returns all four artists so that blitting
    redraws each of them.
    """
    theta0, theta1 = past_thetas[i][0], past_thetas[i][1]
    xs = np.linspace(-5, 5, 1000)
    line.set_data(xs, theta1*xs + theta0)
    annotation1.set_text('Cost = %.2f e5' % (past_costs[i]/100000))
    annotation2.set_text('theta0 = %.2f' % (theta0))
    annotation3.set_text('theta1 = %.2f' % (theta1))
    return line, annotation1, annotation2, annotation3

# animate(i) indexes past_costs[i], so never request more frames than there
# are recorded costs (a run with fewer than 300 iterations would otherwise
# raise IndexError while rendering).
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=min(300, len(past_costs)),
                               interval=0, blit=True)

# The 'imagemagick' writer relies on the external ImageMagick program.
anim.save('animation.gif', writer='imagemagick', fps = 30)
print("\033[1m" + "Computing animation finished!" + "\033[0m")

In [None]:
filename = 'animation.gif'

# Read-only binary mode is sufficient, and the with-block closes the file
# handle as soon as the bytes have been read.
with open(filename, 'rb') as gif_file:
    video = gif_file.read()
encoded = base64.b64encode(video)
# Embed the GIF in the cell output as a base64 data URI.
HTML(data='''<img src="data:image/gif;base64,{0}" type="gif" />'''.format(encoded.decode('ascii')))

## PREDICTION
<a id='PREDICTION'></a>

In [None]:
print("Please enter a mileage in km")
a = input()

# Parse with float() instead of str.isnumeric(): isnumeric() rejects decimal
# input such as "42000.5" and accepts characters like "²" that float() cannot
# convert, which used to crash the cell.
try:
    mileage = float(a)
except ValueError:
    mileage = None

# The chained comparison also filters out negative values, NaN and infinity.
if mileage is not None and 0 <= mileage < float("inf"):
    # Scale the input with the same mean/std that were used on the training
    # mileage before applying the learned parameters.
    result = ((mileage-tempmean)/tempstd)*(theta[1]) + theta[0]
    print("Predicted Car Price for", a, "km mileage is", result, "€")
else:
    print("Please rerun the cell and make sure to enter a real mileage")