# Importing and Pre-processing Dataset

In [1]:
#@title Import Dataset
#@markdown Dataset taken from [here](https://raw.githubusercontent.com/selva86/datasets/master/Cars93.csv)
import pandas as pd
# Fetch the Cars93 CSV from GitHub and parse it into the raw DataFrame `df`.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/selva86/datasets/master/Cars93.csv'
)

# Colab form toggle: when enabled, dump the complete raw table as text.
print_dataset = False #@param {type:"boolean"}
if print_dataset:
  print(df.to_string())

In [2]:
#@title Remove irrelevant variables
#@markdown Removes:

#@markdown "Manufacturer", "Model", "DriveTrain", "Type", "Origin", "Make", "AirBags", "Min.Price", "Max.Price"
# Drop the columns this model does not use. The labels are given as a list
# (ordered, unlike a set) and `axis` is omitted because `columns=` already
# selects the axis.
df2 = df.drop(columns=[
    'Manufacturer', 'Model', 'DriveTrain', 'Type', 'Origin',
    'Make', 'AirBags', 'Min.Price', 'Max.Price',
])

# Colab form toggle: when enabled, show the reduced table produced by this
# cell (df2), not the untouched raw frame.
print_dataset = False #@param {type:"boolean"}
if print_dataset:
  print(df2.to_string())

In [3]:
#@title Pre-process string data to number
# Encode the Yes/No flag in 'Man.trans.avail' as 1/0. The result is rebound
# to df2 instead of mutating it in place.
df2 = df2.replace({'Man.trans.avail': {"Yes": 1, "No": 0}})
# Rows whose Cylinders entry is the string 'rotary' cannot be cast to float,
# so they are removed before the conversion below.
df2 = df2[df2.Cylinders != 'rotary']
# Discard every row that still contains a missing value.
df2 = df2.dropna(axis=0)
# Everything left is expected to be numeric; cast the whole frame to float.
df2 = df2.astype(float)

# Colab form toggles for inspecting the result of this cell.
print_data_types = False #@param {type:"boolean"}
if print_data_types:
  print(df2.dtypes)
print_dataset = False #@param {type:"boolean"}
if print_dataset:
  # Show the cleaned frame (df2), not the raw input frame.
  print(df2.to_string())

In [4]:
#@title Convert dataframe to NumPy
import numpy as np

#@markdown Only uses "Horsepower" and "Fuel.tank.capacity" as input variables to predict "Price"
# Columns of df2 pulled out as NumPy arrays. Only Price (target), Horsepower
# and Fuel.tank.capacity (features) are active; the commented lines list the
# other columns available as candidate features.
Price                 = df2['Price'].values
# MPGcity              = df2['MPG.city'].values
# MPGhighway           = df2['MPG.highway'].values
# Cylinders             = df2['Cylinders'].values
# EngineSize            = df2['EngineSize'].values
Horsepower            = df2['Horsepower'].values
# RPM                   = df2['RPM'].values
# Revpermile          = df2['Rev.per.mile'].values
# Mantransavail       = df2['Man.trans.avail'].values
Fueltankcapacity    = df2['Fuel.tank.capacity'].values
# Passengers            = df2['Passengers'].values
# Length                = df2['Length'].values
# Wheelbase             = df2['Wheelbase'].values
# Width                 = df2['Width'].values
# Turncircle           = df2['Turn.circle'].values
# Rearseatroom        = df2['Rear.seat.room'].values
# Luggageroom          = df2['Luggage.room'].values
# Weight                = df2['Weight'].values

# Constants
m = len(Price)      # number of samples
x0 = np.ones(m)     # column of ones so that B[0] acts as the intercept

#@markdown Input table (X):
#@markdown <table> <tr> <th>Constants</th> <th>Horse Power</th> <th>Fuel Tank Capacity</th> </tr> <tr> <td>...</td> <td>...</td> <td>...</td> </tr></table>
# Stack the three length-m vectors as rows, then transpose to shape (m, 3).
X = np.array([x0, Horsepower, Fueltankcapacity]).T

print_x_table = False #@param {type:"boolean"}
if print_x_table:
  print(X)

#@markdown Output table (Y):
#@markdown <table> <tr> <th>Price</th> </tr> <tr> <td>...</td> </tr></table>
# Reuse the NumPy array extracted above so that Y, like X, is a plain ndarray
# and downstream arithmetic is positional rather than pandas-index based.
Y = Price

print_y_table = False #@param {type:"boolean"}
if print_y_table:
  print(Y)

# Training
Training using Gradient Descent Algorithm

In [5]:
#@title Initialize Coefficient
# One coefficient per column of X, all starting at zero. Sizing B from X
# keeps the two in step if input columns are added or removed, and float
# zeros match the dtype B takes after the first gradient update.
B = np.zeros(X.shape[1])

In [6]:
#@title Cost Function
#@markdown Cost Function:

#@markdown $J(X,Y,B)=\frac{1}{2m}\sum(X_{i}\cdot B - Y_{i})^{2}$
def cost_function(X, Y, B):
    """Return the halved mean squared error of the linear model X.dot(B).

    X is the input table, Y the observed outputs and B the coefficient
    vector; the result is sum((X.dot(B) - Y) ** 2) / (2 * len(Y)).
    """
    residuals = X.dot(B) - Y
    return np.sum(residuals ** 2) / (2 * len(Y))

# Colab form toggle: report the cost obtained with the initial coefficients.
print_initial_cost = False #@param {type:"boolean"}
if print_initial_cost:
  print(f"Initial Cost: {cost_function(X, Y, B)}")

In [7]:
#@title Gradient Descent Function
#@markdown Iteration to train B

#@markdown $B_{i+1} = B_{i} - \frac{\alpha}{m}X^{T}\cdot (X\cdot B_{i} - Y)$
def gradient_descent(X, Y, B, alpha, iterations):
    """Run batch gradient descent and return the coefficients and cost trace.

    Starting from B, the update B <- B - alpha * X.T.dot(X.dot(B) - Y) / len(Y)
    is applied `iterations` times. The cost after each update is recorded.

    Returns:
        A tuple (B, cost_history): the final coefficients and a list holding
        one cost value per iteration.
    """
    n_samples = len(Y)
    cost_history = []

    for _ in range(iterations):
        # Residual between the current predictions and the observed outputs.
        residual = X.dot(B) - Y
        # Step against the averaged gradient, scaled by the learning rate.
        B = B - alpha * (X.T.dot(residual) / n_samples)
        # Track the cost reached with the updated coefficients.
        cost_history.append(cost_function(X, Y, B))

    return B, cost_history

In [20]:
#@title Training
#@markdown Alpha (the learning rate) should be small so that gradient descent converges.

#@markdown A lower alpha needs more epochs (iterations) to converge.
# Colab form parameters: learning rate and number of gradient-descent steps.
alpha = 0.000001 #@param {type:"number"}
epoch = 500 #@param {type:"integer"}
# Pass the form values through so that editing them actually changes the
# training run (they were previously shadowed by hard-coded literals).
newB, cost_history = gradient_descent(X, Y, B, alpha, epoch)

print_last_cost_result = False #@param {type:"boolean"}
# cost_history is empty when epoch is 0, so guard the [-1] lookup.
if print_last_cost_result and cost_history:
  print("Last Cost: " + str(cost_history[-1]))

# Testing and Error Calculation

In [21]:
#@title Use the trained coefficient to predict Y from X
# Apply the trained coefficients to every row of the input table.
Y_pred = np.dot(X, newB)

# Colab form toggle: print the predicted values.
print_y_pred = False #@param {type:"boolean"}
if print_y_pred:
  print(Y_pred)

In [10]:
#@title Define error calculation function
#@markdown Root Mean Square Error

#@markdown $RMSE = \sqrt{\sum \frac{(Y-Y_{pred})^{2}}{n}}$
def rmse(Y, Y_pred):
    """Return the root mean square error between Y and Y_pred.

    Args:
        Y: observed values (any array-like: list, ndarray, pandas Series).
        Y_pred: predicted values, compared with Y position by position.

    Returns:
        sqrt(mean((Y - Y_pred) ** 2)) as a NumPy float.

    Raises:
        ValueError: if Y is empty, since the mean error is undefined.
    """
    # Convert up front so plain lists work and the arithmetic is vectorized.
    actual = np.asarray(Y, dtype=float)
    predicted = np.asarray(Y_pred, dtype=float)
    if actual.size == 0:
        raise ValueError("rmse requires at least one sample")
    return np.sqrt(np.mean((actual - predicted) ** 2))

#@markdown Coefficient of Determination Function

#@markdown $R^{2} = 1 - \frac{\sum (Y-Y_{pred})^{2}}{\sum (Y-Y_{mean})^{2}}$
def r2_score(Y, Y_pred):
    """Return the coefficient of determination, 1 - SS_res / SS_tot.

    Args:
        Y: observed values (any array-like: list, ndarray, pandas Series).
        Y_pred: predicted values, compared with Y position by position.

    Returns:
        1 - sum((Y - Y_pred) ** 2) / sum((Y - mean(Y)) ** 2). When Y is
        constant (SS_tot == 0) the ratio is undefined; 1.0 is returned for
        perfect predictions and 0.0 otherwise, instead of nan or -inf.

    Raises:
        ValueError: if Y is empty.
    """
    # Convert up front so plain lists work and the arithmetic is vectorized.
    actual = np.asarray(Y, dtype=float)
    predicted = np.asarray(Y_pred, dtype=float)
    if actual.size == 0:
        raise ValueError("r2_score requires at least one sample")

    ss_res = np.sum((actual - predicted) ** 2)
    ss_tot = np.sum((actual - np.mean(actual)) ** 2)
    if ss_tot == 0:
        # Constant target: avoid dividing by zero.
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

In [22]:
#@title Error calculation
# Report both error metrics for the predictions on the training inputs.
print(f"Calculated RMSE: {rmse(Y, Y_pred)}")
print(f"Calculated R2: {r2_score(Y, Y_pred)}")

Calculated RMSE: 6.155450707109362
Calculated R2: 0.6132865111220462


# Inference and Prediction

In [24]:
#@title Input
# Colab form inputs for a single prediction. They fill the same two input
# slots the model was trained on ('Horsepower' and 'Fuel.tank.capacity').
# NOTE(review): the defaults (0 and 100) may lie outside the range of values
# present in the training data — TODO confirm against the dataset.
horse_power = 0 #@param {type:"number"}
fuel_capacity = 100 #@param {type:"number"}

In [25]:
#@title Predict
# Build one input row (constant 1, horsepower, fuel capacity) and apply the
# trained coefficients to it.
predicted_price = np.dot(np.array([1, horse_power, fuel_capacity]), newB)
print(f"Predicted Price: {predicted_price}")

Predicted Price: 1.4549635825943183
