In [214]:
import pandas as pd
import numpy as np

In [215]:
# Read the raw table; the 'price' column is the regression target.
X = pd.read_csv("CarPrice_Assignment.csv")
# Pull the target out as a column vector of shape (n_rows, 1).
y = X['price'].to_numpy().reshape(-1, 1)

In [216]:
#Preprocessing
# Built as a single non-mutating chain: drop the id/target columns, turn the
# spelled-out door and cylinder counts into numbers, one-hot encode the listed
# categorical columns, and finally cast every column to float.
X = (
    X.drop(columns=['car_ID', 'price'])
    .assign(
        doornumber=lambda frame: frame["doornumber"].replace({"four": 4, "two": 2}),
        cylindernumber=lambda frame: frame["cylindernumber"].replace(
            {"four": 4, "five": 5, "six": 6, "three": 3, "twelve": 12, "two": 2, "eight": 8}
        ),
    )
    .pipe(
        pd.get_dummies,
        columns=['CarName', 'fueltype', 'aspiration', 'carbody', 'drivewheel',
                 'enginelocation', 'enginetype', 'fuelsystem'],
    )
    .astype(float)
)

In [217]:
# Normalizing the features
# Standardise every feature column and the target to zero mean / unit spread.
# A column with zero standard deviation would otherwise produce 0/0 = NaN and
# poison every later matrix product, so a zero std is replaced by 1 (the
# centred column then simply stays all-zero).
# NOTE(review): the statistics are taken over every row, i.e. before the
# train/test split further down, so test rows influence the scaling.
X = (X - X.mean(axis=0)) / X.std(axis=0).replace(0, 1.0)

y_std = y.std(axis=0)
y = (y - y.mean(axis=0)) / np.where(y_std == 0, 1.0, y_std)

# Append a constant column of ones so the last weight acts as the intercept.
X = np.hstack((X ,np.ones((X.shape[0],1))))

In [218]:
def gradient_descent(X, y, weights, alpha, iterations):
    """Run batch gradient descent for a least-squares linear model.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one row per sample.
    y : array-like
        Targets, shaped so that ``np.dot(X, weights) - y`` is element-wise
        (e.g. ``(m, 1)`` when ``weights`` is ``(n, 1)``).
    weights : array-like
        Starting weights. The caller's array is left untouched.
    alpha : float
        Step size applied to the gradient.
    iterations : int
        Maximum number of update steps.

    Returns
    -------
    weights : numpy.ndarray
        Float array holding the weights after the last executed step.
    cost_history : numpy.ndarray, shape (iterations,)
        Half mean squared error recorded at each step, measured with the
        weights in effect before that step's update. If a NaN cost shows
        up the loop stops early and the remaining entries stay at 0.
    """
    m = len(X)
    # Work on a private float copy: the in-place update below must neither
    # alter the caller's array nor fail on an integer-typed initial guess.
    weights = np.array(weights, dtype=float)
    cost_history = np.zeros(iterations)

    for i in range(iterations):
        predictions = np.dot(X, weights)
        error = predictions - y
        gradient = (1/m) * np.dot(X.T, error)
        weights -= alpha * gradient
        # Sum of squared residuals as a true scalar. Storing the (1, 1)
        # result of np.dot(error.T, error) into a scalar slot relies on a
        # deprecated array-to-scalar conversion.
        cost_history[i] = (1/(2*m)) * np.sum(np.square(error))
        if np.isnan(cost_history[i]):
            print(f"NaN encountered at iteration {i}")
            break

    return weights, cost_history

In [219]:
def linear_regression(X, y, alpha, iterations):
    """Fit a linear model by gradient descent starting from all-zero weights.

    Builds an ``(X.shape[1], 1)`` zero vector as the initial guess and hands
    everything to ``gradient_descent``; returns its ``(weights,
    cost_history)`` pair unchanged.
    """
    initial_weights = np.zeros((X.shape[1], 1))
    return gradient_descent(X, y, initial_weights, alpha, iterations)

In [220]:
#Train Test Split
# Sequential split without shuffling: the leading `ratio` share of the rows
# is used for training, the remaining rows for testing.
ratio = 0.8

rows = X.shape[0]
train_size = int(ratio*rows)

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [221]:
# Hyper-parameters for the gradient-descent fit.
alpha = 0.1648
iterations = 10000

# Train on the training rows only.
weights, cost_history = linear_regression(
    X_train, y_train, alpha=alpha, iterations=iterations
)

print("Optimized theta parameters:", weights)
print("Final cost:", round(cost_history[-1],10))

  cost_history[i] = (1/(2*m)) * np.dot(error.T, error)


Optimized theta parameters: [[ 7.50561536e-02]
 [ 6.55936813e-02]
 [-3.21805886e-02]
 [-1.89919072e-01]
 [ 1.38840776e-01]
 [-4.22853921e-02]
 [ 8.02923119e-01]
 [ 2.31785715e-02]
 [ 3.07927552e-01]
 [-9.56187797e-02]
 [-7.88338859e-02]
 [-6.57149056e-02]
 [ 9.16978927e-03]
 [ 2.45692259e-01]
 [ 3.01003405e-01]
 [-1.28851495e-01]
 [ 1.55065876e-03]
 [ 1.94191360e-04]
 [-4.29138437e-02]
 [-1.65781356e-02]
 [ 3.26933452e-02]
 [ 9.00981162e-03]
 [ 2.53610839e-02]
 [ 2.27126538e-02]
 [ 3.65604641e-03]
 [ 2.60660665e-02]
 [ 1.99435975e-02]
 [ 5.76735817e-02]
 [ 6.98829014e-02]
 [ 3.70583308e-02]
 [ 1.20842165e-01]
 [ 4.40751156e-02]
 [-2.22052055e-02]
 [ 8.67742503e-03]
 [ 7.15017682e-02]
 [-1.10378115e-02]
 [ 3.88864356e-02]
 [ 8.70884385e-02]
 [ 2.26040617e-02]
 [ 1.53558642e-02]
 [ 3.94642845e-02]
 [-1.61652621e-02]
 [-2.94130737e-02]
 [-4.01825049e-03]
 [-2.48922638e-02]
 [-3.29288176e-02]
 [-1.16203423e-02]
 [-9.36721549e-03]
 [ 1.90069412e-04]
 [ 9.93652597e-03]
 [-2.45658386e-02]
 [-

In [222]:
def accuracy(X_test, y_test, weights):
    """Score a fitted linear model on held-out data.

    Parameters
    ----------
    X_test : array-like, shape (n, d)
        Feature rows to predict on.
    y_test : array-like
        True targets, either flat ``(n,)`` or a column ``(n, 1)``.
    weights : array-like
        Model weights, ``(d,)`` or ``(d, 1)``.

    Returns
    -------
    (mse, mae, r_squared)
        Mean squared error, mean absolute error and the coefficient of
        determination ``1 - ss_res / ss_total``.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of targets.
    """
    # Flatten both sides before subtracting: a (n,) target against (n, 1)
    # predictions would otherwise broadcast to an (n, n) matrix and silently
    # yield wrong metrics.
    y_pred = np.asarray(np.dot(X_test, weights), dtype=float).ravel()
    y_true = np.asarray(y_test, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_test has {y_true.size} values but the model produced {y_pred.size} predictions"
        )

    residuals = y_true - y_pred

    mse = np.mean(residuals ** 2)

    mae = np.mean(np.abs(residuals))

    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_res = np.sum(residuals ** 2)
    r_squared = 1 - (ss_res / ss_total)

    return mse, mae, r_squared

# Score the fitted weights on the held-out rows and report each metric.
mse, mae, r_squared = accuracy(X_test=X_test, y_test=y_test, weights=weights)

print("MSE:", mse)
print("MAE:", mae)
print("R-squared:", r_squared)


MSE: 0.35370177260539265
MAE: 0.4513611829714883
R-squared: -0.24836735595429582
