In [44]:
import random as rd
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

data = {'year': [2018, 2015, 2020, 2012],
        'km': [50000, 80000, 20000, 120000],
        'engine_size': [1.6, 2.0, 1.8, 1.4],
        'brand': ['Toyota', 'Honda', 'Volkswagen', 'Ford'],
        'price': [120000, 90000, 150000, 60000]}


df = pd.DataFrame(data)

label_encoder = LabelEncoder()
df["brand_encoded"] = label_encoder.fit_transform(df["brand"])


X_train = df[['year', 'km', 'engine_size', 'brand_encoded']]
y_train = df['price']


def predict(x, w, b):
  return np.dot(x, w) + b


def compute_cost(X_train, y_train, w, b):
  m = X_train.shape[0]
  cost = 0
  for i in range(m):
    hypothesis = predict(X_train.iloc[i], w, b)
    cost += (hypothesis - y_train.iloc[i])**2
  return cost / (2 * m)


w_init = np.random.rand(X_train.shape[1])
b_init = 0.


cost = compute_cost(X_train, y_train, w_init, b_init)
print(f"Initial cost with random weights: {cost:.4f}")


def gradient_descent(X_train, y_train, w, b, learning_rate, num_iterations):
    m = X_train.shape[0]  

    for iteration in range(num_iterations):
        
        dw = np.zeros_like(w)
        db = 0

        for i in range(m):
            
            y_pred = predict(X_train.iloc[i], w, b)

            
            dw += (y_pred - y_train.iloc[i]) * X_train.iloc[i]
            db += (y_pred - y_train.iloc[i])

        
        dw /= m
        db /= m

        
        w -= learning_rate * dw
        b -= learning_rate * db

       
        if iteration % 100 == 0:
            cost = compute_cost(X_train, y_train, w, b)
            print(f"Iteration {iteration}: Cost = {cost:.4f}")

    return w, b



learning_rate = 0.001
num_iterations = 1000


w_optimized, b_optimized = gradient_descent(X_train, y_train, w_init, b_init, learning_rate, num_iterations)


print("Optimized parameters:")
print("Weights:", w_optimized)
print("Bias:", b_optimized)


Initial cost with random weights: 4398246483.6399
