In [1]:
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
# Setup device agnostic code
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


# Reading/processing the data

In [3]:
CAR_DATA = "car_data.csv"
dataframe_raw = pd.read_csv(CAR_DATA)
dataframe_raw.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [4]:
# Set the column of what to display
columns = ["Year", "Selling_Price", "Present_Price", "Kms_Driven", "Fuel_Type", "Transmission"]
dataframe = pd.read_csv(CAR_DATA, usecols=columns)

# Sorting the dataframe
dataframe = dataframe.sort_values(by="Year")
dataframe.head()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Transmission
37,2003,0.35,2.28,127000,Petrol,Manual
39,2003,2.25,7.98,62000,Petrol,Manual
77,2004,1.5,12.35,135154,Petrol,Automatic
54,2005,2.75,10.21,90000,Petrol,Manual
84,2005,3.49,13.46,197176,Diesel,Manual


In [5]:
# Scaling the dataset
scaler = MinMaxScaler(feature_range=(-1, 1))
dataframe["Selling_Price"] = scaler.fit_transform(dataframe["Selling_Price"].values.reshape(-1, 1))
dataframe["Present_Price"] = scaler.fit_transform(dataframe["Present_Price"].values.reshape(-1, 1))
dataframe["Kms_Driven"] = scaler.fit_transform(dataframe["Kms_Driven"].values.reshape(-1, 1))

dataframe.head()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Transmission
37,2003,-0.985673,-0.957521,-0.493493,Petrol,Manual
39,2003,-0.876791,-0.833984,-0.753754,Petrol,Manual
77,2004,-0.919771,-0.739272,-0.460845,Petrol,Automatic
54,2005,-0.848138,-0.785652,-0.641642,Petrol,Manual
84,2005,-0.805731,-0.715215,-0.212509,Diesel,Manual


In [6]:
# Catorizing the rows, columns, and variables
input_cols = ["Year", "Present_Price", "Kms_Driven"]
categorical_cols = ["Fuel_Type", "Transmission"]
output_cols = ["Selling_Price"]

In [7]:
# Converting from dataframe to numpy arrays
def dataframe_to_arrays(dataframe):
    # Make a copy of the original dataframe
    dataframe1 = dataframe.copy(deep=True)
    
    # Convert non-numeric categorical columns to numbers
    for col in categorical_cols:
        dataframe1[col] = dataframe1[col].astype('category').cat.codes
        
    # Extract input & outupts as numpy arrays
    inputs_array = dataframe1[input_cols].to_numpy()
    targets_array = dataframe1[output_cols].to_numpy()
    
    return inputs_array, targets_array

inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs_array[:10], targets_array[:10]

(array([[ 2.00300000e+03, -9.57520590e-01, -4.93493493e-01],
        [ 2.00300000e+03, -8.33983528e-01, -7.53753754e-01],
        [ 2.00400000e+03, -7.39271782e-01, -4.60844845e-01],
        [ 2.00500000e+03, -7.85652362e-01, -6.41641642e-01],
        [ 2.00500000e+03, -7.15214564e-01, -2.12508509e-01],
        [ 2.00500000e+03, -7.10013004e-01, -7.01701702e-01],
        [ 2.00500000e+03, -9.94581708e-01, -7.81781782e-01],
        [ 2.00600000e+03, -4.92631123e-01, -4.33433433e-01],
        [ 2.00600000e+03, -9.16991764e-01, -7.41741742e-01],
        [ 2.00600000e+03, -9.90680537e-01, -6.32700701e-01]]),
 array([[-0.98567335],
        [-0.87679083],
        [-0.91977077],
        [-0.84813754],
        [-0.80573066],
        [-0.80458453],
        [-0.99426934],
        [-0.86246418],
        [-0.94555874],
        [-1.        ]]))

In [21]:
# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

# Converting from numpy arrays to pytorch tensors
inputs = torch.Tensor(inputs_array)
targets = torch.Tensor(targets_array)

# Creating training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(inputs, targets, random_state=42)

# Create custom datasets
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

# Define batch size
batch_size = 16

# Split up the dataset into batches
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [40]:
test_dataset[0]

(tensor([ 2.0150e+03, -7.8197e-01, -8.2182e-01]), tensor([-0.5788]))

# Creating the model

In [9]:
input_size = len(input_cols)
output_size = len(output_cols)
input_size, output_size

(3, 1)

In [22]:
# Creating a functions
class CarPredictionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_layer = nn.Linear(in_features=3, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear_layer(x)

# Set the manual seed
torch.manual_seed(42)

model_1 = CarPredictionModel()
model_1.state_dict()

OrderedDict([('linear_layer.weight', tensor([[ 0.4414,  0.4792, -0.1353]])),
             ('linear_layer.bias', tensor([0.5304]))])

In [37]:
# Setup loss function
loss_fn = nn.L1Loss() # same as MAE

# Setup optimizer
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=1e-7)

# Training the Model

In [38]:
# Creating the training loop
torch.manual_seed(42)

epochs = 500

for epoch in range(epochs):
    model_1.train()
    epoch_loss = 0.0
    total_batches = len(train_loader)  # Total number of batches

    for batch_idx, (inputs, targets) in enumerate(train_loader, start=1):
        # 1. Forward pass
        y_pred = model_1(inputs)

        # 2. Calculate the loss
        loss = loss_fn(y_pred, targets)

        # 3. Zero the optimizer
        optimizer.zero_grad()

        # 4. Perform backpropagation
        loss.backward()

        # 5. Perform gradient descent
        optimizer.step()

        # Accumulate the loss for each epoch
        epoch_loss += loss.item()

        if epoch % 50 == 0 and batch_idx == total_batches:
            print(f"Epoch {epoch}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

Epoch 0/500, Loss: 9.9366
Epoch 50/500, Loss: 0.1910
Epoch 100/500, Loss: 0.1948
Epoch 150/500, Loss: 0.2100
Epoch 200/500, Loss: 0.1930
Epoch 250/500, Loss: 0.2087
Epoch 300/500, Loss: 0.1783
Epoch 350/500, Loss: 0.1721
Epoch 400/500, Loss: 0.1715
Epoch 450/500, Loss: 0.1637


In [39]:
# Creating evaluation loop
model_1.eval()
total_loss = 0.0

with torch.no_grad():  # Disable gradient computation
        for inputs, targets in test_loader:
            # Forward pass: Compute predictions
            outputs = model_1(inputs)
            
            # Compute loss
            loss = loss_fn(outputs, targets)
            
            # Accumulate loss
            total_loss += loss.item()
    
# Print average test loss
print(f"Test Loss: {total_loss / len(test_loader)}")

Test Loss: 0.275289848446846


# Using the Model to Predict Car Prices

In [59]:
# Prediction Algorithm
def predict_single(input, target, model):
    inputs = input.unsqueeze(0) # adds an extra batch dimension
    predictions = model(inputs)
    prediction = predictions[0].detach() # detach it from computation graph
    
    print("Input:", input)
    print("Target:", target)
    print("Prediction:", prediction)

# Testing the model with some samples
input, target = test_dataset[7]
predict_single(input, target, model_1)

Input: tensor([ 2.0150e+03, -8.5912e-01, -8.7617e-01])
Target: tensor([-0.6963])
Prediction: tensor([-1.0264])
