In [2]:
import sys
import pandas as pd
import torch
from torch import nn

# Pandas: load the raw used-car listings.
df = pd.read_csv("../starter/used_cars.csv")

# Pandas: turn the raw columns into numeric features and a numeric target.

# Age is measured relative to the newest model year present in the data.
age = df["model_year"].max() - df["model_year"]

# The "milage" column is text; strip "," and " mi." before the integer cast.
# regex=False forces a literal match on every pandas version: before pandas
# 2.0 str.replace defaulted to regex=True, where "." matches any character.
milage = df["milage"]
milage = milage.str.replace(",", "", regex=False)
milage = milage.str.replace(" mi.", "", regex=False)
milage = milage.astype(int)

# 'accident_free' is 1 where df["accident"] == "None reported", else 0.
accident_free = df["accident"] == "None reported"
accident_free = accident_free.astype(int)

# The "price" column is text; strip "$" and "," before the integer cast.
# regex=False matters here too: as a regex, "$" is the end-of-string anchor,
# not a literal dollar sign.
price = df["price"]
price = price.str.replace("$", "", regex=False)
price = price.str.replace(",", "", regex=False)
price = price.astype(int)

# Torch: build the feature matrix X and the target y as float32 tensors.
# Each Series is converted with .to_numpy() first so the conversion does not
# rely on how torch.tensor handles pandas objects (passing a Series directly
# can fail when its index is not the default 0..n-1).
X = torch.column_stack([
    torch.tensor(accident_free.to_numpy(), dtype=torch.float32),  # column 0
    torch.tensor(age.to_numpy(), dtype=torch.float32),            # column 1
    torch.tensor(milage.to_numpy(), dtype=torch.float32),         # column 2
])
# Standardize each feature column. The statistics are kept so that new inputs
# can be scaled exactly the same way at prediction time.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X = (X - X_mean) / X_std

# Target as a single-column tensor (reshape to (-1, 1)), standardized with
# its own mean and standard deviation.
y = torch.tensor(price.to_numpy(), dtype=torch.float32).reshape((-1, 1))
y_mean = y.mean()
y_std = y.std()
y = (y - y_mean) / y_std

# Linear model: three input features (the columns of X) -> one output.
model = nn.Linear(in_features=3, out_features=1)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

for i in range(10000):
    # One gradient step computed on the whole of X / y.
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(X)
    loss = loss_fn(outputs, y)
    loss.backward()
    optimizer.step()

    # Print the loss on every 100th iteration, starting with iteration 0.
    if not i % 100:
        print(loss)

# Sample rows to predict, laid out like the columns of X:
# (accident_free, age, milage). Every row uses accident_free = 0.
X_data_accident = torch.tensor(
    [
        [0, 5, 10000],
        [0, 2, 10000],
        [0, 5, 20000],
    ],
    dtype=torch.float32,
)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    # Scale the inputs with the same statistics used to standardize X.
    scaled_inputs = (X_data_accident - X_mean) / X_std
    prediction_accident = model(scaled_inputs)
    # Undo the target standardization before printing.
    print(prediction_accident * y_std + y_mean)

tensor(1.3588, grad_fn=<MseLossBackward0>)
tensor(1.1909, grad_fn=<MseLossBackward0>)
tensor(1.0940, grad_fn=<MseLossBackward0>)
tensor(1.0357, grad_fn=<MseLossBackward0>)
tensor(0.9992, grad_fn=<MseLossBackward0>)
tensor(0.9751, grad_fn=<MseLossBackward0>)
tensor(0.9587, grad_fn=<MseLossBackward0>)
tensor(0.9471, grad_fn=<MseLossBackward0>)
tensor(0.9385, grad_fn=<MseLossBackward0>)
tensor(0.9321, grad_fn=<MseLossBackward0>)
tensor(0.9272, grad_fn=<MseLossBackward0>)
tensor(0.9234, grad_fn=<MseLossBackward0>)
tensor(0.9204, grad_fn=<MseLossBackward0>)
tensor(0.9179, grad_fn=<MseLossBackward0>)
tensor(0.9159, grad_fn=<MseLossBackward0>)
tensor(0.9143, grad_fn=<MseLossBackward0>)
tensor(0.9130, grad_fn=<MseLossBackward0>)
tensor(0.9119, grad_fn=<MseLossBackward0>)
tensor(0.9110, grad_fn=<MseLossBackward0>)
tensor(0.9102, grad_fn=<MseLossBackward0>)
tensor(0.9095, grad_fn=<MseLossBackward0>)
tensor(0.9090, grad_fn=<MseLossBackward0>)
tensor(0.9085, grad_fn=<MseLossBackward0>)
tensor(0.90