In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
# Load the dataset CSV; the relative path is resolved against the current working directory
DATA_FILE = 'encoded_df.csv'
df = pd.read_csv(DATA_FILE)

In [3]:
# Separate the regression target column from the remaining columns,
# keeping both as plain NumPy arrays
TARGET_COLUMN = 'total_sales_price'
target = df[TARGET_COLUMN].values
features = df.drop(columns=[TARGET_COLUMN]).values

In [4]:
# Hold out a fixed fraction of the rows for testing; the seed keeps the split repeatable
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [5]:
# Wrap each NumPy split in a float32 PyTorch tensor (same order as the split above)
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.from_numpy(split).float()
    for split in (X_train, y_train, X_test, y_test)
)

In [6]:
# Define the neural network architecture
class Net(nn.Module):
    """Three-layer fully connected network producing one output value per sample.

    Layout: ``in_features -> hidden_sizes[0] -> hidden_sizes[1] -> 1`` with ReLU
    after each of the two hidden layers and no activation on the output.

    Args:
        in_features: Number of input features per sample. When omitted, the
            width is taken from the notebook-level ``X_train`` array
            (``X_train.shape[1]``), so a bare ``Net()`` keeps working.
        hidden_sizes: Widths of the two hidden layers. Defaults to ``(64, 32)``.
    """

    def __init__(self, in_features=None, hidden_sizes=(64, 32)):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: fall back to the notebook global so
            # existing ``Net()`` calls are unaffected.
            in_features = X_train.shape[1]
        first_width, second_width = hidden_sizes
        # Attribute names are kept so state_dict keys stay the same.
        self.fc1 = nn.Linear(in_features, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, 1)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to a ``(batch, 1)`` tensor."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the last layer: the output is an unbounded real value.
        return self.fc3(x)

In [7]:
# Build the model, then pair it with a mean-squared-error criterion and an Adam optimizer
LEARNING_RATE = 1e-3
net = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(params=net.parameters(), lr=LEARNING_RATE)

In [8]:
# Define the training loop
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and log the loss periodically.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``dataloader.dataset`` must
            support ``len()`` (used only for the progress print-out).
        model: Module being trained. It is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that owns ``model``'s parameters.

    Returns:
        None. The loss of every 100th batch is printed.
    """
    size = len(dataloader.dataset)
    # Training-mode layers (dropout, batch-norm, ...) must be active while fitting,
    # even if an earlier evaluation step left the model in eval mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        # Drop only the trailing output dimension. A bare squeeze() would also
        # collapse the batch dimension when a batch holds a single sample, leaving
        # a 0-d prediction that mismatches ``y`` and triggers a broadcasting warning.
        # NOTE(review): assumes the model emits (batch, 1) and y is (batch,) - confirm.
        pred = model(X).squeeze(-1)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            # ``current`` is the number of samples consumed before this batch.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [9]:
# Define the testing loop
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred.squeeze(), y).item()
    test_loss /= num_batches
    print(f"Avg loss: {test_loss:>8f} \n")

In [10]:
# Pair features with targets, then batch them: the training loader reshuffles
# every epoch, the test loader keeps a fixed order
BATCH_SIZE = 64
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
# Alternate one training pass and one evaluation pass for each epoch
epochs = 10
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train_loop(dataloader=train_loader, model=net, loss_fn=criterion, optimizer=optimizer)
    test_loop(dataloader=test_loader, model=net, loss_fn=criterion)
print("Done!")

Epoch 1
-------------------------------
loss: 369477120.000000  [    0/126904]
loss: 3128015872.000000  [ 6400/126904]
loss: 267920848.000000  [12800/126904]
loss: 433742208.000000  [19200/126904]
loss: 443510592.000000  [25600/126904]
loss: 252856368.000000  [32000/126904]
loss: 2230587392.000000  [38400/126904]
loss: 30249394.000000  [44800/126904]
loss: 647718656.000000  [51200/126904]
loss: 81602328.000000  [57600/126904]
loss: 583979584.000000  [64000/126904]
loss: 59397504.000000  [70400/126904]
loss: 444373056.000000  [76800/126904]
loss: 436533152.000000  [83200/126904]
loss: 33269876.000000  [89600/126904]
loss: 61146048.000000  [96000/126904]
loss: 51473400.000000  [102400/126904]
loss: 199533584.000000  [108800/126904]
loss: 476832576.000000  [115200/126904]
loss: 225277152.000000  [121600/126904]
Avg loss: 544576092.141129 

Epoch 2
-------------------------------
loss: 35557108.000000  [    0/126904]
loss: 56725896.000000  [ 6400/126904]
loss: 170242384.000000  [12800/1269