In [13]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [14]:
# Read the dataset from 'encoded_df.csv'; the path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='encoded_df.csv')

In [15]:
# Separate the model inputs from the prediction target:
# inputs are every column except 'total_sales_price', the target is that column.
features = df.drop(columns=['total_sales_price']).values
target = df.loc[:, 'total_sales_price'].values

In [16]:
# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# partition identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Turn each NumPy split into a float32 PyTorch tensor. The order of the names
# on the left matches the order of the arrays in the tuple on the right.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.from_numpy(split).float()
    for split in (X_train, y_train, X_test, y_test)
)

In [18]:
# Define the neural network architecture
class Net(nn.Module):
    """Three-layer fully connected network: in_features -> 64 -> 32 -> 1.

    ReLU is applied after the first two linear layers; the final layer is
    left linear so the output is an unbounded scalar per sample.

    Args:
        in_features: Number of input columns per sample. When omitted, the
            width is read from the notebook-level ``X_train`` array, which
            is what a zero-argument ``Net()`` call has always relied on.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: fall back to the global training
            # matrix so existing ``Net()`` calls keep working unchanged.
            in_features = X_train.shape[1]
        self.fc1 = nn.Linear(in_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Map a batch of inputs to one output value per row.

        Args:
            x: Tensor whose last dimension equals the ``in_features`` the
                network was built with.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [19]:
# Seed PyTorch's global RNG before any weights are created so that weight
# initialisation is repeatable on Restart & Run All. Loaders that shuffle
# without an explicit generator also draw from this global RNG. The value
# mirrors the random_state used for the train/test split.
torch.manual_seed(42)

# Create an instance of the neural network
net = Net()

# Define the loss function and optimizer
criterion = nn.MSELoss()  # mean squared error between prediction and target
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [20]:
# Define the training loop
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches and exposing a
            ``.dataset`` attribute with a length (used only for the
            progress print-out).
        model: Module called as ``model(X)``; it is switched to training
            mode before the pass starts.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer holding the model's parameters.

    Prints the batch loss on every 100th batch (including the first).
    """
    size = len(dataloader.dataset)
    # Make sure train-time behaviour is active even if an earlier evaluation
    # step left the model in eval mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        pred = model(X)
        # Drop only the trailing size-1 output dimension. A bare squeeze()
        # would also drop the batch dimension when a batch holds a single
        # sample, leaving a 0-d prediction that no longer matches y's shape.
        loss = loss_fn(pred.squeeze(-1), y)
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            # `current` is the batch index times this batch's length, i.e.
            # the number of samples seen before this batch when all earlier
            # batches had the same length.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [21]:
# Define the testing loop
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred.squeeze(), y).item()
    test_loss /= num_batches
    print(f"Avg loss: {test_loss:>8f} \n")

In [22]:
# Pair each feature tensor with its target tensor so they are indexed together.
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

# Serve both datasets in batches of 64. Only the training loader shuffles;
# the test loader keeps the dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

In [23]:
# Run the optimisation: each epoch is one full training pass followed by an
# evaluation pass on the held-out loader.
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(
        dataloader=train_loader,
        model=net,
        loss_fn=criterion,
        optimizer=optimizer,
    )
    test_loop(dataloader=test_loader, model=net, loss_fn=criterion)
print("Done!")

Epoch 1
-------------------------------
loss: 2959529984.000000  [    0/89299]
loss: 78443984.000000  [ 6400/89299]
loss: 123992592.000000  [12800/89299]
loss: 3124806912.000000  [19200/89299]
loss: 37214112.000000  [25600/89299]
loss: 83299448.000000  [32000/89299]
loss: 17646776.000000  [38400/89299]
loss: 3065822720.000000  [44800/89299]
loss: 2002971648.000000  [51200/89299]
loss: 44293656.000000  [57600/89299]
loss: 14049783808.000000  [64000/89299]
loss: 77108600.000000  [70400/89299]
loss: 758720512.000000  [76800/89299]
loss: 71938952.000000  [83200/89299]
Avg loss: 413537919.530086 

Epoch 2
-------------------------------
loss: 23753980.000000  [    0/89299]
loss: 105805024.000000  [ 6400/89299]
loss: 242497536.000000  [12800/89299]
loss: 56001112.000000  [19200/89299]
loss: 58420940.000000  [25600/89299]
loss: 390867552.000000  [32000/89299]
loss: 329655904.000000  [38400/89299]
loss: 22796316.000000  [44800/89299]
loss: 256348032.000000  [51200/89299]
loss: 280089440.000000