In [19]:
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, datasets

Model Definition

In [28]:
class Net(nn.Module):
    """Fully connected network that maps a feature vector to one scalar.

    The layer widths shrink 2048 -> 1024 -> 512 -> 256 -> 128 -> 64 -> 32 -> 1.
    Every layer except the last is followed by a ReLU; the last layer is
    linear, so the output is unbounded.

    Args:
        in_features: Width of the input feature vector. Defaults to 573,
            the value that was previously hard-coded, so ``Net()`` builds
            exactly the same architecture as before.
    """

    def __init__(self, in_features=573):
        super().__init__()
        # Attribute names are kept as-is so that previously saved
        # state_dicts (keys such as "hidden1.weight") still load.
        self.input = nn.Linear(in_features, 2048)
        self.hidden1 = nn.Linear(2048, 1024)
        self.hidden2 = nn.Linear(1024, 512)
        self.hidden3 = nn.Linear(512, 256)
        self.hidden4 = nn.Linear(256, 128)
        self.hidden5 = nn.Linear(128, 64)
        self.hidden6 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        activated_layers = (
            self.input,
            self.hidden1,
            self.hidden2,
            self.hidden3,
            self.hidden4,
            self.hidden5,
            self.hidden6,
        )
        for layer in activated_layers:
            x = F.relu(layer(x))
        # No activation on the final layer: raw linear output.
        return self.output(x)

Variable Definitions

In [21]:
# --- Run configuration -------------------------------------------------
# Set to False to force CPU even when a CUDA device is available.
useCUDA = True

# Training hyper-parameters.
epochs = 50
batchSize = 32

# Input CSV location (not referenced by the cells visible in this notebook).
dataPath = "../large_field_preprocessed_data.csv"

# Unix timestamp (whole seconds) taken when this cell runs; it names the
# saved model file so that separate runs do not overwrite each other.
t = int(time.time())
modelPath = f"../trained_models/{t}"

Device Check

In [22]:
# Use the first CUDA device only when the user opted in (useCUDA) and CUDA
# is actually available; in every other case fall back to the CPU.
device = torch.device('cuda:0' if (useCUDA and torch.cuda.is_available()) else 'cpu')

In [23]:
# xTensor = torch.tensor(data.drop(columns=['INCWAGE_CPIU_2010']).values, dtype=torch.float32)
# yTensor = torch.tensor(data['INCWAGE_CPIU_2010'].values, dtype=torch.float32)

Model and Dataset Creation

In [29]:
# Build the network and move its parameters to the selected device.
net = Net().to(device)

# Load the serialized training set from disk.
# NOTE(review): torch.load unpickles the file, so only load files from a
# trusted source.
trainSet = torch.load("../dataset/trainSet.pt")

# Shuffled mini-batch loader for the training set. The dataset itself is not
# moved to the device; each batch is moved inside the training loop.
trainLoader = DataLoader(trainSet, batch_size=batchSize, shuffle=True)

Training

In [30]:

# Create the checkpoint directory *before* training starts. torch.save does
# not create missing parent folders, so without this a missing directory
# would only surface after the final epoch and the trained weights would be
# lost.
saveDir = os.path.dirname(modelPath)
if saveDir:  # os.makedirs('') raises, so skip when modelPath has no folder part
    os.makedirs(saveDir, exist_ok=True)

optimizer = optim.Adam(net.parameters(), lr=1e-5)

# Mean squared error averaged over all elements of the batch.
criterion = nn.MSELoss(reduction='mean')

print("Epochs Started")

for epoch in range(epochs):
    # Accumulates the loss over the current 1000-batch reporting window.
    running_loss = 0.0
    for i, data in enumerate(trainLoader):
        X, y = data
        # Insert a dimension at index 1 so the target lines up with the
        # network's trailing output dimension of size 1
        # (assumes y arrives as shape (batch,) -- TODO confirm).
        y = y.unsqueeze(1)
        X = X.to(device)
        y = y.to(device)

        # Clear gradients left over from the previous step.
        net.zero_grad()

        output = net(X)

        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 1000 == 999:    # print every 1000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 1000:.3f}')
            running_loss = 0.0

# Persist only the learned parameters (state_dict), not the whole module.
torch.save(net.state_dict(), modelPath)
torch.cuda.empty_cache()


Epochs Started
[1,  1000] loss: 4402729116.480
[1,  2000] loss: 2992651744.416
[1,  3000] loss: 3014048884.224
[1,  4000] loss: 2783807846.432
[1,  5000] loss: 2922727249.376
[1,  6000] loss: 2833529335.744
[1,  7000] loss: 2753362227.616
[1,  8000] loss: 2839281310.080
[1,  9000] loss: 2675294500.064
[1, 10000] loss: 2788983097.216
[1, 11000] loss: 2765732781.312
[1, 12000] loss: 2608823484.064
[1, 13000] loss: 2542186785.296
[1, 14000] loss: 2599845117.440
[1, 15000] loss: 2528826241.952
[1, 16000] loss: 2515833738.752
[1, 17000] loss: 2465628904.224
[1, 18000] loss: 2305377943.360
[1, 19000] loss: 2155122806.288
[1, 20000] loss: 2370942224.624
[1, 21000] loss: 2193116138.416
[1, 22000] loss: 2321080503.776
[1, 23000] loss: 2265057340.464
[1, 24000] loss: 2116005688.992
[1, 25000] loss: 2194132947.328
[1, 26000] loss: 2206054497.136
[1, 27000] loss: 2057425385.952
[1, 28000] loss: 2206997738.128
[1, 29000] loss: 2109864402.976
[1, 30000] loss: 2158060804.720
[1, 31000] loss: 19633887