In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1212)
torch.cuda.manual_seed(1212)
import pandas as pd
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
data = pd.read_csv('cars.csv',encoding='cp1252',index_col=0)
data.head()

Unnamed: 0_level_0,Rating,Gas Mileage,Engine,EPA Class,Style Name,Drivetrain,Passenger Capacity,Passenger Doors,Body Style,Base Curb Weight (lbs),...,Second Shoulder Room (in),Second Head Room (in),Front Shoulder Room (in),Front Head Room (in),Second Leg Room (in),Wheelbase (in),"Track Width, Rear (in)","Height, Overall (in)","Length, Overall (in)","Track Width, Front (in)"
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
,,,,,,,,,,,...,,,,,,,,,,
2021 Kia Niro,5.666667,51 mpg City/46 mpg Hwy,"Gas/Electric I-4, 1.6 L",Small Station Wagons,EX Premium FWD,Front Wheel Drive,5.0,4.0,Sport Utility,3122.0,...,55.2,39.1,56.0,40.1,37.4,106.3,62.2,60.4,171.5,61.6
,,,,,,,,,,,...,,,,,,,,,,
2021 Tesla Model 3,7.5,,Electric,Midsize Cars,Long Range AWD,All Wheel Drive,5.0,4.0,4dr Car,4250.0,...,54.0,37.7,56.3,40.3,35.2,113.2,62.2,56.8,184.8,62.2
,,,,,,,,,,,...,,,,,,,,,,


Here we can see a small sample of the data set, and the values of a handful of the input parameters.

In [3]:
y=data['Rating'].values
x = data.iloc[:,2:].values

In [4]:
y=y.round(1)


We will have to assign labels to each of the non-numeric values in the data

In [5]:
engine_labels = []
for i in range(len(x[:,0])):
    try: 
        ind = engine_labels.index(x[i,0])
        x[i,0]=ind
    except ValueError:
        engine_labels.append(x[i,0])
        ind = engine_labels.index(x[i,0])
        x[i,0] = ind

In [6]:
epa_labels = []
for i in range(len(x[:,1])):
    try: 
        ind = epa_labels.index(x[i,1])
        x[i,1]=ind
    except ValueError:
        epa_labels.append(x[i,1])
        ind = epa_labels.index(x[i,1])
        x[i,1] = ind

In [7]:
style_labels = []
for i in range(len(x[:,2])):
    try: 
        ind = style_labels.index(x[i,2])
        x[i,2]=ind
    except ValueError:
        style_labels.append(x[i,2])
        ind = style_labels.index(x[i,2])
        x[i,2] = ind

In [8]:
drivetrain_labels = []
for i in range(len(x[:,3])):
    try: 
        ind = drivetrain_labels.index(x[i,3])
        x[i,3]=ind
    except ValueError:
        drivetrain_labels.append(x[i,3])
        ind = drivetrain_labels.index(x[i,3])
        x[i,3] = ind

In [9]:
body_style_labels = []
for i in range(len(x[:,6])):
    try: 
        ind = body_style_labels.index(x[i,6])
        x[i,6]=ind
    except ValueError:
        body_style_labels.append(x[i,6])
        ind = body_style_labels.index(x[i,6])
        x[i,6] = ind

In [10]:
#transmission_labels = []
#for i in range(len(x[:,7])):
#    try: 
#        ind = transmission_labels.index(x[i,7])
#        x[i,7]=ind
#    except ValueError:
#        transmission_labels.append(x[i,7])
#        ind = transmission_labels.index(x[i,7])
#        x[i,7] = ind

Now, all of the categorical input parameters should be in numeric class form (this section will be expanded in the final version)

In [11]:
x = x.astype('float64')
print(x)

In [12]:
from sklearn.model_selection import train_test_split
xtr1, xts, ytr1, yts = train_test_split(x,y,test_size=0.33)
xtr, xval, ytr, yval = train_test_split(xtr1,ytr1,test_size=0.33)


In [13]:
xtr = torch.from_numpy(xtr)
xval = torch.from_numpy(xval)
xts = torch.from_numpy(xts)
ytr = torch.from_numpy(ytr)
yval = torch.from_numpy(yval)
yts = torch.from_numpy(yts)

trainset = [(xtr[i],ytr[i]) for i in range(len(xtr))]
valset = [(xval[i],yval[i]) for i in range(len(xval))]
testset = [(xts[i],yts[i]) for i in range(len(xts))]

In [14]:
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(20,1000),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(1000,2000),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(2000,1000),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(1000,100),
            nn.Sigmoid()
        )
    def forward(self,x):
        output = self.model(x)
        return output


In [15]:
discriminator = Discriminator()
discriminator = discriminator.to(device)

In [16]:
class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(20,1000),
            nn.ReLU(),
            nn.Linear(1000,2000),
            nn.ReLU(),
            nn.Linear(2000,20),
        )
    def forward(self,x):
        output = self.model(x)
        return output

In [17]:
generator = Generator()
generator = generator.to(device)

In [18]:
lr = 0.001
epochs = 10
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

In [19]:
discr_opt = optim.Adam(discriminator.parameters(), lr=lr)
gen_opt = optim.Adam(generator.parameters(),lr=lr)

In [20]:
batch_size = 1
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle = True)
val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size)

In [26]:
for epoch in range(10):
    for n, (real_samples, real_labels) in enumerate(train_loader):
        real_samples= real_samples.cuda()
        real_labels=real_labels.long().cuda()
        latent_space_samples = torch.randn((batch_size,20)).cuda()
        generated_samples = generator(latent_space_samples).cuda()
        generated_samples_labels = torch.zeros((batch_size)).long().cuda()
        all_samples = torch.cat((real_samples,generated_samples)).float().cuda()
        all_labels = torch.cat((real_labels,generated_samples_labels)).cuda()
        #discr train
        discriminator.zero_grad()
        disout = discriminator(all_samples)
        disloss = criterion(disout, all_labels)
        disacc = 0
        disloss.backward()
        discr_opt.step()
        
        latent_space_samples = torch.randn((batch_size,20)).cuda()
        #gen train
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discgen = discriminator(generated_samples)
        genloss = criterion(output_discgen,real_labels)
        genacc = 0
        genloss.backward()
        gen_opt.step()
        if epoch % 10 == 0 and n == batch_size - 1:

            print(f"Epoch: {epoch} Discriminator Loss: {disloss} Discriminator Accuracy: {disacc}")

            print(f"Epoch: {epoch} Generator Loss: {genloss}     Generator Accuracy: {genacc}")
        

Epoch: 0 Discriminator Loss: nan Discriminator Accuracy: 0
Epoch: 0 Generator Loss: nan     Generator Accuracy: 0
