In [2]:
!git clone https://github.com/afrenkai/DS-3010-Final.git

Cloning into 'DS-3010-Final'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (53/53), done.[K
remote: Total 77 (delta 26), reused 61 (delta 17), pack-reused 0 (from 0)[K
Receiving objects: 100% (77/77), 16.15 MiB | 13.02 MiB/s, done.
Resolving deltas: 100% (26/26), done.


In [3]:
%cd DS-3010-Final

/content/DS-3010-Final


In [4]:
!ls

cv.py  data.ipynb  main.py		README.md	  setup.bat
Data   LICENSE	   Preprocessing.ipynb	requirements.txt  sgemm_product.csv


In [31]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [123]:
train_df = pd.read_csv('Data/SGEMM_train.csv')
val_df = pd.read_csv('Data/SGEMM_val.csv')

# preprocessing

In [134]:
def norm(x, xmin, xmax, a, b):
  '''
  Restricts x values to range of [xmin, xmax]
  '''
  numerator = x - xmin
  denominator = xmax - xmin
  return (numerator / denominator) * (b - a) + a

In [136]:
cols_to_combine = ['Run1 (ms)', 'Run2 (ms)', 'Run3 (ms)', 'Run4 (ms)']

In [140]:
def preprocess(df: pd.DataFrame):
  df['DELTA_RUNTIME'] = df.apply(
      lambda row: np.mean([row['Run1 (ms)'], row['Run2 (ms)'], row['Run3 (ms)'], row['Run4 (ms)']]),
      axis=1
  )
  for col in df.columns:
    if col in cols_to_combine:
      df = df.drop(col, axis = 1)
  min = 0
  max = 1


  df = df.apply(
      lambda row: (norm(row, row.min(), row.max(), min, max))
  )
  x = df.iloc[:, :14]
  y = df.iloc[:, -1:]
  return x, y


# Lin Reg

# Poly Reg

# Simple Neural Net

In [113]:
class GPUNN(nn.Module):
  def __init__(self, in_feat, out_feat):
    super(GPUNN, self).__init__()
    self.device = 'cuda' if torch.cuda.is_available else 'cpu'
    self.L1 = nn.Linear(in_feat, 64, device=self.device)
    self.L2 = nn.Linear(64, out_feat, device = self.device)
    self.relu = nn.ReLU()
    self.bn1 = nn.BatchNorm1d(64, device = self.device)
    self.bn2 = nn.BatchNorm1d(out_feat, device = self.device)

  def forward(self, x):
    x = self.bn1(self.relu(self.L1(x)))
    x = self.bn2(self.L2(x))
    return x



In [114]:
def train(model: nn.Module, train_dl: DataLoader, batch_size, device, n_epochs, optimizer, criterion):
  model.train()
  for batch, (data, target) in enumerate(train_dl):

    data, target = data.to(device).float(), target.to(device).float()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out, target)
    loss.backward()
    optimizer.step()
    if batch % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                n_epochs, batch * len(data), len(train_dl.dataset),
                100. * batch / len(train_dl), loss.item()))




In [159]:
def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device).float(), target.to(device).float()
            output = model(data)
            test_loss = criterion(output, target)
    print(test_loss / len(test_loader.dataset))






In [115]:
def create_dls(x: pd.DataFrame, y:pd.DataFrame):
  x = x.loc[:, :].values
  y = y.loc[:, :].values
  x_ten = torch.tensor(x)
  y_ten = torch.tensor(y)
  ds = TensorDataset(x_ten, y_ten)
  dl = DataLoader(ds, batch_size = 32)
  return ds, dl

In [161]:
x_tr, y_tr = preprocess(train_df)
model = GPUNN(len(x_tr.columns), len(y_tr.columns))
print(model)
_, train_dl = create_dls(x_tr, y_tr)
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4)
train(model, train_dl, 32, model.device, 10, optimizer, criterion)

GPUNN(
  (L1): Linear(in_features=14, out_features=64, bias=True)
  (L2): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [165]:
x_val, y_val = preprocess(val_df)
_, val_dl = create_dls(x_val, y_val)
criterion = nn.MSELoss()
test(model, model.device, val_dl, criterion)

tensor(2.5191e-06, device='cuda:0')
