In [None]:
# Fetch the project repository; its Data/ directory holds the SGEMM CSV files read further down.
!git clone https://github.com/afrenkai/DS-3010-Final.git

Cloning into 'DS-3010-Final'...
remote: Enumerating objects: 111, done.[K
remote: Counting objects: 100% (111/111), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 111 (delta 44), reused 78 (delta 23), pack-reused 0 (from 0)[K
Receiving objects: 100% (111/111), 17.56 MiB | 15.39 MiB/s, done.
Resolving deltas: 100% (44/44), done.


In [None]:
# Move into the cloned repository so the relative Data/ paths used below resolve.
%cd DS-3010-Final

/content/DS-3010-Final


In [None]:
!ls
!pip install torcheval

cv.py  LICENSE	   main.py			 README.md
Data   Main.ipynb  new_3010_proj_work_ben.ipynb  requirements.txt
Collecting torcheval
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval-0.0.7-py3-none-any.whl (179 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.2/179.2 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torcheval
Successfully installed torcheval-0.0.7


In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import lightgbm as lgb
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse
from lightgbm import LGBMRegressor
from torcheval.metrics import R2Score

In [None]:
# Load the training and validation splits from the repository's Data/ directory
# (paths are relative to the repository root).
train_df = pd.read_csv('Data/SGEMM_train.csv')
val_df = pd.read_csv('Data/SGEMM_val.csv')

# TODO: read the test data (already in the data dir), investigate what is going on
# in LightGBM, and get R2 for the neural net.

# Preprocessing

In [None]:
def norm(x, xmin, xmax, a, b):
  '''
  Min-max rescale x from the range [xmin, xmax] onto the range [a, b].

  x may be a scalar or anything that supports elementwise arithmetic (for
  example a pandas Series); the result has the same form as x. Values of x
  outside [xmin, xmax] are not clipped, so they land outside [a, b].

  If xmin and xmax are equal scalars the input range has zero width and there
  is nothing to scale by; every value is then mapped to a instead of dividing
  by zero.
  '''
  numerator = x - xmin
  denominator = xmax - xmin
  # Guard only the scalar-bounds case; array-valued bounds keep plain
  # elementwise division.
  if np.ndim(denominator) == 0 and denominator == 0:
    # Multiplying by 0 keeps the type/shape of x (and any NaNs it contains).
    return numerator * 0 + a
  return (numerator / denominator) * (b - a) + a

In [None]:
# Names of the four repeated runtime measurement columns; preprocess() averages
# them into a single target column and then drops them.
cols_to_combine = ['Run1 (ms)', 'Run2 (ms)', 'Run3 (ms)', 'Run4 (ms)']

In [None]:
def preprocess(df: pd.DataFrame):
  '''
  Turn a raw frame into min-max normalised features and target.

  Steps:
    1. Average the run columns listed in cols_to_combine into a new
       'DELTA_RUNTIME' column and drop the individual run columns.
    2. Rescale every remaining column (features and target) to [0, 1] with
       norm(), using that column's own minimum and maximum in the frame
       passed in. Separate calls (e.g. train and validation) therefore each
       use their own statistics.
    3. Split into x (every column except 'DELTA_RUNTIME') and y (a one-column
       DataFrame holding 'DELTA_RUNTIME').

  The input frame is left untouched; all work happens on new frames.

  Returns:
    (x, y) as pandas DataFrames.
  '''
  # Vectorised row mean; skipna=False keeps np.mean's behaviour of propagating
  # NaN when any of the runs is missing.
  mean_runtime = df[cols_to_combine].mean(axis=1, skipna=False)

  # drop/assign return new frames, so the caller's DataFrame is not modified.
  combined = df.drop(columns=cols_to_combine).assign(DELTA_RUNTIME=mean_runtime)

  lower, upper = 0, 1
  # DataFrame.apply defaults to axis=0, so the function receives one column at a time.
  scaled = combined.apply(
      lambda col: norm(col, col.min(), col.max(), lower, upper)
  )

  # Select by name rather than by a hard-coded column count.
  x = scaled.drop(columns='DELTA_RUNTIME')
  y = scaled[['DELTA_RUNTIME']]
  return x, y


# LightGBM

In [None]:

# Normalised training features / target.
x_tr, y_tr = preprocess(train_df)

train_data = lgb.Dataset(x_tr, label=y_tr)
# Normalised validation features / target.
x_val, y_val = preprocess(val_df)
# Create a LightGBM dataset for validation with features x_val and labels y_val,
# and specify train_data as the reference dataset for consistent evaluation.
val_data = lgb.Dataset(x_val, label=y_val, reference=train_data)
params = {
    'objective': 'regression',
    'metric': 'mse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
}

# Train a booster through the native lgb.train API for num_round boosting
# rounds, with val_data as the validation set.
# NOTE(review): `bst` is not referenced again in the cells shown here; the
# metrics printed below come from `model`, which is trained separately.
num_round = 100
bst = lgb.train(params, train_data, num_round, valid_sets=[
                val_data])


# Create an instance of the LightGBM regressor (scikit-learn style API) with the MSE metric.
model = LGBMRegressor(metric='mse')

# Train the model using the training data.
model.fit(x_tr, y_tr)

# y_train / y_v hold the model's predictions (not labels) for the training and
# validation features.
y_train = model.predict(x_tr)
y_v = model.predict(x_val)
print("Training MSE:", mse(y_tr, y_train))
print("Validation MSE:", mse(y_val, y_v))

print('train r2:', r2_score(y_tr, y_train))
print('val r2:', r2_score(y_val, y_v))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 193280, number of used features: 14
[LightGBM] [Info] Start training from score 0.061354
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 193280, number of used features: 14
[LightGBM] [Info] Start training from score 0.061354
Training MSE: 0.00012048252219381041
Validation MSE: 0.0001191615359407208
train r2: 0.9901529624802171
val r2: 0.9903041142135522


# Simple Neural Net

In [None]:
class GPUNN(nn.Module):
  '''
  Small fully connected regression network:
  Linear(in_feat, 64) -> ReLU -> BatchNorm1d(64) -> Linear(64, out_feat) -> BatchNorm1d(out_feat).

  All layers are created on self.device, which is 'cuda' when a CUDA device
  is available and 'cpu' otherwise.

  Args:
    in_feat: number of input features per sample.
    out_feat: number of values predicted per sample.
  '''
  def __init__(self, in_feat, out_feat):
    super().__init__()
    # is_available must be *called*: the bare function object is always truthy,
    # which would select 'cuda' even on machines without a GPU.
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.L1 = nn.Linear(in_feat, 64, device=self.device)
    self.L2 = nn.Linear(64, out_feat, device=self.device)
    self.relu = nn.ReLU()
    self.bn1 = nn.BatchNorm1d(64, device=self.device)
    # NOTE(review): batch-normalising the final regression output is unusual;
    # kept as-is so saved state_dicts stay loadable — confirm it is intended.
    self.bn2 = nn.BatchNorm1d(out_feat, device=self.device)

  def forward(self, x):
    '''Map a (batch, in_feat) tensor to a (batch, out_feat) tensor.'''
    hidden = self.bn1(self.relu(self.L1(x)))
    return self.bn2(self.L2(hidden))



In [None]:
def train(model: nn.Module, train_dl: DataLoader, batch_size, device, n_epochs, optimizer, criterion):
  '''
  Train model for n_epochs full passes over train_dl, then save its weights.

  Args:
    model: network to optimise; put into training mode here.
    train_dl: DataLoader yielding (data, target) batches.
    batch_size: unused; kept so existing calls keep working (the batch size
      is whatever train_dl was built with).
    device: device the batches are moved to before the forward pass.
    n_epochs: number of passes over train_dl.
    optimizer: optimiser already bound to model's parameters.
    criterion: callable(out, target) returning the loss to backpropagate.

  Side effects:
    Prints the loss every 100 batches and writes model.state_dict() to
    'nn.pth' in the current working directory once training finishes.
  '''
  model.train()
  batches_per_epoch = len(train_dl)
  n_samples = len(train_dl.dataset)

  # Actually iterate over the epochs; a single pass over train_dl is one epoch.
  for epoch in range(1, n_epochs + 1):
    for batch, (data, target) in enumerate(train_dl):
      # Cast to float32 to match the model's parameter dtype.
      data, target = data.to(device).float(), target.to(device).float()
      optimizer.zero_grad()
      out = model(data)
      loss = criterion(out, target)
      loss.backward()
      optimizer.step()
      if batch % 100 == 0:
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch * len(data), n_samples,
            100. * batch / batches_per_epoch, loss.item()))

  torch.save(model.state_dict(), 'nn.pth')




In [None]:
def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    losses = []
    r2s = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device).float(), target.to(device).float()
            output = model(data)

            test_loss = criterion(output, target)
            # print('loss bien')
            # print(f'Target Tensor: {target.detach().cpu().numpy()}\n, Output Tensor:{output.detach().cpu().numpy()}\n')
            test_r2 = r2_score(target.detach().cpu().numpy(), output.detach().cpu().numpy())

            # print('r2 bien')
            losses.append(test_loss)
            r2s.append(test_r2)


    # print(test_loss / len(test_loader.dataset))
    # print(np.mean(r2s))

    return (np.mean([ten.detach().cpu().numpy() for ten in losses]), 0)






In [None]:
def create_dls(x: pd.DataFrame, y: pd.DataFrame, batch_size: int = 32, shuffle: bool = False):
  '''
  Wrap feature and target frames in a TensorDataset and a DataLoader.

  Args:
    x: feature frame, one row per sample.
    y: target frame with the same number of rows as x.
    batch_size: samples per batch (default 32).
    shuffle: whether the DataLoader reshuffles the samples every epoch
      (default False, i.e. original row order).

  Returns:
    (ds, dl): the TensorDataset and the DataLoader built on top of it. The
    tensors keep the dtype of the underlying NumPy arrays.
  '''
  x_ten = torch.tensor(x.to_numpy())
  y_ten = torch.tensor(y.to_numpy())
  ds = TensorDataset(x_ten, y_ten)
  dl = DataLoader(ds, batch_size=batch_size, shuffle=shuffle)
  return ds, dl

In [None]:
# Rebuild the normalised training split and size the network from its column counts.
x_tr, y_tr = preprocess(train_df)
model = GPUNN(x_tr.shape[1], y_tr.shape[1])
print(model)

# Loss and optimiser for the regression task.
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4)

# Only the DataLoader is needed; the underlying dataset is discarded.
_, train_dl = create_dls(x_tr, y_tr)
train(
    model,
    train_dl,
    batch_size=32,
    device=model.device,
    n_epochs=10,
    optimizer=optimizer,
    criterion=criterion,
)

GPUNN(
  (L1): Linear(in_features=14, out_features=64, bias=True)
  (L2): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [None]:
# Score the trained network on the validation split. Only the MSE is reported;
# the second value returned by test() is discarded here.
# TODO: report the validation R2 for the neural net as well.
criterion = nn.MSELoss()
x_val, y_val = preprocess(val_df)
_, val_dl = create_dls(x_val, y_val)
val_loss, _ = test(
    model=model,
    device=model.device,
    test_loader=val_dl,
    criterion=criterion,
)
print(f'Neural Network Validation Mean Squared Error: {val_loss}')

Neural Network Validation Mean Squared Error: 0.1372702717781067


In [None]:
# model.load_state_dict(torch.load('nn.pth', weights_only=True))
# # Print model's state_dict
# print("Model's state_dict:")
# model.state_dict()