In [21]:
!git clone https://github.com/afrenkai/DS-3010-Final.git

Cloning into 'DS-3010-Final'...
remote: Enumerating objects: 140, done.[K
remote: Counting objects: 100% (140/140), done.[K
remote: Compressing objects: 100% (108/108), done.[K
remote: Total 140 (delta 61), reused 81 (delta 24), pack-reused 0 (from 0)[K
Receiving objects: 100% (140/140), 18.29 MiB | 13.88 MiB/s, done.
Resolving deltas: 100% (61/61), done.


In [22]:
%cd DS-3010-Final

/content/DS-3010-Final/DS-3010-Final


In [23]:
!ls
!pip install torcheval

cv.py  LICENSE	   new_3010_proj_work_ben.ipynb  requirements.txt
Data   Main.ipynb  README.md


In [24]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import lightgbm as lgb
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse
from lightgbm import LGBMRegressor
from torcheval.metrics import R2Score

In [25]:
train_df = pd.read_csv('Data/SGEMM_train.csv')
val_df = pd.read_csv('Data/SGEMM_val.csv')

#TODO: read test data (already in data dir), see what's going on in lightgbm, get r2 for the neural net

# preprocessing

In [26]:
def norm(x, xmin, xmax, a, b):
  '''
  Restricts x values to range of [xmin, xmax]
  '''
  numerator = x - xmin
  denominator = xmax - xmin
  return (numerator / denominator) * (b - a) + a

In [27]:
cols_to_combine = ['Run1 (ms)', 'Run2 (ms)', 'Run3 (ms)', 'Run4 (ms)']

In [28]:
def preprocess(df: pd.DataFrame):
  df['DELTA_RUNTIME'] = df.apply(
      lambda row: np.mean([row['Run1 (ms)'], row['Run2 (ms)'], row['Run3 (ms)'], row['Run4 (ms)']]),
      axis=1
  )
  for col in df.columns:
    if col in cols_to_combine:
      df = df.drop(col, axis = 1) #removes redundant cols


  min = 0
  max = 1

  df = df.apply(
      lambda row: (norm(row, row.min(), row.max(), min, max))
  )
  x = df.iloc[:, :14] # features
  y = df.iloc[:, -1:] # target
  return x, y


# LightGBM

In [None]:

x_tr, y_tr = preprocess(train_df)

train_data = lgb.Dataset(x_tr, label=y_tr)
x_val, y_val = preprocess(val_df)
# Create a LightGBM dataset for testing with features X_val and labels Y_val,
# and specify the reference dataset as train_data for consistent evaluation
val_data = lgb.Dataset(x_val, label=y_val, reference=train_data)
params = {
    'objective': 'regression',
    'metric': 'mse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
}

num_round = 100
bst = lgb.train(params, train_data, num_round, valid_sets=[
                val_data])


# Create an instance of the LightGBM Regressor with the RMSE metric.
model = LGBMRegressor(metric='mse')

# Train the model using the training data.
model.fit(x_tr, y_tr)

y_train = model.predict(x_tr)
y_v = model.predict(x_val)
print("Training MSE:", mse(y_tr, y_train))
print("Validation MSE:", mse(y_val, y_v))

print('train r2:', r2_score(y_tr, y_train))
print('val r2:', r2_score(y_val, y_v))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.017275 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 193280, number of used features: 14
[LightGBM] [Info] Start training from score 0.061354
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.030284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 193280, number of used features: 14
[LightGBM] [Info] Start training from score 0.061354
Training MSE: 0.00012048252219381041
Validation MSE: 0.0001191615359407208
train r2: 0.9901529624802171
val r2: 0.9903041142135522


# Simple Neural Net

In [None]:
class GPUNN(nn.Module):
  def __init__(self, in_feat, out_feat):
    super(GPUNN, self).__init__()
    self.device = 'cuda' if torch.cuda.is_available else 'cpu'
    self.L1 = nn.Linear(in_feat, 64, device=self.device)
    self.L2 = nn.Linear(64, out_feat, device = self.device)
    self.relu = nn.ReLU()
    self.bn1 = nn.BatchNorm1d(64, device = self.device)
    self.bn2 = nn.BatchNorm1d(out_feat, device = self.device)

  def forward(self, x):
    x = self.bn1(self.relu(self.L1(x)))
    x = self.bn2(self.L2(x))
    return x



In [None]:
# training output example was basen on an example from pytorch on using CNNs for ImageNet. https://github.com/pytorch/examples/blob/main/mnist/main.py
# lines 45-48 in original code, lines roughly 13-16 in ours
def train(model: nn.Module, train_dl: DataLoader, batch_size, device, n_epochs, optimizer, criterion):
    model.train()
    for epoch in trange(n_epochs):
        for batch, (data, target) in enumerate(train_dl):
            data, target = data.to(device).float(), target.to(device).float()
            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, target)
            loss.backward()
            optimizer.step()
            if batch % 1000 == 0:
                print('\nTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch + 1, batch * len(data), len(train_dl.dataset),
                    100. * batch / len(train_dl), loss.item()))

    torch.save(model.state_dict(), 'nn.pth')
    print('model saved to nn.pth')


In [None]:
#r2 score stuff taken from https://pytorch.org/torcheval/main/generated/torcheval.metrics.R2Score.html
#I think meaning loss over batches is fair, since that's a relatively good indicator of performance.
def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    losses = []
    metric = R2Score()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device).float(), target.to(device).float()
            output = model(data)

            test_loss = criterion(output, target)
            # print('loss bien')
            # print(f'Target Tensor: {target.detach().cpu().numpy()}\n, Output Tensor:{output.detach().cpu().numpy()}\n')

            metric.update(output, target)
            # print(f' r2 rn: {metric.compute()}')

            # print('r2 bien')
            losses.append(test_loss)


    # print(test_loss / len(test_loader.dataset))
    # print(np.mean(r2s))
    r2 = metric.compute()
    r2 = float(r2.detach().cpu().numpy())
    # print(type(r2))
    # print(round(r2, 4))
    return (np.mean([ten.detach().cpu().numpy() for ten in losses]), round(r2, 4) )






In [None]:
def create_dls(x: pd.DataFrame, y:pd.DataFrame, split: str = 'train'):
  x = x.loc[:, :].values #conv to np array
  y = y.loc[:, :].values
  x_ten = torch.tensor(x) # conv to tensor
  y_ten = torch.tensor(y)
  ds = TensorDataset(x_ten, y_ten)
  if split == 'train':
    dl = DataLoader(ds, batch_size = 32, shuffle = True) #safe to shuffle since its train
  else:
    dl = DataLoader(ds, batch_size = 32, shuffle = False) #assuming test/val, can't shuffle
  return ds, dl

In [15]:
x_tr, y_tr = preprocess(train_df)
model = GPUNN(len(x_tr.columns), len(y_tr.columns))
print(model)
_, train_dl = create_dls(x_tr, y_tr)
criterion = nn.MSELoss() # MSE, like we've been using everywhere else
optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4) # pretty standard, no need for k-fold since the method works pretty well without tuning
train(model, train_dl, 32, model.device, 10, optimizer, criterion) # 10 epochs to get a baseline.

GPUNN(
  (L1): Linear(in_features=14, out_features=64, bias=True)
  (L2): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


  0%|          | 0/10 [00:00<?, ?it/s]









 10%|█         | 1/10 [00:12<01:51, 12.39s/it]










 20%|██        | 2/10 [00:23<01:34, 11.86s/it]










 30%|███       | 3/10 [00:35<01:21, 11.66s/it]










 40%|████      | 4/10 [00:47<01:12, 12.07s/it]











 50%|█████     | 5/10 [01:05<01:09, 13.89s/it]









 60%|██████    | 6/10 [01:18<00:54, 13.60s/it]










 70%|███████   | 7/10 [01:29<00:38, 12.93s/it]










 80%|████████  | 8/10 [01:41<00:24, 12.48s/it]










 90%|█████████ | 9/10 [01:52<00:12, 12.15s/it]










100%|██████████| 10/10 [02:04<00:00, 12.42s/it]


model saved to nn.pth





In [16]:
x_val, y_val = preprocess(val_df)
_, val_dl = create_dls(x_val, y_val)
criterion = nn.MSELoss()
val_loss, val_r2= test(model, model.device, val_dl, criterion)
print(f'Neural Network Validation Mean Squared Error: {val_loss:.6f}')
print(f'Neural Network Validation R2: {val_r2}')

Neural Network Validation Mean Squared Error: 0.001176
Neural Network Validation R2: 0.9043


In [17]:
# model.load_state_dict(torch.load('nn.pth', weights_only=True))
# print("Model's state_dict:")
# model.state_dict()

PCA, Lasso & Linear Regression - Katelyn

In [29]:
from sklearn.decomposition import PCA

pca = PCA(n_components=2)

X_train = pca.fit_transform(x_tr)
X_test = pca.transform(x_val)

explained_variance = pca.explained_variance_ratio_

print(explained_variance)


[0.10205183 0.10191195]


In [30]:
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

#From Sklearn --> Lasso documentation

#Setting alpha
lasso = Lasso(alpha=0.00001)

#Fitting to training data
lasso.fit(X_train, y_train)

#making y predictions based on the X_test, given by the PCA
y_pred = lasso.predict(X_test)

#Getting the MSE
mse = mean_squared_error(y_val, y_pred)
print(f"Mean Squared Error: {mse}")

print("Coefficients:", lasso.coef_)
#very low lasso coefficents

Mean Squared Error: 0.012236981126572913
Coefficients: [ 0.01265718 -0.00163292]


In [32]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression

# Initialize LDA and fit the model
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Predict on the test set
y_pred = linreg.predict(X_test)

# Evaluate accuracy
mse = mean_squared_error(y_val, y_pred)
print(f"Mean Squared Error: {mse}")

#This does extremely poorly? It's likely that the lower number of features means the model does not perform as well.
print('val r2:', r2_score(y_val, y_pred))

Mean Squared Error: 0.012236947492410354
val r2: 0.004309198228256705
