In [27]:
import pandas as pd
import os, sys
import numpy as np

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '../..')))

from utils.prediction_utils import *
""
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import make_scorer, r2_score

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim import Adam

random_seed = 42

In [3]:
base_dir = "/Users/andrew/Desktop/Harvard/idreos-research/gpu_profiling"
X, y = get_data("conv2d", base_dir, sample_rate=1.0)
X.info()

  dfs = pd.concat(dfs, axis=0, ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 685125 entries, 0 to 685124
Data columns (total 16 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   b             685125 non-null  int64  
 1   in_channels   685125 non-null  int64  
 2   iH            685125 non-null  int64  
 3   iW            685125 non-null  int64  
 4   out_channels  685125 non-null  int64  
 5   groups        685125 non-null  int64  
 6   kH            685125 non-null  int64  
 7   kW            685125 non-null  int64  
 8   stride        685125 non-null  int64  
 9   dilation      685125 non-null  int64  
 10  gflops        685125 non-null  float64
 11  dtype_16      685125 non-null  bool   
 12  dtype_32      685125 non-null  bool   
 13  dtype_b16     685125 non-null  bool   
 14  transposed_0  685125 non-null  bool   
 15  transposed_1  685125 non-null  bool   
dtypes: bool(5), float64(1), int64(10)
memory usage: 60.8 MB


In [4]:
df = pd.concat([X, y], axis=1)
df = df.query("time > 0").dropna()
X, y = df.drop(["time"], axis=1), df["time"]

In [5]:
X_train, X_val, X_test, y_train, y_val, y_test = get_train_test_split(X, y, return_concat=False)
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 444058 entries, 178626 to 121958
Data columns (total 16 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   b             444058 non-null  int64  
 1   in_channels   444058 non-null  int64  
 2   iH            444058 non-null  int64  
 3   iW            444058 non-null  int64  
 4   out_channels  444058 non-null  int64  
 5   groups        444058 non-null  int64  
 6   kH            444058 non-null  int64  
 7   kW            444058 non-null  int64  
 8   stride        444058 non-null  int64  
 9   dilation      444058 non-null  int64  
 10  gflops        444058 non-null  float64
 11  dtype_16      444058 non-null  bool   
 12  dtype_32      444058 non-null  bool   
 13  dtype_b16     444058 non-null  bool   
 14  transposed_0  444058 non-null  bool   
 15  transposed_1  444058 non-null  bool   
dtypes: bool(5), float64(1), int64(10)
memory usage: 42.8 MB


# Neural Network Training

## From Paper

In [41]:
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR

class PredictionNetwork(nn.Module):
    def __init__(self, input_size, hidden_layers, output_size):
        super(PredictionNetwork, self).__init__()
        layers = []
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(input_size, hidden_size))
            layers.append(nn.ReLU())
            input_size = hidden_size
        layers.append(nn.Dropout(p=0.5))  # Dropout before the output layer
        layers.append(nn.Linear(hidden_layers[-1], output_size))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

In [42]:
def rmsle_loss(y_pred, y_actual):
    return torch.sqrt(torch.mean(torch.square(torch.log1p(y_pred) - torch.log1p(y_actual))))

In [None]:
def train_model(model, train_loader, val_loader, num_epochs=300):
    criterion = rmsle_loss
    optimizer = Adam(model.parameters(), lr=0.1, weight_decay=1e-5)  # L2 regularization
    scheduler = StepLR(optimizer, step_size=40, gamma=0.5)  # Learning rate decay

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for data, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        scheduler.step()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader)}')

        # Optionally validate the model
        model.eval()
        with torch.no_grad():
            validation_loss = 0
            for data, targets in val_loader:
                outputs = model(data)
                loss = criterion(outputs, targets)
                validation_loss += loss.item()
            print(f'Validation Loss: {validation_loss / len(val_loader)}')

In [None]:
# Define your data loaders
# train_loader, val_loader = setup_data_loaders()

# Assuming a certain input size and output size
model = PredictionNetwork(input_size=10, hidden_layers=[64, 128], output_size=1)
# train_model(model, train_loader, val_loader)


## Random Stuff

In [6]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [14]:
# X_train_scaled.shape
# X_val_scaled.shape
# X_test_scaled.shape

(95155,)

In [8]:
try:
    X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_train_t = torch.tensor(y_train.values, dtype=torch.float32)
    X_val_t = torch.tensor(X_val_scaled, dtype=torch.float32)
    y_val_t = torch.tensor(y_val.values, dtype=torch.float32)
except Exception as e:
    print(f"Error occurred: {e}")

In [14]:
# Creating datasets
train_dataset = TensorDataset(X_train_t, y_train_t)
val_dataset = TensorDataset(X_val_t, y_val_t)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [36]:
# Define the Neural Network Architecture
class RegressionNN(nn.Module):
    def __init__(self):
        super(RegressionNN, self).__init__()
        self.fc1 = nn.Linear(X_train_t.shape[1], 20)
        self.fc2 = nn.Linear(50, 20)
        self.fc3 = nn.Linear(20, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class EnhancedRegressionNN(nn.Module):
    def __init__(self):
        super(EnhancedRegressionNN, self).__init__()
        self.fc1 = nn.Linear(X_train_t.shape[1], 100)  # Increase the number of neurons
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 20)
        self.fc4 = nn.Linear(20, 10)
        self.fc5 = nn.Linear(10, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)  # Adding dropout for regularization

        # Optional: Include batch normalization layers
        self.batch_norm1 = nn.BatchNorm1d(100)
        self.batch_norm2 = nn.BatchNorm1d(50)
        self.batch_norm3 = nn.BatchNorm1d(20)
        self.batch_norm4 = nn.BatchNorm1d(10)

    def forward(self, x):
        x = self.relu(self.batch_norm1(self.fc1(x)))
        x = self.dropout(x)  # Applying dropout
        x = self.relu(self.batch_norm2(self.fc2(x)))
        x = self.relu(self.batch_norm3(self.fc3(x)))
        x = self.relu(self.batch_norm4(self.fc4(x)))
        x = self.fc5(x)  # Output layer without
        return x

In [39]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Index: 634369 entries, 0 to 684936
Data columns (total 16 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   b             634369 non-null  int64  
 1   in_channels   634369 non-null  int64  
 2   iH            634369 non-null  int64  
 3   iW            634369 non-null  int64  
 4   out_channels  634369 non-null  int64  
 5   groups        634369 non-null  int64  
 6   kH            634369 non-null  int64  
 7   kW            634369 non-null  int64  
 8   stride        634369 non-null  int64  
 9   dilation      634369 non-null  int64  
 10  gflops        634369 non-null  float64
 11  dtype_16      634369 non-null  bool   
 12  dtype_32      634369 non-null  bool   
 13  dtype_b16     634369 non-null  bool   
 14  transposed_0  634369 non-null  bool   
 15  transposed_1  634369 non-null  bool   
dtypes: bool(5), float64(1), int64(10)
memory usage: 61.1 MB


In [38]:
"""
TODO: write a script that
1) Takes a set of learning rates
2) Takes a potential model name
3) outputs a bunch of validation losses in a new folder.
"""
lr = 1e-4

# Instantiate the model, loss function, and optimizer
model = EnhancedRegressionNN()
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=1e-5)

validation_losses = []

# Training loop
num_epochs = 30
for epoch in range(num_epochs):
    train_loss = 0
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        loss.backward()
        optimizer.step()
        
        train_loss = loss.item()
    
    # Validation loop
    model.eval()
    with torch.no_grad():
        val_loss = 0
        for inputs, targets in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, targets.view(-1, 1)).item()
        val_loss /= len(val_loader)
    validation_losses.append(val_loss)
    # if (epoch + 1) % 2 == 0:
    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch 1, Train Loss: 694344.6250, Validation Loss: 500541.3714
Epoch 2, Train Loss: 350.9424, Validation Loss: 493101.3078
Epoch 3, Train Loss: 95348.5391, Validation Loss: 482951.1448
Epoch 4, Train Loss: 155022.1875, Validation Loss: 474194.9193
Epoch 5, Train Loss: 734.2679, Validation Loss: 457000.2978
Epoch 6, Train Loss: 17726.1074, Validation Loss: 418684.1120
Epoch 7, Train Loss: 1518.5846, Validation Loss: 397694.7514
Epoch 8, Train Loss: 7547.5752, Validation Loss: 390082.7111
Epoch 9, Train Loss: 4378.8276, Validation Loss: 360240.3024
Epoch 10, Train Loss: 6034.9473, Validation Loss: 315839.8117


KeyboardInterrupt: 

In [35]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 7.3877e-01,  3.1969e-01,  5.8624e-01,  6.3714e-01,  3.1101e-02,
         -3.1253e+00,  2.5411e-01,  4.2812e-01,  2.1393e-37,  2.2994e-37,
          7.5621e-03, -5.7961e-01,  1.4561e+00, -1.4630e+00,  1.1861e-01,
         -1.2178e-01],
        [ 9.1689e-01,  9.7033e-01,  7.0651e-01,  1.0657e+00,  1.9568e-01,
         -1.5035e+00,  5.1491e-01,  4.2059e-01,  1.7083e-37,  2.7001e-37,
          1.4968e-02, -2.9886e-01,  1.8517e+00, -1.8406e+00, -5.3701e-01,
          8.5764e-01],
        [ 3.8090e-03,  5.0351e-03,  2.1848e-03,  2.4860e-03,  4.2933e-03,
         -2.7401e+00,  2.3639e-03, -8.9123e-04,  2.0427e-37, -4.2463e-38,
          4.2975e-01,  1.4509e-01,  1.4507e-01,  1.4317e-01,  3.5634e-02,
          3.7120e-02],
        [ 6.2758e-01,  6.8996e-01,  8.1576e-01,  7.0331e-01,  8.7013e-02,
         -8.9521e-01,  2.6054e-01,  2.2520e-01,  5.6746e-37, -2.0159e-37,
          4.3432e-01, -2.3801e-01,  9.1401e-01, -1.2021e+00, -7.4300e-01,
          4.3373e-01]