In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from IPython import display

In [2]:
from torch.utils.data import DataLoader, TensorDataset

In [3]:
# Mount Google Drive (only available inside Google Colab) and read the
# preprocessed dataset from it into a DataFrame.
from google.colab import drive
drive.mount('/content/drive')
data = pd.read_csv('/content/drive/My Drive/Multi_core/preprocessed_data.csv')
# Alternative for running outside Colab from a relative path.
# NOTE(review): the file name below is spelled 'prepocessed', unlike the Drive
# path above -- confirm which spelling exists on disk before enabling it.
# data = pd.read_csv('../data/prepocessed_data.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Column(s) removed from the feature matrix and used as the regression target.
dropped_col = ['Speedup']

In [5]:
# Feature matrix: every column of the frame except the target column(s).
X = data.drop(columns=dropped_col).values
# Target array; selecting with a list keeps it two-dimensional (one column).
y = data.loc[:, dropped_col].values

In [6]:
# train test split
# Two-stage split: first hold out 30% of the rows, then cut that hold-out in
# half to get the validation and test sets (70% / 15% / 15% overall).
# Both calls pass random_state=42 so the partition is reproducible.
X_train, X_rem, y_train, y_rem = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=42)

In [7]:
# Wrap every NumPy split as a torch tensor, in train / val / test order.
(
    X_train_tensor,
    y_train_tensor,
    X_val_tensor,
    y_val_tensor,
    X_test_tensor,
    y_test_tensor,
) = map(torch.from_numpy, (X_train, y_train, X_val, y_val, X_test, y_test))

In [8]:
# Report the shape of each split tensor, one line per tensor.
print("\n".join(
    f"{prefix}{tensor.shape}"
    for prefix, tensor in (
        ("X_train_Size: ", X_train_tensor),
        ("y_train_Size: ", y_train_tensor),
        ("X_val_Size: ", X_val_tensor),
        ("y_val_Size: ", y_val_tensor),
        ("X_test: ", X_test_tensor),
        ("y_test_Size: ", y_test_tensor),
    )
))

X_train_Size: torch.Size([8400, 26])
y_train_Size: torch.Size([8400, 1])
X_val_Size: torch.Size([1800, 26])
y_val_Size: torch.Size([1800, 1])
X_test: torch.Size([1800, 26])
y_test_Size: torch.Size([1800, 1])


In [9]:
# Pair each feature tensor with its target tensor so a loader yields
# (inputs, targets) batches.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Single mini-batch size shared by all three loaders.
BATCH_SIZE = 16

# Only the training loader reshuffles every epoch. Validation and test data are
# read in a fixed order so that evaluating the same model twice yields the same
# batches, and therefore the same batch-averaged metrics.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# Layer widths for the network: I = input features, O = output values,
# H = units in each hidden layer.
I, O, H = 26, 1, 64

In [11]:
# Fully connected regressor: I -> H -> H -> O with ReLU after each hidden
# layer, built and cast to float64 parameters in a single expression.
model = nn.Sequential(
    nn.Linear(I, H), nn.ReLU(),
    nn.Linear(H, H), nn.ReLU(),
    nn.Linear(H, O),
).double()

In [12]:
# Sanity check: grab the weight matrix of the first Linear layer.
weight_tensor = model[0].weight

# Get the datatype of the weight tensor
weight_tensor.dtype

torch.float64

In [13]:
# Sanity check: show the dtype of the training inputs so it can be compared
# with the dtype of the model's parameters.
X_train_tensor.dtype

torch.float64

In [14]:
# Adam over all of the model's parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [15]:
def train(model, loader, optimizer):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    Args:
        model: Module being trained. It is switched to training mode here so
            a preceding evaluation pass cannot leave it in eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        Tuple ``(avg_loss, accuracy)``. ``avg_loss`` is the per-sample mean of
        the MSE loss over the whole pass. ``accuracy`` is the number of target
        elements equal to the prediction rounded to the nearest integer,
        divided by the number of samples seen.
        NOTE(review): exact-match accuracy is only informative if the targets
        are integer-valued -- confirm for this dataset.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    for inputs, targets in loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = torch.nn.functional.mse_loss(output, targets)
        loss.backward()
        optimizer.step()

        n_batch = len(targets)
        # mse_loss returns the mean over the batch; weight it by the batch size
        # so a shorter final batch does not count as much as a full one.
        loss_sum += loss.item() * n_batch
        # Metrics need no gradient tracking, so round a detached copy.
        pred_labels = torch.round(output.detach())
        correct += (pred_labels == targets).sum().item()
        total += n_batch

    avg_loss = loss_sum / total
    accuracy = correct / total
    print(f"Average loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

In [16]:
# Per-epoch metric histories (four independent lists) filled in by the
# training loop and read by the plotting cells.
train_losses, train_accs, val_losses, val_accs = [], [], [], []

In [17]:
#train
# Each epoch: one optimisation pass over the training set, then one
# gradient-free pass over the validation set. Metrics are appended to the
# history lists for plotting.
# NOTE(review): re-running this cell keeps training the existing model and
# keeps appending to the existing history lists; re-create both for a fresh run.
for epoch in range(200):
    print(f"Epoch {epoch + 1}")
    # Undo the eval() call made by the previous epoch's validation pass.
    model.train()
    train_loss, train_accuracy = train(model, train_loader, optimizer)
    train_losses.append(train_loss)
    train_accs.append(train_accuracy)

    # validation
    model.eval()
    with torch.no_grad():
        loss_sum = 0.0
        correct = 0
        total = 0
        for inputs, targets in val_loader:
            output = model(inputs)
            loss = torch.nn.functional.mse_loss(output, targets)
            n_batch = len(targets)
            # Weight each batch mean by its size so the short final batch is
            # not over-represented in the epoch average.
            loss_sum += loss.item() * n_batch
            pred_labels = torch.round(output)
            correct += (pred_labels == targets).sum().item()
            total += n_batch
        # Normalise by the validation samples actually seen, not by the size
        # of an unrelated loader.
        avg_loss = loss_sum / total
        accuracy = correct / total
        val_losses.append(avg_loss)
        val_accs.append(accuracy)
        print(f"Validation Average loss: {avg_loss:.4f}, Validation Accuracy: {accuracy:.4f}")
    # wait=True defers the clear until the next output arrives, so the latest
    # epoch's metrics stay visible while the next epoch runs.
    display.clear_output(wait=True)

Epoch 342


KeyboardInterrupt: ignored

In [None]:
# train loss, accuracy
# Side-by-side panels of the training history, one point per epoch.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

# Left panel: loss per epoch.
ax1.plot(train_losses, label='Training Loss')
ax1.set(title='Loss', xlabel='Epochs', ylabel='Loss')
ax1.legend()

# Right panel: accuracy per epoch.
ax2.plot(train_accs, label='Training Accuracy')
ax2.set(title='Accuracy', xlabel='Epochs', ylabel='Accuracy')
ax2.legend()

# Render the figure.
plt.show()

In [None]:
# val loss, accuracy
# Side-by-side panels of the validation history, one point per epoch.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

# Left panel: loss per epoch.
ax1.plot(val_losses, label='Validation Loss')
ax1.set(title='Loss', xlabel='Epochs', ylabel='Loss')
ax1.legend()

# Right panel: accuracy per epoch.
ax2.plot(val_accs, label='Validation Accuracy')
ax2.set(title='Accuracy', xlabel='Epochs', ylabel='Accuracy')
ax2.legend()

# Render the figure.
plt.show()