In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

- Define a PyTorch model

In [2]:
class DelayModel(nn.Module):
    def __init__(self, inputs_size):
        super(DelayModel, self).__init__()
        self.fc1 = nn.Linear(inputs_size, 64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

In [3]:
def train_and_evaluate(df):
    # remove columns has unique value for all rows
    unique_counts = df.nunique()
    df = df[unique_counts[unique_counts > 1].index]

    # convert categorical values into numeric
    cat_cols = df.select_dtypes(include=['object']).columns
    for col in cat_cols:
        df[col] = pd.factorize(df[col])[0]

    # Split data into training and testing sets
    train_df, test_df = train_test_split(df, test_size=0.2)

    train_mean = train_df.iloc[:, df.columns != 'delay'].mean()
    train_std = train_df.iloc[:, df.columns != 'delay'].std()

    train_df.iloc[:, df.columns != 'delay'] = (train_df.iloc[:, df.columns != 'delay'] - train_mean) / train_std
    test_df.iloc[:, df.columns != 'delay'] = (test_df.iloc[:, df.columns != 'delay'] - train_mean) / train_std

    # Convert pandas dataframes to PyTorch tensors
    train_inputs = torch.tensor(train_df.iloc[:, train_df.columns != 'delay'].values, dtype=torch.float32)
    train_targets = torch.tensor(train_df.iloc[:, train_df.columns == 'delay'].values, dtype=torch.float32)
    test_inputs = torch.tensor(test_df.iloc[:, test_df.columns != 'delay'].values, dtype=torch.float32)
    test_targets = torch.tensor(test_df.iloc[:, test_df.columns == 'delay'].values, dtype=torch.float32)

    model = DelayModel(inputs_size=train_inputs.shape[1])
    criterion = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Train the model
    num_epochs = 100
    for epoch in range(num_epochs):
        # Forward pass
        outputs = model(train_inputs)
        loss = criterion(outputs, train_targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # evaluate the model
    with torch.no_grad():
        test_outputs = model(test_inputs)
        test_loss = criterion(test_outputs, test_targets)
        print(f'Test Loss: {test_loss.item():.4f}')

In [4]:
train_and_evaluate(df=pd.read_csv('./csv/floyd.csv', index_col=0))

Epoch [10/100], Loss: 1873.4608
Epoch [20/100], Loss: 1873.3674
Epoch [30/100], Loss: 1873.3575
Epoch [40/100], Loss: 1873.3523
Epoch [50/100], Loss: 1873.3474
Epoch [60/100], Loss: 1873.3429
Epoch [70/100], Loss: 1873.3383
Epoch [80/100], Loss: 1873.3334
Epoch [90/100], Loss: 1873.3284
Epoch [100/100], Loss: 1873.3232
Test Loss: 1251.8546


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


In [5]:
train_and_evaluate(df=pd.read_csv('./csv/histogram.csv', index_col=0))

Epoch [10/100], Loss: 246.0704
Epoch [20/100], Loss: 245.9270
Epoch [30/100], Loss: 245.7909
Epoch [40/100], Loss: 245.6336
Epoch [50/100], Loss: 245.4402
Epoch [60/100], Loss: 245.1930
Epoch [70/100], Loss: 244.8602
Epoch [80/100], Loss: 244.3965
Epoch [90/100], Loss: 243.7232
Epoch [100/100], Loss: 243.0667
Test Loss: 197.6655


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


In [6]:
train_and_evaluate(df=pd.read_csv('./csv/dct.csv', index_col=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 321.1297
Epoch [20/100], Loss: 321.0825
Epoch [30/100], Loss: 321.0557
Epoch [40/100], Loss: 321.0293
Epoch [50/100], Loss: 321.0012
Epoch [60/100], Loss: 320.9706
Epoch [70/100], Loss: 320.9367
Epoch [80/100], Loss: 320.8990
Epoch [90/100], Loss: 320.8563
Epoch [100/100], Loss: 320.8076
Test Loss: 260.8439


In [7]:
train_and_evaluate(df=pd.read_csv('./csv/binomial.csv', index_col=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 15.2935
Epoch [20/100], Loss: 15.0955
Epoch [30/100], Loss: 14.8545
Epoch [40/100], Loss: 14.5245
Epoch [50/100], Loss: 14.0432
Epoch [60/100], Loss: 13.3246
Epoch [70/100], Loss: 12.9320
Epoch [80/100], Loss: 12.5743
Epoch [90/100], Loss: 12.4068
Epoch [100/100], Loss: 12.3562
Test Loss: 11.7716


In [8]:
train_and_evaluate(df=pd.read_csv('./csv/rocrand_k0.csv', index_col=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 31.8829
Epoch [20/100], Loss: 31.7806
Epoch [30/100], Loss: 31.6649
Epoch [40/100], Loss: 31.5285
Epoch [50/100], Loss: 31.3621
Epoch [60/100], Loss: 31.1525
Epoch [70/100], Loss: 30.8818
Epoch [80/100], Loss: 30.5198
Epoch [90/100], Loss: 30.0469
Epoch [100/100], Loss: 29.6522
Test Loss: 31.7482


In [9]:
train_and_evaluate(df=pd.read_csv('./csv/rocrand2_k0.csv', index_col=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 31.9303
Epoch [20/100], Loss: 31.8520
Epoch [30/100], Loss: 31.7706
Epoch [40/100], Loss: 31.6830
Epoch [50/100], Loss: 31.5852
Epoch [60/100], Loss: 31.4717
Epoch [70/100], Loss: 31.3355
Epoch [80/100], Loss: 31.1666
Epoch [90/100], Loss: 30.9512
Epoch [100/100], Loss: 30.6694
Test Loss: 27.8271


In [10]:
train_and_evaluate(df=pd.read_csv('./csv/rocrand_k1.csv', index_col=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 27.8680
Epoch [20/100], Loss: 27.8017
Epoch [30/100], Loss: 27.7298
Epoch [40/100], Loss: 27.6485
Epoch [50/100], Loss: 27.5541
Epoch [60/100], Loss: 27.4419
Epoch [70/100], Loss: 27.3055
Epoch [80/100], Loss: 27.1348
Epoch [90/100], Loss: 26.9155
Epoch [100/100], Loss: 26.6261
Test Loss: 27.0402


In [11]:
train_and_evaluate(df=pd.read_csv('./csv/rocrand2_k1.csv', index_col=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 29.0224
Epoch [20/100], Loss: 28.9403
Epoch [30/100], Loss: 28.8506
Epoch [40/100], Loss: 28.7488
Epoch [50/100], Loss: 28.6292
Epoch [60/100], Loss: 28.4845
Epoch [70/100], Loss: 28.3047
Epoch [80/100], Loss: 28.0761
Epoch [90/100], Loss: 27.7757
Epoch [100/100], Loss: 27.3683
Test Loss: 27.7321


In [12]:
train_and_evaluate(df=pd.read_csv('./csv/nbody.csv', index_col=0))

  train_and_evaluate(df=pd.read_csv('./csv/nbody.csv', index_col=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.factorize(df[col])[0]


Epoch [10/100], Loss: 5.9727
Epoch [20/100], Loss: 5.9159
Epoch [30/100], Loss: 5.8671
Epoch [40/100], Loss: 5.8172
Epoch [50/100], Loss: 5.7641
Epoch [60/100], Loss: 5.7061
Epoch [70/100], Loss: 5.6412
Epoch [80/100], Loss: 5.5671
Epoch [90/100], Loss: 5.4807
Epoch [100/100], Loss: 5.3772
Test Loss: 5.3822


In [26]:
train_and_evaluate(df=pd.read_csv('./csv/data.csv', index_col=0))

  train_and_evaluate(df=pd.read_csv('./csv/data.csv', index_col=0))


Epoch [10/100], Loss: 25.0514
Epoch [20/100], Loss: 25.0340
Epoch [30/100], Loss: 25.0226
Epoch [40/100], Loss: 25.0126
Epoch [50/100], Loss: 25.0027
Epoch [60/100], Loss: 24.9922
Epoch [70/100], Loss: 24.9811
Epoch [80/100], Loss: 24.9691
Epoch [90/100], Loss: 24.9561
Epoch [100/100], Loss: 24.9418
Test Loss: 24.8118
