##### Experiment Ten
A quick search for a good R2

In [8]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import category_encoders as ce
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

In [9]:
# Load dataset
df = pd.read_csv('AmazonDataSales.csv')

# MAke column names lowercase
df.columns = df.columns.str.lower()

#Drop columns with only 1 unique value:  currency, index
df.drop([ 'currency', 'index'], axis=1, inplace=True)

def fill_in_rows_with_missing_values(df):
    #Fill in missing fullfilled-by with 'unknowns-ship'
    df['fulfilled-by'] = df['fulfilled-by'].fillna('unknowns-ship')
    #Fill in missing Unnamed: 22 with 'unknown-boolean'
    df['unnamed: 22'] = df['unnamed: 22'].fillna('unknown-boolean')
    #Fill in missing promotion-ids with 'potential-id-unknown'
    df['promotion-ids'] = df['promotion-ids'].fillna('potential-id-unknown')
    #Fill in missing Courier Status with 'Unknown'
    df['courier status'] = df['courier status'].fillna('Unknown')
    #Fill in missing ship-state with 'unknown-state'
    df['ship-state'] = df['ship-state'].fillna('unknown-state')
    #Fill in missing ship-city with 'unknown-city'
    df['ship-city'] = df['ship-city'].fillna('unknown-city')
    #Fill in missing ship-postal-code with 'unknown-address'
    df['ship-postal-code'] = df['ship-postal-code'].fillna('unknown-address')
    return df

df = fill_in_rows_with_missing_values(df)

# Fix date column
df['date'] = pd.to_datetime(df['date'], format='%m-%d-%y', errors='coerce')

#drop all rows with missing values - hold horses on this one
df.dropna(inplace=True)

# Feature engineering function
def feature_eng_function(df):

    # Drop rows with 'courier status' = "cancelled"
    df = df[df['courier status'] != 'cancelled']
    # Drop rows with 'status' = "cancelled"
    df = df[df['status'] != 'cancelled']
    #Drop rows with 'qty' = 0
    df = df[df['qty'] != 0]
    #Drop rows with 'amount' = 0
    df = df[df['amount'] != 0]
    # Drop all columns except 'amount', 'asin', 'sku', size, style
    df = df[['amount', 'size', 'qty', 'category','style']] # What adding style does!

    return df

#df = feature_eng_function(df)


# Select all columns except 'amount' as feature columns
feature_columns = df.columns.drop('amount')
# binary encode the feature columns
encoder = ce.BinaryEncoder(cols=feature_columns)
df_encoded = encoder.fit_transform(df[feature_columns])

X = df_encoded  # Features
y = df['amount']  # Target

  df = pd.read_csv('AmazonDataSales.csv')


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Function to train and evaluate a model
def train_evaluate_model(model, X_train, y_train, X_test, y_test):
    # Train the model
    model.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = model.predict(X_test)
    
    # Calculate r2 score and rsme
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)  # Adding MSE calculation
    
    return r2, rmse, mse

#define model
regressor = xgb.XGBRegressor(objective='reg:squarederror')
#train model
regressor_train = train_evaluate_model(regressor, X_train, y_train, X_test, y_test)
print(f"R2/RMSE/MSE: {regressor_train}")


R2/RMSE/MSE: (0.7782085922653084, 132.16516403131257, 17467.63058342376)


XGBRegressor doesn't cut the 0.95 R2 right out of the box with the feature selection removed.

In [11]:
#Count the number of feature columns
num_feature_columns = len(df_encoded.columns)
print(f"Number of feature columns: {num_feature_columns}")

Number of feature columns: 138


In [12]:
# Processing for NN
# Convert to numpy arrays (required for PyTorch tensors)
X_np = np.array(X, dtype=np.float32)
y_np = np.array(y, dtype=np.float32)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_np)
y_tensor = torch.tensor(y_np)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# Create TensorDatasets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [13]:
# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [14]:
# LEarning rate

# Initial learning rate
initial_lr = 0.001

# Warm-up and decay settings
warm_up_epochs = 10
decay_rate = 0.95
decay_steps = 5

# Learning rate lambda function
def lr_lambda(epoch):
    if epoch < warm_up_epochs:
        # Linear warm-up
        return float(epoch) / float(max(1, warm_up_epochs))
    # Exponential decay
    return decay_rate ** ((epoch - warm_up_epochs) / decay_steps)

In [15]:
# Define the Transformer model
class TransformerRegressor(nn.Module):
    def __init__(self, input_dim, num_heads, ff_dim, num_transformer_blocks, output_dim, dropout_rate=0.1):
        super(TransformerRegressor, self).__init__()
        # Define the transformer blocks with dropout
        self.transformer_blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, 
                                       dim_feedforward=ff_dim, dropout=dropout_rate)
            for _ in range(num_transformer_blocks)
        ])
        # Optional: Add dropout before the final linear layer
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # Reshape input for transformer: [sequence_length, batch_size, feature_size]
        x = x.transpose(0, 1)  # Swap batch_size and sequence_length dimensions
        for transformer in self.transformer_blocks:
            x = transformer(x)
        x = x.transpose(0, 1)  # Swap back the dimensions
        # Apply dropout before the final linear layer
        x = self.dropout(x[:, 0, :])  
        return self.linear(x).view(-1, 1)  # Reshape output to [batch_size, 1]

# Small model with R2 of 0.66 after 15 epochs
model_og = TransformerRegressor(
    input_dim=X_train.shape[1], 
    num_heads=1, 
    ff_dim=64, 
    num_transformer_blocks=1, 
    output_dim=1,
    #dropout_rate=0.1  # Specify the dropout rate
).to(device)

optimizer = torch.optim.Adam(model_og.parameters(), lr=0.001)

# Large model with 6 heads
model = TransformerRegressor(
    input_dim=X_train.shape[1], 
    num_heads=6, 
    ff_dim=256, 
    num_transformer_blocks=4, 
    output_dim=1,
    dropout_rate=0.1  # Specify the dropout rate
).to(device)

# Large with 2 heads
model_2_head = TransformerRegressor(
    input_dim=X_train.shape[1], 
    num_heads=2, 
    ff_dim=256, 
    num_transformer_blocks=4, 
    output_dim=1,
    dropout_rate=0.1  # Specify the dropout rate
).to(device)

# Twice as large and deep with 2 heads
model_2_head_huge = TransformerRegressor(
    input_dim=X_train.shape[1], 
    num_heads=2, 
    ff_dim=512, 
    num_transformer_blocks=8, 
    output_dim=1,
    dropout_rate=0.1  # Specify the dropout rate
).to(device)

# Define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=initial_lr)

# Define LR scheduler
scheduler = LambdaLR(optimizer, lr_lambda)
# Loss
criterion = nn.MSELoss()


# Training loop
def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=50):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(1))
            targets = targets.view(-1, 1)  # Ensure targets are the correct shape
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Update the learning rate
        scheduler.step()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}, LR: {scheduler.get_last_lr()[0]}')

# Training loop OG
def train_model_og(model, train_loader, criterion, optimizer, num_epochs=15):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(1))
            targets = targets.view(-1, 1)  # Ensure targets are the correct shape
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}')



# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    targets_list = []
    outputs_list = []
    with torch.no_grad():
        total_loss = 0
        for inputs, targets in test_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)
            #
            outputs = model(inputs.unsqueeze(1))
            targets = targets.view(-1, 1)  # Ensure targets are the correct shape
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            targets_list.append(targets.cpu())  # Move targets back to CPU
            outputs_list.append(outputs.cpu())  # Move outputs back to CPU
        
        # Concatenate all batches
        all_targets = torch.cat(targets_list, dim=0)
        all_outputs = torch.cat(outputs_list, dim=0)

        # Calculate R-squared score
        r2 = r2_score(all_targets.numpy(), all_outputs.numpy())
        
        print(f'Test Loss: {total_loss/len(test_loader)}')
        print(f'R-squared: {r2}')



In [16]:
# Run training 
#train_model(model_og, train_loader, criterion, optimizer, scheduler, num_epochs=15)


In [17]:
# Run training 
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_model_og(model_og, train_loader, criterion, optimizer, num_epochs=15)


Epoch 1/15, Loss: 499390.50979785476
Epoch 2/15, Loss: 499428.1962665017
Epoch 3/15, Loss: 499409.1318069307
Epoch 4/15, Loss: 499466.3776608911


KeyboardInterrupt: 

In [None]:
#train_model(model_2_head, train_loader, criterion, optimizer, scheduler, num_epochs=15)

In [None]:
#train_model(model_2_head_huge, train_loader, criterion, optimizer, scheduler, num_epochs=15)

In [None]:
# and evaluation
evaluate_model(model_og, test_loader)