In [1]:
import pandas as pd
import xgboost as xgb
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Load dataset
df = pd.read_csv('AmazonDataSales_v2.csv')
# Drop all columns except 'amount', 'category', 'size', 'quantity'
df = df[['amount', 'category', 'size', 'qty']]

# One-hot encode the 'category', 'size', and 'qty' columns
# Select all columns except 'amount' as feature columns
feature_columns = df.columns.drop('amount')
# One-hot encode the feature columns
df_encoded = pd.get_dummies(df, columns=feature_columns)

# Assuming 'df' contains your dataset
X = df_encoded.drop('amount', axis=1)  # Features
y = df['amount']  # Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Function to train and evaluate a model
def train_evaluate_model(model, X_train, y_train, X_test, y_test):
    # Train the model
    model.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = model.predict(X_test)
    
    # Calculate r2 score and rsme
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    
    return r2, rmse

#define model
regressor = xgb.XGBRegressor(objective='reg:squarederror')
#train model
regressor_train = train_evaluate_model(regressor, X_train, y_train, X_test, y_test)
print(f"Not normalized score R2/MSE:{regressor_train}")


  df = pd.read_csv('AmazonDataSales_v2.csv')


Not normalized score R2/MSE:(0.42399884819387323, 187.29478076027766)


In [3]:
#Count the number of feature columns
num_feature_columns = len(df_encoded.columns)
print(f"Number of feature columns: {num_feature_columns}")

Number of feature columns: 25


In [4]:
# Convert to numpy arrays (required for PyTorch tensors)
X_np = np.array(X, dtype=np.float32)
y_np = np.array(y, dtype=np.float32)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_np)
y_tensor = torch.tensor(y_np)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# Create TensorDatasets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [5]:
# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Transformer Model for Regression
class TransformerRegressor(nn.Module):
    def __init__(self, input_dim, num_heads, ff_dim, num_transformer_blocks, output_dim):
        super(TransformerRegressor, self).__init__()
        self.transformer_blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, dim_feedforward=ff_dim)
            for _ in range(num_transformer_blocks)
        ])
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # Reshape input for transformer: [sequence_length, batch_size, feature_size]
        x = x.transpose(0, 1)  # Swap batch_size and sequence_length dimensions
        for transformer in self.transformer_blocks:
            x = transformer(x)
        x = x.transpose(0, 1)  # Swap back the dimensions
        return self.linear(x[:, 0, :]).view(-1, 1)  # Reshape output to [batch_size, 1]

# Model instantiation
model = TransformerRegressor(
    input_dim=X_train.shape[1], 
    num_heads=1, 
    ff_dim=64, 
    num_transformer_blocks=1, 
    output_dim=1
).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=50):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, targets in train_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)   
            #          
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(1))
            targets = targets.view(-1, 1)  # Ensure targets are the correct shape
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}')

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    targets_list = []
    outputs_list = []
    with torch.no_grad():
        total_loss = 0
        for inputs, targets in test_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)
            #
            outputs = model(inputs.unsqueeze(1))
            targets = targets.view(-1, 1)  # Ensure targets are the correct shape
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            targets_list.append(targets.cpu())  # Move targets back to CPU
            outputs_list.append(outputs.cpu())  # Move outputs back to CPU
        
        # Concatenate all batches
        all_targets = torch.cat(targets_list, dim=0)
        all_outputs = torch.cat(outputs_list, dim=0)

        # Calculate R-squared score
        r2 = r2_score(all_targets.numpy(), all_outputs.numpy())
        
        print(f'Test Loss: {total_loss/len(test_loader)}')
        print(f'R-squared: {r2}')

# Run training and evaluation
train_model(model, train_loader, criterion, optimizer, num_epochs=15)
evaluate_model(model, test_loader)

Using device: cuda
Epoch 1/15, Loss: 350616.90808886057
Epoch 2/15, Loss: 113674.98082615859
Epoch 3/15, Loss: 42775.57843191965
Epoch 4/15, Loss: 35087.43879211841
Epoch 5/15, Loss: 34943.26577580251
Epoch 6/15, Loss: 34843.96644145939
Epoch 7/15, Loss: 34816.967092501065
Epoch 8/15, Loss: 34808.88633211097
Epoch 9/15, Loss: 34753.171328922195
Epoch 10/15, Loss: 34762.42282498937
Epoch 11/15, Loss: 34735.12168168049
Epoch 12/15, Loss: 34756.59190648916
Epoch 13/15, Loss: 34762.411862909226
Epoch 14/15, Loss: 34752.0398344494
Epoch 15/15, Loss: 34730.5900131537
Test Loss: 35202.059713612434
R-squared: 0.4217062677253687


In [6]:
# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=2000, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2000, shuffle=False)

In [7]:
class FeedForwardRegressor(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2):
        super(FeedForwardRegressor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, 1)  # Output layer for regression

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return x

# Model instantiation and move to device
input_size = X_train.shape[1]
hidden_size1 = 64
hidden_size2 = 32
model = FeedForwardRegressor(input_size, hidden_size1, hidden_size2).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, targets in train_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets.unsqueeze(1))  # Add an extra dimension to targets
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}')

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    targets_list = []
    outputs_list = []
    with torch.no_grad():
        total_loss = 0
        for inputs, targets in test_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets.unsqueeze(1))  # Add an extra dimension to targets

            total_loss += loss.item()
            targets_list.append(targets.cpu())
            outputs_list.append(outputs.cpu())
        
        # Concatenate all batches
        all_targets = torch.cat(targets_list, dim=0)
        all_outputs = torch.cat(outputs_list, dim=0)

        # Calculate R-squared score
        r2 = r2_score(all_targets.numpy(), all_outputs.numpy())
        
        print(f'Test Loss: {total_loss/len(test_loader)}')
        print(f'R-squared: {r2}')

# Run training and evaluation
train_model(model, train_loader, criterion, optimizer, num_epochs=200)
evaluate_model(model, test_loader) 


Epoch 1/200, Loss: 452110.6028645833
Epoch 2/200, Loss: 444347.5670572917
Epoch 3/200, Loss: 408235.7604166667
Epoch 4/200, Loss: 326383.2757161458
Epoch 5/200, Loss: 209532.048828125
Epoch 6/200, Loss: 106709.93245442708
Epoch 7/200, Loss: 63018.721028645836
Epoch 8/200, Loss: 52489.43107096354
Epoch 9/200, Loss: 48140.10213216146
Epoch 10/200, Loss: 44876.283854166664
Epoch 11/200, Loss: 42639.10986328125
Epoch 12/200, Loss: 40100.312744140625
Epoch 13/200, Loss: 38843.72550455729
Epoch 14/200, Loss: 37697.788248697914
Epoch 15/200, Loss: 37085.298177083336
Epoch 16/200, Loss: 36236.6875
Epoch 17/200, Loss: 35820.05887858073
Epoch 18/200, Loss: 35432.542724609375
Epoch 19/200, Loss: 35300.19132486979
Epoch 20/200, Loss: 34755.94685872396
Epoch 21/200, Loss: 34835.57702636719
Epoch 22/200, Loss: 35079.6083984375
Epoch 23/200, Loss: 34798.88814290365
Epoch 24/200, Loss: 34713.94384765625
Epoch 25/200, Loss: 34490.59338378906
Epoch 26/200, Loss: 35264.5009358724
Epoch 27/200, Loss: 3457

In [15]:
class FeedForwardRegressor(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2):
        super(FeedForwardRegressor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, 1)  # Output layer for regression

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return x

# Model instantiation and move to device
input_size = X_train.shape[1]
hidden_size1 = 2
hidden_size2 = 2
model = FeedForwardRegressor(input_size, hidden_size1, hidden_size2).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, targets in train_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets.unsqueeze(1))  # Add an extra dimension to targets
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}')

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    targets_list = []
    outputs_list = []
    with torch.no_grad():
        total_loss = 0
        for inputs, targets in test_loader:
            # Move data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets.unsqueeze(1))  # Add an extra dimension to targets

            total_loss += loss.item()
            targets_list.append(targets.cpu())
            outputs_list.append(outputs.cpu())
        
        # Concatenate all batches
        all_targets = torch.cat(targets_list, dim=0)
        all_outputs = torch.cat(outputs_list, dim=0)

        # Calculate R-squared score
        r2 = r2_score(all_targets.numpy(), all_outputs.numpy())
        
        print(f'Test Loss: {total_loss/len(test_loader)}')
        print(f'R-squared: {r2}')

# Run training and evaluation
train_model(model, train_loader, criterion, optimizer, num_epochs=300)
evaluate_model(model, test_loader) 


Epoch 1/300, Loss: 451025.8834635417
Epoch 2/300, Loss: 450513.373046875
Epoch 3/300, Loss: 451224.9088541667
Epoch 4/300, Loss: 450796.853515625
Epoch 5/300, Loss: 449535.9759114583
Epoch 6/300, Loss: 447373.68359375
Epoch 7/300, Loss: 447136.96484375
Epoch 8/300, Loss: 445613.4225260417
Epoch 9/300, Loss: 443922.5872395833
Epoch 10/300, Loss: 442466.5787760417
Epoch 11/300, Loss: 441471.2415364583
Epoch 12/300, Loss: 440091.7005208333
Epoch 13/300, Loss: 436661.4713541667
Epoch 14/300, Loss: 433224.9778645833
Epoch 15/300, Loss: 430045.4010416667
Epoch 16/300, Loss: 427313.029296875
Epoch 17/300, Loss: 423067.2493489583
Epoch 18/300, Loss: 420710.1627604167
Epoch 19/300, Loss: 417021.3984375
Epoch 20/300, Loss: 409546.7369791667
Epoch 21/300, Loss: 405269.5266927083
Epoch 22/300, Loss: 400449.634765625
Epoch 23/300, Loss: 393208.4231770833
Epoch 24/300, Loss: 387109.03515625
Epoch 25/300, Loss: 380936.3639322917
Epoch 26/300, Loss: 374089.7317708333
Epoch 27/300, Loss: 368266.6803385