## Pytorch

##### We built a PyTorch-based regression model to predict sales using a company orders dataset. The project involved data preprocessing, defining a neural network, training it with MSE loss, and evaluating predictions

In [67]:
!pip install torch



#### Import the libraries

In [83]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch

#### Load the Dataset

In [84]:
df = pd.read_csv('/Users/hpourmand/Desktop/Retail/Superstore Sales Dataset.csv')

### Data Preproccesing

#### Handle Missing Values

In [85]:
df['Postal Code'] = df['Postal Code'].fillna('Unknown')

#### Convert date columns to numerical values

In [86]:
df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst = True)
df['Ship Date'] = pd.to_datetime(df['Ship Date'], dayfirst = True)
df['Order Date'] = (df['Order Date'] - df['Order Date'].min()).dt.days
df['Ship Date'] = (df['Ship Date'] - df['Ship Date'].min()).dt.days

#### Encode categorical variables

In [87]:
df = pd.get_dummies(df, drop_first=True)

#### Features & Target

In [88]:
X = df.drop(columns=['Sales'])
y = df['Sales']

#### Normalize numerical features

In [89]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#### Split data into training and test sets

In [90]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

#### Convert to PyTorch tensors

In [91]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

### Neural Network Model

In [92]:
class SalesPredictionModel(nn.Module):
    def __init__(self, input_dim):
        super(SalesPredictionModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = self.output_layer(x)
        return x

# Initialize the model
input_dim = X_train.shape[1]
model = SalesPredictionModel(input_dim)

### Set Up the Loss Function and Optimizer

In [93]:
# Mean Squared Error Loss for regression
criterion = nn.MSELoss()

# Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Train the Model

In [94]:
# Number of epochs
epochs = 100

# Training loop
for epoch in range(epochs):
    model.train()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 370975.2500
Epoch [20/100], Loss: 368208.0000
Epoch [30/100], Loss: 362333.5000
Epoch [40/100], Loss: 351472.2500
Epoch [50/100], Loss: 333757.9688
Epoch [60/100], Loss: 308032.0312
Epoch [70/100], Loss: 275252.6250
Epoch [80/100], Loss: 239666.6719
Epoch [90/100], Loss: 207636.3594
Epoch [100/100], Loss: 183137.3906


### Evaluate the Model

In [95]:
# Evaluate on test set
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    test_loss = criterion(y_pred, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')
    
    # Calculate Mean Squared Error on test data
    mse = mean_squared_error(y_test_tensor.numpy(), y_pred.numpy())
    print(f'Mean Squared Error: {mse:.4f}')

Test Loss: 634563.0000
Mean Squared Error: 634563.0000


### Make Predictions

In [97]:
# Make predictions on new data or test data
model.eval()
with torch.no_grad():
    y_new_pred = model(X_test_tensor)
    print(f'Predictions: {y_new_pred[:10]}')  

Predictions: tensor([[  0.9406],
        [ 32.7971],
        [181.9749],
        [  0.7834],
        [ 72.6814],
        [ 33.0613],
        [262.6530],
        [  0.6578],
        [266.3345],
        [ 10.6596]])
