In [1]:
# Import libraries
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import warnings
import evaluate
warnings.filterwarnings("ignore", category=Warning)

In [2]:
# Load Data
data = pd.read_csv("processed_data.txt", sep="|")
data.head()

Unnamed: 0,User ID,Gender_Mapped,Age_Scaled,Sqrt_Salary_Scaled,Purchased
0,15624510,1,-1.781797,-1.785227,0
1,15810944,1,-0.253587,-1.730989,0
2,15668575,0,-1.113206,-0.732165,0
3,15603246,0,-1.017692,-0.256825,0
4,15804002,1,-1.781797,0.302609,0


In [3]:
# Define features
features = data[["Gender_Mapped", "Age_Scaled", "Sqrt_Salary_Scaled"]]
X = features.to_numpy()
X

array([[ 1.        , -1.78179743, -1.7852271 ],
       [ 1.        , -0.25358736, -1.73098855],
       [ 0.        , -1.11320552, -0.73216508],
       ...,
       [ 0.        ,  1.17910958, -1.73098855],
       [ 1.        , -0.15807423, -1.12151841],
       [ 0.        ,  1.08359645, -0.99916839]])

In [4]:
# Define target
target = data["Purchased"]
y = target.to_numpy()
y

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,

In [5]:
# Transform target and features to tensors
X = torch.tensor(X)
y = torch.tensor(y)

In [6]:
# For newer versions of Python/Jupyter, change default so that method config attributes always show
from sklearn import set_config
set_config(print_changed_only=False, display='diagram')

In [7]:
# Divide data into training and testing.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=20)

In [8]:
# Create model
in_features = features.shape[1]
out_features = 1

model = nn.Sequential(
    nn.Linear(in_features, 2),
    nn.Linear(2, out_features),
    nn.Sigmoid()
)

In [9]:
# Created the dataset and the dataloader
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(X_train.float(), y_train.float())
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

In [10]:
# Loss function
criterion = nn.MSELoss()

In [11]:
# Create the optimizer
import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.75)

In [12]:
# Loop over the number of epochs and the dataloader
num_of_epochs = 1000

for i in range(num_of_epochs):
    for data in dataloader:
        #Set the gradients to zero
        optimizer.zero_grad()
        # Run a forward pass
        X_train, y_train = data
        prediction = model(X_train)
        # Calculate the loss
        loss = criterion(prediction, y_train)
        # Compute the gradients
        loss.backward()
        # Update the model's parameters
        optimizer.step()
    if (i + 1) % 100 == 0:
        print(f"Finished epoch {i}, latest loss {loss}")

Finished epoch 99, latest loss 0.2606392800807953
Finished epoch 199, latest loss 0.2524322271347046
Finished epoch 299, latest loss 0.25743284821510315
Finished epoch 399, latest loss 0.2446332722902298
Finished epoch 499, latest loss 0.22877047955989838
Finished epoch 599, latest loss 0.20430044829845428
Finished epoch 699, latest loss 0.20698267221450806
Finished epoch 799, latest loss 0.1968591809272766
Finished epoch 899, latest loss 0.17555810511112213
Finished epoch 999, latest loss 0.29173046350479126


In [13]:
# Model accuracy
with torch.no_grad():
    y_pred = model(X_test.float())

accuracy = (y_pred.round() == y).float().mean()
print(f"Model accuracy is {accuracy}")

Model accuracy is 0.6424999833106995


In [14]:
# Import scaler object from "Preprocessing" notebook
%run 02_Preprocessing.ipynb

In [15]:
# Test an input array
def predict_new_data(test_input):
    scaler.fit(test_input)
    scaled_input = scaler.fit_transform(test_input)
    test_tensor = torch.tensor(scaled_input)
    return model(test_tensor.float())

In [None]:
# Test model
test_data = [[0, 2.5, 4.1]]
predict_new_data(test_data)