In [1]:
# Import libraries
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=Warning)

In [2]:
# Load Data
data = pd.read_csv("processed_data.txt", sep="|")
data.head()

Unnamed: 0,Gender_Mapped,Age_Scaled,Sqrt_Salary_Scaled,Purchased
0,1,-1.781797,-1.785227,0
1,1,-0.253587,-1.730989,0
2,0,-1.113206,-0.732165,0
3,0,-1.017692,-0.256825,0
4,1,-1.781797,0.302609,0


In [3]:
# Define features
features = data[["Gender_Mapped", "Age_Scaled", "Sqrt_Salary_Scaled"]]
X = features.to_numpy()

In [4]:
# Define target
target = data["Purchased"]
y = target.to_numpy()

In [5]:
# Transform target and features to tensors
X = torch.tensor(X)
y = torch.tensor(y)

In [6]:
# Divide data into training and testing.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=35)

In [7]:
# Create model
in_features = features.shape[1]
out_features = 1

model = nn.Sequential(
    nn.Linear(in_features, out_features),
    nn.Sigmoid()
)

In [8]:
# Created the dataset and the dataloader
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(X_train.float(), y_train.float())
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

In [9]:
# Loss function
criterion = nn.MSELoss()

In [10]:
# Create the optimizer
import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.75)

In [11]:
# Loop over the number of epochs and the dataloader
num_of_epochs = 100

for i in range(num_of_epochs):
    model.train()
    for data in dataloader:
        #Set the gradients to zero
        optimizer.zero_grad()
        # Run a forward pass
        X_train, y_train = data
        prediction = model(X_train)
        # Calculate the loss
        loss = criterion(prediction, y_train)
        # Compute the gradients
        loss.backward()
        # Update the model's parameters
        optimizer.step()
    if (i + 1) % 10 == 0:
        print(f"Finished epoch {i}, latest loss {loss}")

Finished epoch 9, latest loss 0.2895435094833374
Finished epoch 19, latest loss 0.26771080493927
Finished epoch 29, latest loss 0.24758954346179962
Finished epoch 39, latest loss 0.24830064177513123
Finished epoch 49, latest loss 0.24766437709331512
Finished epoch 59, latest loss 0.2598118484020233
Finished epoch 69, latest loss 0.2197066843509674
Finished epoch 79, latest loss 0.22961576282978058
Finished epoch 89, latest loss 0.24468353390693665
Finished epoch 99, latest loss 0.26337918639183044


In [12]:
# Model accuracy
with torch.no_grad():
    y_pred = model(X_test.float())

accuracy = (y_pred.round() == y).float().mean()
print(f"Model accuracy is {accuracy}")

Model accuracy is 0.6424999833106995


In [13]:
# Evaluation
model.eval()
test_loss = 0.0
with torch.no_grad():
    for data in dataloader:
        prediction = model(X_train)
        loss = criterion(prediction, y_train)
        test_loss += loss.item()
        #if (i + 1) % 10 == 0:
            #print(f"Test Loss : {test_loss/len(dataloader)}")

In [14]:
# Import scaler object from "Preprocessing" notebook
%store -r scaler

In [15]:
# Test an input array
def predict_new_data(test_input):
    scaler.fit(test_input)
    scaled_input = scaler.fit_transform(test_input)
    test_tensor = torch.tensor(scaled_input)
    return model(test_tensor.float())

In [16]:
# Use model to make predictions
test_data = [
[1, 20, 450],
[0, 64, 150],
[1, 32, 370],
[0, 30, 200],
[0, 28, 100],
[1, 70, 350],
[1, 61, 275],
[1, 42, 275],
[0, 56, 275],
[1, 47, 375]
]

In [17]:
# Use new data for model
predict_new_data(test_data)

tensor([[0.2776],
        [0.6327],
        [0.3155],
        [0.5477],
        [0.5659],
        [0.3981],
        [0.3952],
        [0.3550],
        [0.5883],
        [0.3444]], grad_fn=<SigmoidBackward0>)