In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F

In [3]:
%pwd
%cd "../"
%cd "data"

c:\Users\johny\ChurnProject\Customer-Churn
c:\Users\johny\ChurnProject\Customer-Churn\data


In [77]:
class MyDataset(Dataset):
 
  def __init__(self,split):
    X = pd.read_csv(f"X_{split}.csv")
    y = pd.read_csv(f"y_{split}.csv")

    self.input_size = X.shape[1]

    self.x=torch.tensor(X.values,dtype=torch.float32)
    self.y=torch.tensor(y.values,dtype=torch.long).view(-1)
    print(self.x.shape, self.y.shape)
 
  def __len__(self):
    return len(self.y)
  
   
  def __getitem__(self,idx):
    return self.x[idx],self.y[idx]

In [78]:
training_set=MyDataset(split="train")
validation_set=MyDataset(split="test") 
training_loader=DataLoader(training_set, batch_size=80, shuffle=True)
validation_loader=DataLoader(validation_set,batch_size=80,shuffle=False)

torch.Size([5625, 35]) torch.Size([5625])
torch.Size([1407, 35]) torch.Size([1407])


In [79]:
print('Training set has {} instances'.format(len(training_set)))
print('Input vector size set has length of {} '.format(training_set.input_size))
print('Validation set has {} instances'.format(len(validation_set)))

Training set has 5625 instances
Input vector size set has length of 35 
Validation set has 1407 instances


In [104]:
class ChurnClassifier(nn.Module):
    def __init__(self, input_length, num_classes):
        super(ChurnClassifier, self).__init__()
        self.fc1 = nn.Linear(input_length, 120)
        self.fc2 = nn.Linear(120, 120)
        self.fc3 = nn.Linear(120, 120)
        self.fc4 = nn.Linear(120, 50)
        self.fc5 = nn.Linear(50, 10)
        self.fc6 = nn.Linear(10, num_classes)
        self.relu = nn.ReLU()
        self.loss = torch.nn.CrossEntropyLoss()
        self.dropout = nn.Dropout(0.1)


    def forward(self, x, targets=None):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.fc5(x)
        x = self.relu(x)
        x = self.fc6(x)
        if targets is None:
            loss = None
        else:
            loss = self.loss(x, targets)
        return x, loss



model = ChurnClassifier(input_length=training_set.input_size, num_classes=2)

In [105]:
model(torch.rand(training_set.input_size))

(tensor([0.2135, 0.1638], grad_fn=<AddBackward0>), None)

In [106]:
# Optimizers specified in the torch.optim package
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [107]:
def train_one_epoch(epoch_index):
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(training_loader):
        # Every data instance is an input + label pair
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs, loss = model(inputs, labels)

        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
    
    last_loss = running_loss / (i + 1)
        

    return last_loss

In [108]:
EPOCHS = 50
epoch_number = 0


best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number)


    running_vloss = 0.0
    correct_predictions = 0  # Initialize the count of correct predictions
    total_samples = 0  # Initialize the count of total validation samples

    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata
            voutputs, vloss = model(vinputs, vlabels)
            running_vloss += vloss
            # Calculate accuracy
            _, predicted = torch.max(voutputs, 1)
            correct_predictions += (predicted == vlabels).sum().item()
            total_samples += vlabels.size(0)

    avg_vloss = running_vloss / (i + 1)
    accuracy = correct_predictions / total_samples 
    print('LOSS train {:.4f} LOSS valid: {:.4f}   ACCURACY: {:.2%}'.format(avg_loss, avg_vloss, accuracy))


    # Log the running loss averaged per batch
    # for both training and validation


    epoch_number += 1

EPOCH 1:
LOSS train 0.6022 LOSS valid: 0.5635   ACCURACY: 74.63%
EPOCH 2:
LOSS train 0.5828 LOSS valid: 0.5630   ACCURACY: 74.63%
EPOCH 3:
LOSS train 0.5801 LOSS valid: 0.5628   ACCURACY: 74.63%
EPOCH 4:
LOSS train 0.5795 LOSS valid: 0.5602   ACCURACY: 74.63%
EPOCH 5:
LOSS train 0.5783 LOSS valid: 0.5547   ACCURACY: 74.63%
EPOCH 6:
LOSS train 0.5637 LOSS valid: 0.5285   ACCURACY: 74.63%
EPOCH 7:
LOSS train 0.5064 LOSS valid: 0.4411   ACCURACY: 78.32%
EPOCH 8:
LOSS train 0.4493 LOSS valid: 0.4281   ACCURACY: 78.61%
EPOCH 9:
LOSS train 0.4330 LOSS valid: 0.4203   ACCURACY: 78.39%
EPOCH 10:
LOSS train 0.4283 LOSS valid: 0.4274   ACCURACY: 78.04%
EPOCH 11:
LOSS train 0.4267 LOSS valid: 0.4276   ACCURACY: 78.54%
EPOCH 12:
LOSS train 0.4215 LOSS valid: 0.4180   ACCURACY: 79.67%
EPOCH 13:
LOSS train 0.4188 LOSS valid: 0.4163   ACCURACY: 79.39%
EPOCH 14:
LOSS train 0.4201 LOSS valid: 0.4211   ACCURACY: 79.39%
EPOCH 15:
LOSS train 0.4185 LOSS valid: 0.4179   ACCURACY: 79.60%
EPOCH 16:
LOSS trai