In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Point the kernel's working directory at the project's `data` folder so that the
# relative CSV file names used by the dataset class resolve there.
# NOTE(review): assumes the kernel starts in a folder that is a sibling of `data`
# (one level below the project root) — confirm before running from elsewhere.
%pwd
%cd "../"
%cd "data"

c:\Users\johny\ChurnProject\Customer-Churn
c:\Users\johny\ChurnProject\Customer-Churn\data


In [77]:
class MyDataset(Dataset):
  """In-memory dataset built from a pair of ``X_<split>.csv`` / ``y_<split>.csv`` files.

  Features are stored as a ``float32`` tensor and labels as a flat ``long``
  tensor, so every item is a ``(features, label)`` pair.

  Attributes:
    input_size: Number of feature columns found in the X file.
    x: Feature tensor with one row per sample.
    y: 1-D label tensor with one entry per sample.
  """

  def __init__(self, split, data_dir="."):
    """Load one split from disk.

    Args:
      split: Name inserted into the file names, e.g. ``"train"`` reads
        ``X_train.csv`` and ``y_train.csv``.
      data_dir: Directory holding the CSV files. Defaults to the current
        working directory, which is what the file names resolved against
        before this parameter existed.

    Raises:
      ValueError: If the label file does not contain exactly one label per
        feature row (row count mismatch or more than one label column).
    """
    # Function-scope import keeps this class self-contained in the notebook.
    from pathlib import Path

    root = Path(data_dir)
    X = pd.read_csv(root / f"X_{split}.csv")
    y = pd.read_csv(root / f"y_{split}.csv")

    # `y.size` counts every cell, so this rejects both a row-count mismatch and
    # an extra column (e.g. a saved index) that `.view(-1)` would otherwise
    # silently flatten into bogus extra labels.
    if y.size != len(X):
      raise ValueError(
        f"X_{split}.csv has {len(X)} rows but y_{split}.csv holds {y.size} "
        f"label values (shape {y.shape}); expected exactly one label per row"
      )

    self.input_size = X.shape[1]

    self.x = torch.tensor(X.values, dtype=torch.float32)
    self.y = torch.tensor(y.values, dtype=torch.long).view(-1)
    print(self.x.shape, self.y.shape)

  def __len__(self):
    """Return the number of samples (one per label)."""
    return len(self.y)

  def __getitem__(self, idx):
    """Return the ``(features, label)`` pair stored at position ``idx``."""
    return self.x[idx], self.y[idx]

In [78]:
# Load both splits into memory. Note that the "validation" data is read from the
# X_test / y_test files.
training_set = MyDataset("train")
validation_set = MyDataset("test")

# Mini-batches of 80 samples: training batches are shuffled, validation batches
# keep the file order.
training_loader = DataLoader(dataset=training_set, batch_size=80, shuffle=True)
validation_loader = DataLoader(dataset=validation_set, batch_size=80, shuffle=False)

torch.Size([5625, 35]) torch.Size([5625])
torch.Size([1407, 35]) torch.Size([1407])


In [79]:
# Report split sizes and feature width; each template receives exactly one value.
summary_lines = (
    ('Training set has {} instances', len(training_set)),
    ('Input vector size set has length of {} ', training_set.input_size),
    ('Validation set has {} instances', len(validation_set)),
)
for template, value in summary_lines:
    print(template.format(value))

Training set has 5625 instances
Input vector size set has length of 35 
Validation set has 1407 instances


In [122]:
class ChurnClassifier(nn.Module):
    """Fully connected classifier: five ReLU hidden layers followed by a linear output layer.

    Layer widths are ``input_length -> 120 -> 120 -> 120 -> 50 -> 10 -> num_classes``.
    The module also owns its cross-entropy criterion, so ``forward`` can return
    the loss together with the logits.
    """

    def __init__(self, input_length, num_classes):
        """Create the layers.

        Args:
            input_length: Number of features in each input vector.
            num_classes: Number of output logits.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_length, 120)
        self.fc2 = nn.Linear(120, 120)
        self.fc3 = nn.Linear(120, 120)
        self.fc4 = nn.Linear(120, 50)
        self.fc5 = nn.Linear(50, 10)
        self.fc6 = nn.Linear(10, num_classes)
        self.relu = nn.ReLU()
        self.loss = nn.CrossEntropyLoss()
        # NOTE(review): this dropout layer is registered but forward() never
        # applies it, so the network currently trains without dropout —
        # confirm whether that is intended.
        self.dropout = nn.Dropout(0.1)

    def forward(self, x, targets=None):
        """Compute logits and, when targets are supplied, the cross-entropy loss.

        Args:
            x: Batch of input feature vectors.
            targets: Optional class indices for the batch.

        Returns:
            A ``(logits, loss)`` tuple; ``loss`` is ``None`` when ``targets`` is ``None``.
        """
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5)
        for layer in hidden_layers:
            x = self.relu(layer(x))
        logits = self.fc6(x)

        loss = self.loss(logits, targets) if targets is not None else None
        return logits, loss

# Seed PyTorch's global random number generator before any weights are created,
# so that weight initialisation (and any later sampling that draws from the
# global generator) repeats across Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Two output logits: the label is binary.
model = ChurnClassifier(input_length=training_set.input_size, num_classes=2)
model = model.to(device)



In [123]:
# Snapshot of the CUDA memory (in bytes) currently occupied by tensors on the
# default device.
# NOTE(review): presumably a baseline for later memory comparisons —
# `init_memory` is not read in the cells visible here; confirm it is still needed.
init_memory = torch.cuda.memory_allocated() 

In [119]:
# Stochastic gradient descent with momentum, updating every parameter of `model`.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [124]:
def train_one_epoch(epoch_index):
    """Run one optimisation pass over ``training_loader`` and return the mean batch loss.

    Uses the notebook-level ``model``, ``optimizer``, ``training_loader`` and
    ``device``. The caller is responsible for putting the model in training
    mode beforehand.

    Args:
        epoch_index: Index of the current epoch. It does not influence the
            computation; it is kept so existing call sites keep working.

    Returns:
        float: The per-batch losses averaged over the number of batches.

    Raises:
        ValueError: If ``training_loader`` yields no batches, since no average
            can be computed.
    """
    running_loss = 0.
    num_batches = 0

    for inputs, labels in training_loader:
        # Every batch is an (inputs, labels) pair; move both to the model's device.
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()

        # The model returns (logits, loss) when labels are supplied.
        _, loss = model(inputs, labels)
        loss.backward()

        # Adjust learning weights.
        optimizer.step()

        # .item() yields a plain float, so the computation graph is not kept alive.
        running_loss += loss.item()
        num_batches += 1

    # Count batches explicitly instead of reusing a loop index after the loop,
    # which is undefined when the loader is empty.
    if num_batches == 0:
        raise ValueError("training_loader produced no batches; cannot compute an average loss")

    return running_loss / num_batches

In [121]:
EPOCHS = 5
epoch_number = 0

# Lowest validation loss seen so far; starts far above any loss printed below
# so the first epoch always becomes the initial best.
best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the training data.
    model.train(True)
    avg_loss = train_one_epoch(epoch_number)

    running_vloss = 0.0
    correct_predictions = 0  # validation samples classified correctly
    total_samples = 0  # validation samples seen
    num_vbatches = 0  # validation batches seen

    # Switch the model to evaluation mode for the validation pass.
    model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for vinputs, vlabels in validation_loader:
            vinputs, vlabels = vinputs.to(device), vlabels.to(device)
            voutputs, vloss = model(vinputs, vlabels)
            running_vloss += vloss.item()

            # The predicted class is the index of the largest logit. The
            # comparison runs on the model's device; only the resulting count
            # is brought back to Python.
            _, predicted = torch.max(voutputs, 1)
            correct_predictions += (predicted == vlabels).sum().item()
            total_samples += vlabels.size(0)
            num_vbatches += 1

    # Loss averaged per batch, for both training and validation. An empty
    # validation loader yields NaN instead of failing on a division by zero or
    # on an undefined loop index.
    avg_vloss = running_vloss / num_vbatches if num_vbatches else float('nan')
    accuracy = correct_predictions / total_samples if total_samples else float('nan')
    print('LOSS train {:.4f} LOSS valid: {:.4f}   ACCURACY: {:.2%}'.format(avg_loss, avg_vloss, accuracy))

    # Keep track of the best validation loss reached so far.
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss

    epoch_number += 1

EPOCH 1:
LOSS train 0.5898 LOSS valid: 0.5642   ACCURACY: 74.63%
EPOCH 2:
LOSS train 0.5815 LOSS valid: 0.5630   ACCURACY: 74.63%
EPOCH 3:
LOSS train 0.5822 LOSS valid: 0.5633   ACCURACY: 74.63%
EPOCH 4:
LOSS train 0.5796 LOSS valid: 0.5615   ACCURACY: 74.63%
EPOCH 5:
LOSS train 0.5781 LOSS valid: 0.5575   ACCURACY: 74.63%
EPOCH 6:
LOSS train 0.5719 LOSS valid: 0.5460   ACCURACY: 74.63%
EPOCH 7:
LOSS train 0.5448 LOSS valid: 0.4958   ACCURACY: 74.63%
EPOCH 8:
LOSS train 0.4807 LOSS valid: 0.4350   ACCURACY: 74.63%
EPOCH 9:
LOSS train 0.4428 LOSS valid: 0.4208   ACCURACY: 77.83%
EPOCH 10:
LOSS train 0.4308 LOSS valid: 0.4347   ACCURACY: 77.54%
EPOCH 11:
LOSS train 0.4294 LOSS valid: 0.4176   ACCURACY: 78.96%
EPOCH 12:
LOSS train 0.4231 LOSS valid: 0.4184   ACCURACY: 78.89%
EPOCH 13:
LOSS train 0.4217 LOSS valid: 0.4227   ACCURACY: 78.32%
EPOCH 14:
LOSS train 0.4172 LOSS valid: 0.4348   ACCURACY: 77.83%
EPOCH 15:
LOSS train 0.4212 LOSS valid: 0.4279   ACCURACY: 78.46%
EPOCH 16:
LOSS trai