In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Show the current working directory, then move up one level and into "data".
# Later cells open CSV files by relative path, so they rely on this directory.
%pwd
%cd "../"
%cd "data"

c:\Users\johny\ChurnProject\Customer-Churn
c:\Users\johny\ChurnProject\Customer-Churn\data


In [48]:
class MyDataset(Dataset):
 
  def __init__(self,split):
    X = pd.read_csv(f"X_{split}.csv")
    y = pd.read_csv(f"y_{split}.csv")

    self.input_size = X.shape[1]

    self.x=torch.tensor(X.values,dtype=torch.float32)
    self.y=torch.tensor(y.values,dtype=torch.long).view(-1)
    print(self.x.shape, self.y.shape)
 
  def __len__(self):
    return len(self.y)
  
   
  def __getitem__(self,idx):
    return self.x[idx],self.y[idx]

In [49]:
# Load the "train" and "test" splits, then wrap each in a DataLoader that
# yields mini-batches of 100 samples. Only the training batches are shuffled.
training_set = MyDataset("train")
validation_set = MyDataset("test")
training_loader = DataLoader(dataset=training_set, shuffle=True, batch_size=100)
validation_loader = DataLoader(dataset=validation_set, shuffle=False, batch_size=100)

torch.Size([5625, 35]) torch.Size([5625])
torch.Size([1407, 35]) torch.Size([1407])


In [50]:
print('Training set has {} instances'.format(len(training_set)))
print('Input vector size set has length of {} '.format(training_set.input_size))
print('Validation set has {} instances'.format(len(validation_set)))

Training set has 5625 instances
Input vector size set has length of 35 
Validation set has 1407 instances


In [51]:
class ChurnClassifier(nn.Module):
    """Feed-forward classifier with five ReLU hidden layers and a linear head.

    Layer widths are ``input_length -> 120 -> 120 -> 120 -> 50 -> 10 ->
    num_classes``. The module also owns its cross-entropy loss so that
    ``forward`` can return the loss together with the logits.
    """

    def __init__(self, input_length, num_classes):
        """Create the layers.

        Args:
            input_length: number of input features per sample.
            num_classes: number of output logits.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_length, 120)
        self.fc2 = nn.Linear(120, 120)
        self.fc3 = nn.Linear(120, 120)
        self.fc4 = nn.Linear(120, 50)
        self.fc5 = nn.Linear(50, 10)
        self.fc6 = nn.Linear(10, num_classes)
        self.relu = nn.ReLU()
        self.loss = torch.nn.CrossEntropyLoss()
        # NOTE: this dropout module is created but forward() never applies it.
        self.dropout = nn.Dropout(0.1)

    def forward(self, x, targets=None):
        """Compute the logits and, when targets are given, the loss.

        Args:
            x: input features.
            targets: optional class indices; when omitted no loss is computed.

        Returns:
            A ``(logits, loss)`` tuple, where ``loss`` is ``None`` if
            ``targets`` is ``None``.
        """
        hidden = x
        # Every layer except the last one is followed by a ReLU.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            hidden = self.relu(layer(hidden))
        logits = self.fc6(hidden)

        loss = self.loss(logits, targets) if targets is not None else None
        return logits, loss



# Seed PyTorch's global random number generator before the weights are
# initialised, so that a fresh "Restart & Run All" gives repeatable results.
SEED = 42
torch.manual_seed(SEED)

model = ChurnClassifier(input_length=training_set.input_size, num_classes=2)

In [56]:
# Smoke test: one forward pass on a random vector of length input_size.
# No targets are passed, so the second element of the result is None.
model(torch.rand(training_set.input_size))

(tensor([-0.0567, -0.2343], grad_fn=<AddBackward0>), None)

In [63]:
# Optimizers are provided by the torch.optim package. Here: stochastic gradient
# descent with momentum over all parameters of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [64]:
def train_one_epoch(epoch_index, loader=None, net=None, opt=None, report_every=1000):
    """Run one optimisation pass over the training batches.

    Args:
        epoch_index: index of the current epoch. Kept for interface
            compatibility; it is not used in the computation.
        loader: iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``training_loader``.
        net: callable returning ``(outputs, loss)`` for ``(inputs, labels)``.
            Defaults to the notebook-level ``model``.
        opt: optimizer that updates the parameters of ``net``. Defaults to the
            notebook-level ``optimizer``.
        report_every: number of batches per progress report.

    Returns:
        The mean per-batch loss of the most recent reporting window. Batches
        left over after the last report form a final, shorter window, so an
        epoch with fewer than ``report_every`` batches returns its mean loss
        instead of 0. Returns 0.0 when ``loader`` yields no batches.

    Raises:
        ValueError: if ``report_every`` is smaller than 1.
    """
    if report_every < 1:
        raise ValueError("report_every must be at least 1")

    # Fall back to the notebook-level objects when none are passed explicitly.
    loader = training_loader if loader is None else loader
    net = model if net is None else net
    opt = optimizer if opt is None else opt

    running_loss = 0.
    last_loss = 0.
    pending = 0  # batches accumulated since the last report

    # enumerate() keeps the batch index available for intra-epoch reporting.
    for i, (inputs, labels) in enumerate(loader):
        # Gradients accumulate by default, so clear them for every batch.
        opt.zero_grad()

        # Forward pass; the network returns the loss alongside its outputs.
        _, loss = net(inputs, labels)
        loss.backward()

        # Adjust learning weights
        opt.step()

        # Gather data and report
        running_loss += loss.item()
        pending += 1
        if pending == report_every:
            last_loss = running_loss / report_every  # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.
            pending = 0

    # Average the trailing partial window. Without this, an epoch shorter than
    # report_every batches would never update last_loss and would return 0.
    if pending:
        last_loss = running_loss / pending

    return last_loss

In [65]:
EPOCHS = 20
epoch_number = 0

# Lowest average validation loss seen so far; updated after every epoch.
best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number)

    running_vloss = 0.0
    num_vbatches = 0
    # Switch the model to evaluation mode for the validation pass.
    model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for vinputs, vlabels in validation_loader:
            voutputs, vloss = model(vinputs, vlabels)
            # Accumulate plain Python floats rather than tensors.
            running_vloss += vloss.item()
            num_vbatches += 1

    # Count the batches explicitly instead of relying on a leaked loop index,
    # which would be undefined (or stale) if the validation loader were empty.
    if num_vbatches:
        avg_vloss = running_vloss / num_vbatches
    else:
        avg_vloss = float('nan')
    print('LOSS  valid {}'.format(avg_vloss))

    # Track the best validation loss; a NaN average never replaces it.
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss

    epoch_number += 1

EPOCH 1:


LOSS  valid 0.5503108501434326
EPOCH 2:
LOSS  valid 0.5513670444488525
EPOCH 3:
LOSS  valid 0.5508398413658142
EPOCH 4:
LOSS  valid 0.5505504012107849
EPOCH 5:
LOSS  valid 0.5490055084228516
EPOCH 6:
LOSS  valid 0.5469681024551392
EPOCH 7:
LOSS  valid 0.5382995009422302
EPOCH 8:
LOSS  valid 0.5132974982261658
EPOCH 9:
LOSS  valid 0.4446110427379608
EPOCH 10:
LOSS  valid 0.41627538204193115
EPOCH 11:
LOSS  valid 0.4232126474380493
EPOCH 12:
LOSS  valid 0.4186873137950897
EPOCH 13:
LOSS  valid 0.41015464067459106
EPOCH 14:
LOSS  valid 0.41167986392974854
EPOCH 15:
LOSS  valid 0.41034409403800964
EPOCH 16:
LOSS  valid 0.4328996241092682
EPOCH 17:
LOSS  valid 0.40870848298072815
EPOCH 18:
LOSS  valid 0.41760143637657166
EPOCH 19:
LOSS  valid 0.40999001264572144
EPOCH 20:
LOSS  valid 0.4287678301334381
