In [77]:
import pandas as pd
import tqdm as tqdm
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os

In [78]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [107]:
train_data = pd.read_csv('train.csv')
# test_data = pd.read_csv('test.csv')

In [110]:
print(train_data.head())
# print(test_data.columns)

   id  Gender   Age  Driving_License  Region_Code  Previously_Insured  \
0   0       0  21.0                1         35.0                   0   
1   1       0  43.0                1         28.0                   0   
2   2       1  25.0                1         14.0                   1   
3   3       1  35.0                1          1.0                   0   
4   4       1  36.0                1         15.0                   1   

   Vehicle_Age  Vehicle_Damage  Annual_Premium  Policy_Sales_Channel  Vintage  \
0            0               1         65101.0                 124.0    187.0   
1            1               1         58911.0                  26.0    288.0   
2            0               0         38043.0                 152.0    254.0   
3            0               1          2630.0                 156.0     76.0   
4            0               0         31951.0                 152.0    294.0   

   Response  
0         0  
1         1  
2         0  
3         0  
4   

In [109]:
gender_map = {
    'Male' : 0,
    'Female' : 1
}
vehical_age_map = {
    '1-2 Year' : 0,
    '> 2 Years' : 1,
    '< 1 Year' : 0
}
vehical_damage_map = {
    'Yes' : 1,
    'No' : 0
}
def map_df(df):
    df['Gender'] = df['Gender'].map(gender_map)
    df['Vehicle_Age'] = df['Vehicle_Age'].map(vehical_age_map)
    df['Vehicle_Damage'] = df['Vehicle_Damage'].map(vehical_damage_map)
    df['Age'] = df['Age'].apply(lambda a: float(a))
    df['Vintage'] = df['Vintage'].apply(lambda v: float(v))

map_df(train_data)
# map_df(test_data)


In [111]:
scaler = StandardScaler()
X = train_data[['Gender', 'Age', 'Driving_License', 'Region_Code',
       'Previously_Insured', 'Vehicle_Age', 'Vehicle_Damage', 'Annual_Premium',
       'Policy_Sales_Channel', 'Vintage']]
y = train_data['Response']
nfeats = len(X.columns)
# X_test = test_data[['Gender', 'Age', 'Driving_License', 'Region_Code',
#        'Previously_Insured', 'Vehicle_Age', 'Vehicle_Damage', 'Annual_Premium',
#        'Policy_Sales_Channel', 'Vintage']]
# X_test[['Age', 'Region_Code', 'Annual_Premium',  'Policy_Sales_Channel', 'Vintage']] = scaler.fit_transform(X_test[['Age', 'Region_Code', 'Annual_Premium',  'Policy_Sales_Channel', 'Vintage']])





In [112]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [113]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [114]:
class Insurance_Dataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32).unsqueeze(1)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [115]:
BATCH_SIZE = 16
CKPT_DIR = "CKPT"
LR = 0.001
EPOCHS = 10

In [116]:
train_dataset = Insurance_Dataset(X_train, y_train)
test_dataset = Insurance_Dataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [117]:
print(nfeats)

10


In [118]:
class Insurance_Model(nn.Module):
    def __init__(self, nfeats):
        super(Insurance_Model, self).__init__()
        self.input_layer = nn.Linear(nfeats, 64)
        self.lyr1 = nn.Linear(64, 32)
        self.lyr2 = nn.Linear(32, 8)
        self.lyr3 = nn.Linear(8, 1)
    
    def forward(self, x):
        x = torch.relu(self.input_layer(x))
        x = torch.relu(self.lyr1(x))
        x = torch.relu(self.lyr2(x))
        x = torch.sigmoid(self.lyr3(x))
        return x
    
model = Insurance_Model(nfeats).to(device)

In [63]:

optimizer = optim.Adam(model.parameters(), lr=0.001)

In [119]:
def train(dataloader, model, optimizer, epoch):
    loss_fn = nn.BCELoss()
    model.train()
    with tqdm.tqdm(dataloader, unit="batch") as tbatch:
        for X, y in tbatch:
            X = X.to(device)
            y = y.to(device)

            pred = model(X)
            loss = loss_fn(pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    torch.save(
        {
            "epoch" : epoch,
            "model_state_dict" : model.state_dict(),
            "optimizer_state_dict" : optimizer.state_dict(),
            "loss" : loss
        },
        f"{CKPT_DIR}/ckpt{epoch}.pt",
    )

In [120]:
def test(dataloader, model, dataset_name):
    loss_fn = nn.BCELoss()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float32).sum().item()
    test_loss /= num_batches
    correct /= size
    print(
        f"{dataset_name} Accuracy: {(100 * correct):>0,1f}% Avg loss: {test_loss:>6f}\n"
    )


In [122]:
def make_or_restore_model(nfeat):
    model = Insurance_Model(nfeat)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    checkpoints = [
        CKPT_DIR + "/" + name
        for name in os.listdir(CKPT_DIR)
        if name[-1] == "t"
    ]
    if checkpoints:
        latest_checkpoint = max(checkpoints, key=os.path.getctime)
        print("Restoring from", latest_checkpoint)
        ckpt = torch.load(latest_checkpoint)
        model.load_state_dict(ckpt["model_state_dict"])
        optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        epoch = ckpt["epoch"]
        return model, optimizer, epoch+1
    else: 
        print("Creating new model")
        return model, optimizer, 0



In [123]:
model, optimizer, epoch_start = make_or_restore_model(nfeats)
model.to(device)
for e in range(epoch_start, EPOCHS):
    print()
    print("Epoch", e)
    print("-------")
    model.train()
    train(train_loader, model, optimizer, e)
    print()
    model.eval()
    test(train_loader, model, "Train")
    test(test_loader, model, "Test")
    

Creating new model

Epoch 0
-------


100%|██████████| 575240/575240 [28:10<00:00, 340.25batch/s]



