In [None]:
!conda install numpy pytorch torchvision cpuonly -c pytorch -y
!pip install matplotlib --upgrade --quiet
!pip install seaborn

/bin/bash: conda: command not found


In [None]:
import torch
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
import os
import seaborn as sns

In [None]:
insurance_bill=pd.read_csv(r'/content/insurance.csv')
insurance_bill.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [None]:
insurance_bill.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def customize_dataset(insurance_bill, rand_str):
    """Build a customised variant of the insurance data from a seed string.

    The first four characters of ``rand_str`` control the result:

    * character 0 - its code point seeds the row sample, which keeps 95% of
      the rows (rounded down);
    * character 1 - its code point, as a percentage, rescales ``bmi``;
    * character 2 - its code point, as a percentage, rescales ``charges``;
    * character 3 - an odd code point removes the ``region`` column.

    The input frame is deep-copied first, so it is never modified.

    Returns:
        The sampled and rescaled dataframe.
    """
    working = insurance_bill.copy(deep=True)

    # Row sample, seeded by the first character.
    n_keep = int(0.95 * len(working))
    working = working.sample(n_keep, random_state=int(ord(rand_str[0])))

    # Percentage rescaling of the two numeric columns.
    working.bmi = working.bmi * ord(rand_str[1]) / 100.
    working.charges = working.charges * ord(rand_str[2]) / 100.

    # The parity of the fourth character decides whether `region` survives.
    region_is_dropped = ord(rand_str[3]) % 2 != 0
    return working.drop(columns=['region']) if region_is_dropped else working

In [None]:
name='vinay'

In [None]:
dataframe = customize_dataset(insurance_bill, name)
dataframe.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
1034,61,male,40.299,0,no,14245.07832
556,46,male,35.112,1,no,9168.04856
1021,22,female,32.571,3,yes,39155.14878
693,24,male,24.83775,0,no,2588.265295
403,49,male,33.915,3,no,11296.406


In [None]:
input_cols = dataframe.drop('charges',axis=1).columns
input_cols

Index(['age', 'sex', 'bmi', 'children', 'smoker'], dtype='object')

In [None]:
categorical_cols = dataframe.select_dtypes('object').columns.to_list()
categorical_cols

['sex', 'smoker']

In [None]:
num_cols = len(dataframe.columns)
print(num_cols)

6


In [None]:
num_rows = len(dataframe)
max_charge = dataframe['charges'].max()
min_charge = dataframe['charges'].min()
avg_charge = dataframe['charges'].mean()
output_cols = ['charges']

In [None]:
def dataframe_to_arrays(dataframe):
    """Convert a dataframe into NumPy input and target arrays.

    Works on a deep copy of ``dataframe``. Every column listed in the
    notebook-level ``categorical_cols`` is replaced by its pandas category
    codes; the ``input_cols`` and ``output_cols`` selections are then
    converted with ``to_numpy``.

    Returns:
        An ``(inputs_array, targets_array)`` tuple.
    """
    encoded = dataframe.copy(deep=True)
    for name in categorical_cols:
        as_category = encoded[name].astype('category')
        encoded[name] = as_category.cat.codes
    features = encoded[input_cols].to_numpy()
    labels = encoded[output_cols].to_numpy()
    return features, labels

In [None]:
inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs_array, targets_array

(array([[61.     ,  1.     , 40.299  ,  0.     ,  0.     ],
        [46.     ,  1.     , 35.112  ,  1.     ,  0.     ],
        [22.     ,  0.     , 32.571  ,  3.     ,  1.     ],
        ...,
        [58.     ,  1.     , 26.43375,  0.     ,  0.     ],
        [34.     ,  1.     , 44.2365 ,  2.     ,  0.     ],
        [19.     ,  1.     , 31.7625 ,  0.     ,  1.     ]]),
 array([[14245.07832 ],
        [ 9168.04856 ],
        [39155.14878 ],
        ...,
        [13124.237775],
        [ 5636.60757 ],
        [35803.17455 ]]))

In [None]:
inputs = torch.from_numpy(inputs_array).to(dtype=torch.float32)
targets = torch.from_numpy(targets_array).to(dtype=torch.float32)
inputs,targets

(tensor([[61.0000,  1.0000, 40.2990,  0.0000,  0.0000],
         [46.0000,  1.0000, 35.1120,  1.0000,  0.0000],
         [22.0000,  0.0000, 32.5710,  3.0000,  1.0000],
         ...,
         [58.0000,  1.0000, 26.4338,  0.0000,  0.0000],
         [34.0000,  1.0000, 44.2365,  2.0000,  0.0000],
         [19.0000,  1.0000, 31.7625,  0.0000,  1.0000]]), tensor([[14245.0781],
         [ 9168.0488],
         [39155.1484],
         ...,
         [13124.2373],
         [ 5636.6074],
         [35803.1758]]))

In [None]:
inputs.dtype, targets.dtype

(torch.float32, torch.float32)

In [None]:
dataset = TensorDataset(inputs, targets)

In [None]:
val_percent = 0.15
# Size the split from the dataset itself so the two parts always add up to
# len(dataset), which is what random_split expects of the lengths it is given.
val_size = int(len(dataset) * val_percent)
train_size = len(dataset) - val_size

# Seed the split so the same rows land in the validation set on every run.
# Without a fixed generator the reported validation losses and the val_ds[i]
# lookups further down change each time the notebook is executed.
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
batch_size = 120
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
for xb, yb in train_loader:
    print("inputs:", xb)
    print("targets:", yb)
    break

inputs: tensor([[31.0000,  0.0000, 24.7800,  2.0000,  0.0000],
        [44.0000,  1.0000, 41.4960,  0.0000,  0.0000],
        [58.0000,  1.0000, 24.4650,  0.0000,  0.0000],
        [24.0000,  0.0000, 24.3705,  0.0000,  0.0000],
        [55.0000,  0.0000, 31.1850,  2.0000,  0.0000],
        [29.0000,  1.0000, 33.7155,  2.0000,  0.0000],
        [63.0000,  0.0000, 29.1270,  0.0000,  1.0000],
        [54.0000,  1.0000, 22.0605,  2.0000,  0.0000],
        [40.0000,  0.0000, 34.4137,  2.0000,  1.0000],
        [48.0000,  1.0000, 38.5035,  1.0000,  0.0000],
        [18.0000,  1.0000, 27.4312,  0.0000,  0.0000],
        [35.0000,  0.0000, 37.6530,  2.0000,  0.0000],
        [47.0000,  0.0000, 37.8000,  1.0000,  0.0000],
        [39.0000,  1.0000, 35.8050,  2.0000,  0.0000],
        [36.0000,  0.0000, 23.7300,  2.0000,  1.0000],
        [42.0000,  1.0000, 27.3735,  1.0000,  1.0000],
        [35.0000,  0.0000, 35.9205,  1.0000,  0.0000],
        [32.0000,  1.0000, 33.0750,  1.0000,  0.0000],
  

In [None]:
input_size = len(input_cols)
output_size = len(output_cols)
class InsuranceModel(nn.Module):
    """Single linear layer trained and validated with L1 (mean absolute error) loss.

    Args:
        in_features: Number of input features. When omitted, the notebook-level
            ``input_size`` is used.
        out_features: Number of predicted values. When omitted, the
            notebook-level ``output_size`` is used.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals so a bare InsuranceModel() call
        # keeps building the same layer as before.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Apply the linear layer to a batch of inputs."""
        return self.linear(xb)

    def _batch_loss(self, batch):
        """Return the L1 loss between the model output and the targets of ``batch``."""
        inputs, targets = batch
        return F.l1_loss(self(inputs), targets)

    def training_step(self, batch):
        """Return the L1 loss for an ``(inputs, targets)`` batch, ready for ``backward()``."""
        return self._batch_loss(batch)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for a batch, with the loss detached from the graph."""
        return {'val_loss': self._batch_loss(batch).detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch ``val_loss`` tensors into one Python float.

        Args:
            outputs: List of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': mean of the per-batch losses}``.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs, print_every=20):
        """Print the validation loss every ``print_every`` epochs and on the last epoch.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            result: Dict holding the epoch's ``'val_loss'``.
            num_epochs: Total number of epochs in the current run.
            print_every: Logging interval in epochs (default 20).
        """
        if (epoch + 1) % print_every == 0 or epoch == num_epochs - 1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))

In [None]:
model = InsuranceModel()
list(model.parameters())
def evaluate(model, val_loader):
    """Run the model over every validation batch and aggregate the results.

    Gradient tracking is switched off for the whole pass: nothing here calls
    ``backward()``, so recording the autograd graph for each batch would only
    cost time and memory.

    Args:
        model: Object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch outputs.
    """
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` and collect one validation result per epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to ``opt_func``.
        model: Model providing ``parameters``, ``training_step`` and ``epoch_end``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, handed to ``evaluate``.
        opt_func: Optimizer constructor called as ``opt_func(params, lr)``.

    Returns:
        List with the ``evaluate`` result of every epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch, then clear the
        # gradients so they do not accumulate into the next batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase: score the epoch, log it, and keep the result.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result, epochs)
        history.append(epoch_result)
    return history

In [None]:
result = evaluate(model,val_loader) 
print(result)

{'val_loss': 14729.0078125}


In [None]:
result = evaluate(model,val_loader) 
print(result)

{'val_loss': 14729.0078125}


In [None]:
epochs = 300
lr = 1e-3
history2 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 14213.0195
Epoch [40], val_loss: 13697.4297
Epoch [60], val_loss: 13181.8486
Epoch [80], val_loss: 12675.1299
Epoch [100], val_loss: 12184.5830
Epoch [120], val_loss: 11728.2871
Epoch [140], val_loss: 11309.5117
Epoch [160], val_loss: 10920.7500
Epoch [180], val_loss: 10573.6914
Epoch [200], val_loss: 10252.9297
Epoch [220], val_loss: 9949.2031
Epoch [240], val_loss: 9690.8936
Epoch [260], val_loss: 9456.2471
Epoch [280], val_loss: 9236.6318
Epoch [300], val_loss: 9054.0098


In [None]:
epochs = 300
lr = 1e-3
# Store this run under its own name: assigning to history2 again would discard
# the history returned by the previous training run.
history3 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 8889.4238
Epoch [40], val_loss: 8748.6787
Epoch [60], val_loss: 8626.7129
Epoch [80], val_loss: 8520.0547
Epoch [100], val_loss: 8438.0703
Epoch [120], val_loss: 8377.4219
Epoch [140], val_loss: 8329.1885
Epoch [160], val_loss: 8284.5918
Epoch [180], val_loss: 8253.7617
Epoch [200], val_loss: 8226.6699
Epoch [220], val_loss: 8200.8652
Epoch [240], val_loss: 8180.0840
Epoch [260], val_loss: 8163.8262
Epoch [280], val_loss: 8148.0986
Epoch [300], val_loss: 8136.2485


In [None]:
epochs = 100
lr = 1e-5
history4 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 8136.1533
Epoch [40], val_loss: 8136.0332
Epoch [60], val_loss: 8135.9155
Epoch [80], val_loss: 8135.8018
Epoch [100], val_loss: 8135.6982


In [None]:
epochs = 500
lr = 1e-5
history5 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 8135.6113
Epoch [40], val_loss: 8135.5229
Epoch [60], val_loss: 8135.4160
Epoch [80], val_loss: 8135.3057
Epoch [100], val_loss: 8135.1914
Epoch [120], val_loss: 8135.0732
Epoch [140], val_loss: 8134.9233
Epoch [160], val_loss: 8134.8081
Epoch [180], val_loss: 8134.6904
Epoch [200], val_loss: 8134.5566
Epoch [220], val_loss: 8134.4268
Epoch [240], val_loss: 8134.3301
Epoch [260], val_loss: 8134.2251
Epoch [280], val_loss: 8134.1055
Epoch [300], val_loss: 8133.9878
Epoch [320], val_loss: 8133.8818
Epoch [340], val_loss: 8133.7832
Epoch [360], val_loss: 8133.7031
Epoch [380], val_loss: 8133.5850
Epoch [400], val_loss: 8133.4775
Epoch [420], val_loss: 8133.3857
Epoch [440], val_loss: 8133.3037
Epoch [460], val_loss: 8133.1782
Epoch [480], val_loss: 8133.0938
Epoch [500], val_loss: 8132.9668


In [None]:
# Read the final validation loss from the last training run instead of copying
# it by hand from the printed log, so it stays correct when the notebook is re-run.
val_loss = history5[-1]['val_loss']

In [None]:

def predict_single(input, target, model):
    """Print one sample, its target and the model's prediction for it.

    Args:
        input: Tensor holding a single sample; a leading batch dimension is
            added before it is passed to ``model``.
        target: Value printed next to the prediction for comparison.
        model: Callable applied to the one-sample batch.
    """
    batch = input.unsqueeze(0)
    # Take the only row of the batched output and detach it from the autograd graph.
    prediction = model(batch)[0].detach()
    for label, value in (("Input:", input), ("Target:", target), ("Prediction:", prediction)):
        print(label, value)

In [None]:
input, target = val_ds[10]
predict_single(input, target, model)

Input: tensor([29.0000,  0.0000, 26.8800,  4.0000,  0.0000])
Target: tensor([6279.7539])
Prediction: tensor([6827.6768])


In [None]:
input, target = val_ds[10]
predict_single(input, target, model)

Input: tensor([29.0000,  0.0000, 26.8800,  4.0000,  0.0000])
Target: tensor([6279.7539])
Prediction: tensor([6827.6768])


In [None]:
input, target = val_ds[23]
predict_single(input, target, model)

Input: tensor([46.0000,  1.0000, 27.0900,  5.0000,  0.0000])
Target: tensor([11106.6670])
Prediction: tensor([9387.2725])
