# INSURANCE COST PREDICTION


## using linear regression

In [1]:
#Importing necessary libraries
import torch
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split


dataframe_raw=pd.read_csv("./insurance.csv")
dataframe_raw.head()

input_cols=['age', 'sex', 'bmi', 'children', 'smoker' ]
output_cols=['charges']

categorical_cols=['sex', 'smoker']


#from_dataframe() returns input and target arrays required
def from_dataframe(dataframe):
    df=dataframe.copy(deep=True)
   
    #The categorical columns have been converted to numeric values
    for col in categorical_cols: 
        df[col]=df[col].astype('category').cat.codes
     
    #The input_cols input columns will be converted tp numpy arrays
    input_arrays=df[input_cols].to_numpy()
    target_arrays=df[output_cols].to_numpy()
    
    
    return input_arrays, target_arrays
    
    

In [2]:
input_array,target_array=from_dataframe(dataframe_raw)

#Convert arrays into tensors
inputs=torch.from_numpy(input_array).float()
target=torch.from_numpy(target_array).float()

#Merge the input and target together in a dataset using TensorDataset
dataset=TensorDataset(inputs, target)

print(dataset)


#Split the datset into train and validation datset
train_ds, val_ds=random_split(dataset,[900,438])
val_ds


<torch.utils.data.dataset.TensorDataset object at 0x000001F353533130>


<torch.utils.data.dataset.Subset at 0x1f353533970>

## LOADING DATA

In [3]:
batch_size=30
train_loader=DataLoader(train_ds, batch_size, shuffle=True)
val_loader=DataLoader(val_ds,batch_size)

In [4]:
#Checking the train_loader, data loaded into train_loader
for x,y in train_loader:
    print("inputs:",x)
    print("targets:",y)
    break
    
    

inputs: tensor([[27.0000,  1.0000, 18.9050,  3.0000,  0.0000],
        [21.0000,  1.0000, 20.2350,  3.0000,  0.0000],
        [53.0000,  1.0000, 20.9000,  0.0000,  1.0000],
        [56.0000,  0.0000, 28.5950,  0.0000,  0.0000],
        [33.0000,  1.0000, 35.2450,  0.0000,  0.0000],
        [49.0000,  1.0000, 22.5150,  0.0000,  0.0000],
        [50.0000,  1.0000, 32.3000,  2.0000,  0.0000],
        [60.0000,  1.0000, 24.3200,  0.0000,  0.0000],
        [58.0000,  0.0000, 27.1700,  0.0000,  0.0000],
        [59.0000,  1.0000, 28.7850,  0.0000,  0.0000],
        [42.0000,  0.0000, 26.6000,  0.0000,  1.0000],
        [33.0000,  0.0000, 39.8200,  1.0000,  0.0000],
        [31.0000,  0.0000, 26.6200,  0.0000,  0.0000],
        [38.0000,  0.0000, 30.2100,  3.0000,  0.0000],
        [33.0000,  0.0000, 35.5300,  0.0000,  1.0000],
        [48.0000,  1.0000, 40.5650,  2.0000,  1.0000],
        [31.0000,  1.0000, 38.3900,  2.0000,  0.0000],
        [52.0000,  1.0000, 33.2500,  0.0000,  0.0000],
  

# TRAINING MODEL USING LINEAR REGRESSION MODEL

In [5]:
class InsuranceModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear=nn.Linear(len(input_cols), len(output_cols))
        
    def forward(self,xb):
        out=self.linear(xb)
        return out
    
    def training_step(self,batch):
        inputs,target=batch
        out=self(inputs)
        loss=F.mse_loss(out,target)
        return loss
    
    def validation_step(self,batch):
        inputs, targets=batch
        out=self(inputs)
        loss=F.mse_loss(out,targets)
       
        return {'val_loss': loss.detach()}
    
    def validation_epoch(self,outputs):
        batch_losses=[x['val_loss'] for x in outputs]
        epoch_losses=torch.stack(batch_losses).mean()
        return {'val_loss':epoch_losses.item()}
    
    def epoch_end(self, epoch,result,num_epochs):
        if (epoch+1) % 20 == 0 or epoch == num_epochs-1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))
        
        

In [6]:
model=InsuranceModel()

# THE LINEAR REGRESSION MODEL

In [7]:
def evaluate_model(model,val_loader):
    outputs=[model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch(outputs)





def fit(epochs,lr,model,train_loader, val_loader,opt_func=torch.optim.SGD):
    history=[]
    optimizer=opt_func(model.parameters(),lr)
    
    for epoch in range(epochs):
        
        for batch in train_loader:
            loss=model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
        result=evaluate_model(model,val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history
            
            
        
    
    

In [9]:
epoch=100
lr=0.5
history1 = fit(epoch, lr, model, train_loader, val_loader)


Epoch [20], val_loss: nan
Epoch [40], val_loss: nan
Epoch [60], val_loss: nan
Epoch [80], val_loss: nan
Epoch [100], val_loss: nan
