1. Data Gathering
2. Data Preprocessing
3. Feature Engineering
4. Model Training
5. Model Testing

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [None]:
# Load the insurance dataset from the local data directory and preview its
# first rows.
data = pd.read_csv("./data/insurance.csv")
data.head()

In [None]:
# Number of rows and columns in the loaded dataset.
data.shape

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Summary statistics (by default pandas reports these for the numeric columns).
data.describe()

In [None]:
# Column dtypes and non-null counts.
data.info()

In [None]:
# Distinct values present in the 'region' column.
data['region'].unique()

In [None]:
# Hold out 20% of the rows for testing (fixed seed for a reproducible split).
train, test = train_test_split(data, test_size=0.2, random_state=42)
# Both splits get columns overwritten in place later on. Taking explicit copies
# makes them independent DataFrames, so those assignments do not trigger
# pandas' SettingWithCopyWarning.
train, test = train.copy(), test.copy()

In [None]:
# Integer-encode the categorical columns. Each encoder is fitted on the
# training split only and then reused for the test split; the fitted encoders
# are stored by column name.
label_encoder = {}
for col in ('sex', 'smoker', 'region'):
    encoder = LabelEncoder().fit(train[col])
    train[col], test[col] = encoder.transform(train[col]), encoder.transform(test[col])
    label_encoder[col] = encoder
print(label_encoder)

In [None]:
# Separate the 'charges' target from the remaining feature columns in both
# splits.
X_train, y_train = train.drop(columns=['charges']), train['charges']
X_test, y_test = test.drop(columns=['charges']), test['charges']

In [None]:
# Display the training features.
X_train

In [None]:
# Display the training targets.
y_train

In [None]:
# Standardize the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Inspect the scaled training features and check which container type the
# scaler returned.
print(X_train)
type(X_train)

In [None]:
# Number of target values held out for testing.
y_test.shape

In [None]:
# Convert the scaled features to float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)

# Convert the targets to float32 column tensors of shape (n_samples, 1) so
# they line up with the model's single-unit output.
y_train_tensor, y_test_tensor = (
    torch.tensor(target.values, dtype=torch.float32).view(-1, 1)
    for target in (y_train, y_test)
)


In [None]:
# Check the shape of the test-target tensor.
print(y_test_tensor.shape)

In [None]:
class SimpleNNRegressionModel(nn.Module):
    """Feed-forward regressor with two ReLU hidden layers and one output unit.

    The layer widths are ``num_features -> 64 -> 128 -> 1``.
    """

    def __init__(self, num_features):
        """Build the layer stack for inputs with ``num_features`` columns."""
        super().__init__()
        stack = []
        width_in = num_features
        # Hidden layers: each Linear is followed by a ReLU activation.
        for width_out in (64, 128):
            stack.append(nn.Linear(width_in, width_out))
            stack.append(nn.ReLU())
            width_in = width_out
        # Output layer: a single unit with no activation.
        stack.append(nn.Linear(width_in, 1))
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.network(x)

In [None]:
# Shape of the training feature tensor.
X_train_tensor.shape

In [None]:
# Size the input layer from the number of feature columns, then build the
# model and display its layers.
num_features = X_train_tensor.shape[1]
model = SimpleNNRegressionModel(num_features=num_features)
model

In [None]:
# Mean squared error as the regression loss; Adam updates all model parameters
# with a learning rate of 0.01.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.01)

In [None]:
# Full-batch training: every epoch runs one forward/backward pass over the
# whole training tensor.
epochs = 30000
log_every = 500
model.train()  # Training mode only has to be set once, not on every epoch
for epoch in range(epochs):
    optimizer.zero_grad()  # Clear previous gradients
    prediction = model(X_train_tensor)
    loss = criterion(prediction, y_train_tensor)
    loss.backward()        # Compute gradients via backpropagation
    optimizer.step()       # Update model parameters
    # Also log the last epoch so the final training loss is visible.
    if epoch % log_every == 0 or epoch == epochs - 1:
        print(f"Epoch: {epoch}/{epochs} Loss: {loss.item():.4f}")


**`model.eval()`**

- Puts the model in evaluation mode.
- Switches layers such as Dropout and BatchNorm to their inference behavior: Dropout is disabled, and BatchNorm uses its running statistics instead of per-batch statistics.

**`model(X_test_tensor)`**

- This runs the model forward on your test data (X_test_tensor).
- It returns a PyTorch tensor with the predicted outputs.

**`.detach()`**

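- Returns a tensor that is detached from the computation graph.
- Prevents PyTorch from tracking further operations on it (i.e., no gradient history).
- This is required before calling `.numpy()`, which raises an error on a tensor that still requires gradients. Note that the forward pass has already built the graph by this point; to avoid that memory and computation cost during evaluation, run the forward pass inside `torch.no_grad()`.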

**`.numpy()`**

- Converts the detached PyTorch tensor to a NumPy array.

This is useful for:

- Visualizing or saving predictions
- Post-processing
- Comparing predictions to actual values using NumPy or scikit-learn tools

In [None]:
# Model Eval
model.eval()
# Evaluation needs no gradients: no_grad() stops autograd from recording the
# forward pass, which saves memory and computation.
with torch.no_grad():
    # .detach() is redundant inside no_grad() but harmless.
    y_pred = model(X_test_tensor).detach().numpy()

In [None]:
y_test_numpy = y_test_tensor.numpy()

# scikit-learn metrics take (y_true, y_pred) in that order. The order does not
# change MSE/MAE, but R^2 is not symmetric: it normalizes by the variance of
# the first argument, so the true values must come first.
mse = mean_squared_error(y_test_numpy, y_pred)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test_numpy, y_pred)
r2 = r2_score(y_test_numpy, y_pred)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f'RMSE: {rmse}')
print(f"R2_Score: {r2}")

In [None]:
def predict_charge(age, sex, bmi, children, smoker, region) -> float:
    """Predict the insurance charge for a single person.

    The raw inputs go through the same preprocessing as the training data:
    the categorical fields are encoded with the fitted ``label_encoder``
    entries, all features are standardized with the fitted ``scaler``, and the
    result is passed through the trained ``model``.

    Args:
        age: Value for the 'age' feature.
        sex: Raw label for the 'sex' feature (as it appears in the dataset).
        bmi: Value for the 'bmi' feature.
        children: Value for the 'children' feature.
        smoker: Raw label for the 'smoker' feature.
        region: Raw label for the 'region' feature.

    Returns:
        The model's predicted charge as a Python float.

    Raises:
        ValueError: If a categorical label was not seen when the encoders
            were fitted (raised by ``LabelEncoder.transform``).
    """
    # Column names and order must match the frame the scaler was fitted on.
    input_data = pd.DataFrame([[age, sex, bmi, children, smoker, region]],
                              columns=['age', 'sex', 'bmi', 'children', 'smoker', 'region'])

    for col in ['sex', 'smoker', 'region']:
        input_data[col] = label_encoder[col].transform(input_data[col])
    input_data = scaler.transform(input_data)
    input_tensor = torch.tensor(input_data, dtype=torch.float32)

    # Make sure inference runs in evaluation mode and without building an
    # autograd graph, regardless of the state the model was left in.
    model.eval()
    with torch.no_grad():
        predicted_charge = model(input_tensor).item()
    return predicted_charge

In [90]:
# Example: age 40, sex 'female', BMI 27.9, no children, smoker 'yes',
# region 'southwest'.
predict_charge(40,'female',27.9,0,'yes','southwest')