# Project Part 3

[![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/dereckhelms/project-part3-dh)


## 1. Creation of Feed-Forward Model
Although I do not believe a feed-forward model is the most appropriate for this set of structured data, I want to construct one to see how it behaves in relation to another deep learning model. 
Below are the imports I will use for this section.
It is necessary to convert the binary smoker feature to one hot encoding for the model to analyze it without errors. StandardScaler is used to scale the data to a normal distribution and if all the data is not of float type I will receive many errors.

NOTE: Due to the restrictions of my windows machine, I am unable to run the results locally, but everything in Kaggle can be run and analyzed. 

The model has been fit for the values of smoker where yes and no have been transformed into binary values. As 'features['smoker'] = label_encoder.fit_transform(features['smoker'])' suggest.

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('/kaggle/input/healthcare-insurance/insurance.csv')

# Extract features and target variable
features = df[['age', 'bmi', 'children', 'smoker', 'region']]
target = df['charges']

# Explicitly create a copy of the DataFrame to avoid SettingWithCopyWarning
features = features.copy()

# Convert categorical features to one-hot encoding
label_encoder = LabelEncoder()
features['smoker'] = label_encoder.fit_transform(features['smoker'])
features = pd.get_dummies(features, columns=['region'])

# Standardize numerical features
scaler = StandardScaler()
features[['age', 'bmi', 'children']] = scaler.fit_transform(features[['age', 'bmi', 'children']])

# Ensure all data is of numeric type
features = features.astype('float32')
target = target.astype('float32')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Define the model architecture
class InsuranceModel(nn.Module):
    def __init__(self, input_size):
        super(InsuranceModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)  # Adjust the number of hidden units as needed
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 1)  # Output is a single number for insurance charge prediction

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Create a model instance
input_size = X_train_tensor.shape[1]
model = InsuranceModel(input_size)

# Set up the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adjust the learning rate as needed

# Lists to store training loss for visualization
train_losses = []

# Training loop
num_epochs = 1000  # Adjust as needed
for epoch in range(num_epochs):
    # Forward pass
    predictions = model(X_train_tensor)
    
    # Compute the loss
    loss = criterion(predictions, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Append the training loss to the list
    train_losses.append(loss.item())
    
    # Print training information
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Plot the training loss over epochs
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Over Epochs')
plt.legend()
plt.show()

# Evaluate the model on the test set
with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')

    # Calculate Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test_tensor, test_predictions)
    print(f'Mean Absolute Error (MAE): {mae:.4f}')

# Plot predicted versus actual values on the test set
plt.scatter(y_test_tensor, test_predictions, alpha=0.5)
plt.xlabel('Actual Charges')
plt.ylabel('Predicted Charges')
plt.title('Actual vs. Predicted Charges on Test Set')
plt.show()

# Print some predicted versus actual values
with torch.no_grad():
    model.eval()
    sample_indices = np.random.choice(len(X_test_tensor), 5, replace=False)
    for i in sample_indices:
        prediction = model(X_test_tensor[i])
        print(f'Sample {i + 1}: Predicted={prediction.item():.4f}, Actual={y_test_tensor[i].item():.4f}')



ModuleNotFoundError: No module named 'torch'

In [None]:
import matplotlib.pyplot as plt


# Lists to store training loss for visualization
train_losses = []

# Training loop
for epoch in range(num_epochs):

    train_losses.append(loss.item())


# Plot the training loss over epochs
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Over Epochs')
plt.legend()
plt.show()

with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')

# Plot predicted versus actual values on the test set
plt.scatter(y_test_tensor, test_predictions, alpha=0.5)
plt.xlabel('Actual Charges')
plt.ylabel('Predicted Charges')
plt.title('Actual vs. Predicted Charges on Test Set')
plt.show()

# Print some predicted versus actual values
with torch.no_grad():
    model.eval()
    sample_indices = np.random.choice(len(X_test_tensor), 5, replace=False)
    for i in sample_indices:
        prediction = model(X_test_tensor[i])
        print(f'Sample {i + 1}: Predicted={prediction.item():.4f}, Actual={y_test_tensor[i].item():.4f}')

The test loss of a nearly constant 314447200.0000 was unexpectedly large. It was assumed that the test loss was going to be significantly higher than in part 2 since deep models do not perform the best on structured data. However, when one observes the MAE of the random forest was = 2755.556764385067 and in comparision the MAE of the feed-forward model was = 12633.0352 which is nearly 6 times larger. In the next section I will see if adjusting the hyperparameters (learning rate and dropout rate) will improve the MAE and test loss.
I will also add a validation data set. 


## 2. Adding validation graphs and adjusting hyperparameters of Feed-Forward Model
Below I changed the dropout rate to .7, changed the learning rate to .006 to increase the nodes that drop out and to take more granular steps across the model to find new minima. Additionally, I added training and validation graphs to see how the model is performing.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Assuming 'features' is your input data and 'target' is the corresponding target variable (charges)
# Update this based on your actual dataset
# features = ...
# target = ...

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(features, target, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Define the model architecture
class FeedForwardModel(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_rate=0.7):
        super(FeedForwardModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Set the input size, hidden layer sizes, and output size
input_size = features.shape[1]
hidden_size1 = 64  # Adjust as needed
hidden_size2 = 32  # Adjust as needed
output_size = 1
dropout_rate = 0.7  # Adjust as needed

# Instantiate the model
model = FeedForwardModel(input_size, hidden_size1, hidden_size2, output_size, dropout_rate)

# Set up the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.006)

# Training loop
num_epochs = 1000  # Adjust as needed
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    # Forward pass and training loss
    model.train()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Validation loss
    model.eval()
    with torch.no_grad():
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor)

    # Save losses for plotting
    train_losses.append(loss.item())
    val_losses.append(val_loss.item())

    # Print training information
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

# Plot the training and validation losses
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    mae_test = mean_absolute_error(y_test_tensor.numpy(), test_predictions.numpy())
    print(f'Mean Absolute Error on Test Set: {mae_test:.4f}')

# Visualize results
plt.scatter(y_test_tensor.numpy(), test_predictions.numpy(), alpha=0.5)
plt.xlabel('Actual Charges')
plt.ylabel('Predicted Charges')
plt.title('Actual vs. Predicted Charges on Test Set')
plt.show()

The fine tuning of hyper parameters drastically decreased the MAE to 8751.6582, only 4 times larger than the MAE of the random forest model. Since fine tuning these hyper parameters drastically decreased the MAE, these parameters and this model will be selected for the final model of part 3.


## 3. Increasing the lr and dropout rate of Feed-Forward Model
Below I changed the dropout rate to .8, changed the learning rate to .007 to see the results.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Assuming 'features' is your input data and 'target' is the corresponding target variable (charges)
# Update this based on your actual dataset
# features = ...
# target = ...

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(features, target, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Define the model architecture
class FeedForwardModel(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_rate=0.8):
        super(FeedForwardModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Set the input size, hidden layer sizes, and output size
input_size = features.shape[1]
hidden_size1 = 64  # Adjust as needed
hidden_size2 = 32  # Adjust as needed
output_size = 1
dropout_rate = 0.8  # Adjust as needed

# Instantiate the model
model = FeedForwardModel(input_size, hidden_size1, hidden_size2, output_size, dropout_rate)

# Set up the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.007)

# Training loop
num_epochs = 1000  # Adjust as needed
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    # Forward pass and training loss
    model.train()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Validation loss
    model.eval()
    with torch.no_grad():
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor)

    # Save losses for plotting
    train_losses.append(loss.item())
    val_losses.append(val_loss.item())

    # Print training information
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

# Plot the training and validation losses
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    mae_test = mean_absolute_error(y_test_tensor.numpy(), test_predictions.numpy())
    print(f'Mean Absolute Error on Test Set: {mae_test:.4f}')

# Visualize results
plt.scatter(y_test_tensor.numpy(), test_predictions.numpy(), alpha=0.5)
plt.xlabel('Actual Charges')
plt.ylabel('Predicted Charges')
plt.title('Actual vs. Predicted Charges on Test Set')
plt.show()

The MAE decreased further to 8554.5703, so I will continue to increase the learning rate and dropout rate to see if the MAE continues to decrease.


## 4. Increasing the lr and dropout rate of Feed-Forward Model
Below I changed the dropout rate to .9, changed the learning rate to .008 to see the further hypothesized improvement.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Assuming 'features' is your input data and 'target' is the corresponding target variable (charges)
# Update this based on your actual dataset
# features = ...
# target = ...

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(features, target, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Define the model architecture
class FeedForwardModel(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_rate=0.9):
        super(FeedForwardModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Set the input size, hidden layer sizes, and output size
input_size = features.shape[1]
hidden_size1 = 64  # Adjust as needed
hidden_size2 = 32  # Adjust as needed
output_size = 1
dropout_rate = 0.9  # Adjust as needed

# Instantiate the model
model = FeedForwardModel(input_size, hidden_size1, hidden_size2, output_size, dropout_rate)

# Set up the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.008)

# Training loop
num_epochs = 1000  # Adjust as needed
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    # Forward pass and training loss
    model.train()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Validation loss
    model.eval()
    with torch.no_grad():
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor)

    # Save losses for plotting
    train_losses.append(loss.item())
    val_losses.append(val_loss.item())

    # Print training information
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

# Plot the training and validation losses
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    mae_test = mean_absolute_error(y_test_tensor.numpy(), test_predictions.numpy())
    print(f'Mean Absolute Error on Test Set: {mae_test:.4f}')

# Visualize results
plt.scatter(y_test_tensor.numpy(), test_predictions.numpy(), alpha=0.5)
plt.xlabel('Actual Charges')
plt.ylabel('Predicted Charges')
plt.title('Actual vs. Predicted Charges on Test Set')
plt.show()

The MAE continued to decrease to 8526.3916; however the validation error in the set continue to increase. This showed that there was a point of diminishing returns and that the model was overfitting. 


## 5. Graphs for Comparison
This is the same residual graph I used in part 2 and will use this to measure the success of the feed forward model. The residual graph shows that the feed forward model is not as accurate as the random forest model. The feed forward model has a much larger range of residuals and the residuals are not as close to 0 as the random forest model.

In [None]:
# Evaluate the model on the test set
with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')

    # Calculate Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test_tensor, test_predictions)
    print(f'Mean Absolute Error (MAE): {mae:.4f}')

