## Motivation
Practice regression using a deep learning framework (TensorFlow/Keras).
## Goal
Predict the age of possums based on their measurements.

In [None]:
!pip install --upgrade tensorflow sagemaker
!pip install keras
!pip install missingno



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
import missingno as msno

In [None]:
# Load the possum data and discard the columns that are not used for modelling.

# Source: https://www.kaggle.com/datasets/abrambeyer/openintro-possum
unused_columns = ['case', 'site', 'Pop']
df = pd.read_csv('possum.csv').drop(columns=unused_columns)

In [None]:
# (rows, columns) of the frame after dropping 'case', 'site' and 'Pop'.
df.shape

In [None]:
# Preview the first rows to sanity-check the remaining columns.
df.head()

In [None]:
# Visualise per-column completeness to spot missing values before they are dropped.
msno.bar(df)

In [None]:
# Keep only the rows without any missing value.
df = df.dropna()

In [None]:
# Correlate numeric columns only. `sex` is still a categorical column at this point
# (it is one-hot encoded later), and pandas >= 2.0 raises when DataFrame.corr()
# meets a non-numeric column.
sns.heatmap(df.select_dtypes(include='number').corr(), annot = True)

In [None]:
# Pairwise scatter plots of every column, coloured by the `sex` column.
sns.pairplot(df, hue="sex")

In [None]:
## Feature Engineering

# One-hot encode `sex`. dtype=float keeps the indicator columns numeric: pandas >= 2.0
# defaults to bool, and a frame mixing bool and float columns converts to an
# object-dtype NumPy array, which cannot be turned into a tensor downstream.
dataset = pd.get_dummies(df, columns=['sex'], prefix='', prefix_sep='', dtype=float)

In [None]:
from sklearn.model_selection import train_test_split

# Select the features by name instead of by position, so the split does not silently
# depend on `age` being the first column of `dataset`.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.drop(columns=['age']),
    dataset['age'],
    test_size=0.33,
    random_state=42,
)

## Baseline Model

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply them to
# both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LinearRegression

# Linear-regression baseline fitted on the standardised training features.
baseline = LinearRegression()
reg = baseline.fit(X_train_scaled, y_train)

In [None]:
# Score of the baseline on the training split (R^2 for LinearRegression).
reg.score(X_train_scaled, y_train)

In [None]:
# Residual Analysis

# Residuals (actual - predicted) of the linear baseline on the held-out split.
# Baseline-specific names keep these values from being confused with the
# network's `predictions` computed later in the notebook.
baseline_predictions = reg.predict(X_test_scaled)
baseline_residuals = pd.DataFrame({
    'residuals': y_test.to_numpy() - baseline_predictions,
    'predictions': baseline_predictions,
})
# Pass the frame by keyword: seaborn releases before 0.12 treat the first
# positional argument as `x`, which clashes with the explicit `x=`.
sns.regplot(data=baseline_residuals, x='predictions', y='residuals')

## Artificial Neural Network

In [None]:
# Prepare Normalizer
# Cast to a float array explicitly: if any feature column is non-float (for example
# bool one-hot indicators), np.array(X_train) yields an object-dtype array that
# cannot be converted to a tensor.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.asarray(X_train, dtype=np.float64))

In [None]:
from tensorflow.keras import regularizers

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout masks
# and batch shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Create the Model
# Normalised inputs -> two hidden layers of 8 ReLU units (L1 weight penalty 0.01),
# each followed by 20% dropout -> a single linear output unit for the predicted age.
model = tf.keras.Sequential([
    normalizer,
    layers.Dense(8, activation='relu', kernel_regularizer=regularizers.L1(0.01)),
    layers.Dropout(.2),
    layers.Dense(8, activation='relu', kernel_regularizer=regularizers.L1(0.01)),
    layers.Dropout(.2),
    layers.Dense(units=1)
])

model.summary()

In [None]:
# Compile the Model
# Adam with a 0.1 learning rate, optimising mean absolute error.
adam = tf.keras.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=adam, loss='mean_absolute_error')

In [None]:
# Import from tensorflow.keras so the callback comes from the same Keras
# implementation as the model (the rest of the notebook uses tf.keras).
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping on validation loss. `patience` has to stay below the number of
# training epochs (25 in the fit cell) or the callback can never fire, so it is
# set to 10 here. restore_best_weights rolls the model back to the epoch with the
# lowest validation loss.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10,
                   restore_best_weights=True)

In [None]:
%%time
history = model.fit(
    X_train,
    y_train,
    epochs=25,
    verbose = 0,
    # Calculate validation results on 20% of the training data.
    validation_split = 0.2,
    callbacks = [es]
)

In [None]:
def plot_loss(history, ylim=(0, 10)):
    """Plot the per-epoch training and validation loss recorded during fitting.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
        ylim: ``(bottom, top)`` limits for the y-axis. Defaults to ``(0, 10)``.
    """
    for key in ('loss', 'val_loss'):
        # 'val_loss' is absent when the model was fitted without validation data.
        if key in history.history:
            plt.plot(history.history[key], label=key)
    plt.ylim(ylim)
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)

# Plot the loss curves recorded in `history` by the training cell.
plot_loss(history)

In [None]:
#model.predict(X_train[:10])

The MAE on the training and test sets is comparable (see the two cells below).

In [None]:
# Flatten the (n_samples, 1) network output into a 1-D vector aligned with y_test.
test_output = model.predict(X_test)
predictions = test_output.reshape(-1)

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the network on the training split.
train_predictions = model.predict(X_train).reshape(-1,)
mean_absolute_error(y_train, train_predictions)

In [None]:
# Mean absolute error of the network on the held-out test split.
mean_absolute_error(y_test, predictions)

In [None]:
# Two-column array: residuals (actual - predicted) next to the predictions themselves.
residual_values = np.asarray(y_test - predictions)
foo = np.column_stack((residual_values, predictions))

In [None]:
# Label the two columns so they can be referenced by name when plotting.
column_names = ['residuals', 'predictions']
foo2 = pd.DataFrame(data=foo, columns=column_names)

In [None]:
# https://stackoverflow.com/questions/62681388/residual-plot-for-residual-vs-predicted-value-in-python
# Residuals vs. predictions for the network. The frame is passed by keyword because
# seaborn releases before 0.12 treat the first positional argument as `x`, which
# clashes with the explicit `x=`.
sns.regplot(data=foo2, x='predictions', y='residuals')

In [None]:
# https://stackoverflow.com/questions/65699008/how-can-i-calculate-r-square-in-pandas-dataframe-elegantly

from sklearn.metrics import r2_score

# R^2 compares the true targets with the model's predictions: r2_score(y_true, y_pred).
# Scoring the residuals against the predictions does not measure goodness of fit.
r2_score(y_test, predictions)

Based on the correlation plot, the features given to us are only weakly related to age, so they cannot support a useful model for it. Both models produce poor results because these features have weak predictive power for age.

Resources/References:
- https://www.tensorflow.org/tutorials/keras/regression
- https://www.kaggle.com/code/elisthefox/predicting-fuzzy-and-his-possum-friends