# Admissions Data

Codecademy Exercise: Deep Learning Regression

Predict graduate school admission probability using a neural network to perform regression.

In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# read the admissions CSV (expected in the working directory) into a DataFrame
dataset = pd.read_csv('admissions_data.csv')

# quick look at the data: the first five rows, then per-column summary statistics
first_rows = dataset.head()  # head() returns five rows by default
summary_stats = dataset.describe()
print(first_rows)
print(summary_stats)

In [None]:
# the serial number is only a row identifier, so drop it before modelling
dataset = dataset.drop(columns=['Serial No.'])

# the last column is taken as the regression target; everything before it is a feature
features = dataset.iloc[:, :-1]
labels = dataset.iloc[:, -1]

# hold out 20% of the rows as a test set (fixed seed so the split is reproducible)
features_train, features_test, labels_train_set, labels_test_set = train_test_split(
    features,
    labels,
    test_size=0.20,
    random_state=42,
)

# standardize every numeric column; any other column is passed through unchanged
numerical_features = features.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_features.columns
ct = ColumnTransformer(
    [('numeric', StandardScaler(), numerical_columns)],
    remainder='passthrough',
)

# fit the scaler on the training split only, then reuse those statistics
# for the test split so no test information leaks into preprocessing
features_train_scale = ct.fit_transform(features_train)
features_test_scale = ct.transform(features_test)

In [None]:
# build the model: a small fully connected regression network
# input width equals the number of feature columns
num_features = features.shape[1]
my_model = Sequential()
my_model.add(InputLayer(input_shape=(num_features,)))
my_model.add(Dense(16, activation='relu'))  # first hidden layer
my_model.add(Dropout(0.1))  # light dropout for regularization
my_model.add(Dense(8, activation='relu'))  # second hidden layer
my_model.add(Dropout(0.2))
my_model.add(Dense(1))  # output layer: one unit, no activation, for regression

# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" after the table
my_model.summary()

# initialize the gradient descent optimizer
opt = Adam(learning_rate=0.005)

# compile the model
# using mean squared error as the loss function and mean absolute error as the metric
my_model.compile(loss='mse', metrics=['mae'], optimizer=opt)

In [None]:
# train the model
# early stopping: halt once the validation loss has gone 20 epochs without improving
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    verbose=1,
)

# 25% of the training data is held out for validation; the returned History
# object records the per-epoch loss and metric values
history = my_model.fit(
    features_train_scale,
    labels_train_set,
    validation_split=0.25,
    batch_size=8,
    epochs=100,
    callbacks=[es],
    verbose=1,
)

In [None]:
# score the trained network on the held-out test split
# evaluate() yields the loss first, then the compiled metric, so despite the
# "val_" prefix these two values are test-set MSE and MAE respectively
val_mse, val_mae = my_model.evaluate(
    features_test_scale,
    labels_test_set,
    verbose=1,
)
print('MAE: ', val_mae)

In [None]:
import matplotlib.pyplot as plt

# one figure with two stacked panels: MAE on top, loss underneath
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

# each entry: (axes, training-history key, validation-history key, title, y-axis label)
panels = (
    (ax1, 'mae', 'val_mae', 'model mae', 'MAE'),
    (ax2, 'loss', 'val_loss', 'model loss', 'loss'),
)

# draw the training and validation curves per epoch on each panel
for axis, train_key, val_key, title, y_label in panels:
    axis.plot(history.history[train_key])
    axis.plot(history.history[val_key])
    axis.set_title(title)
    axis.set_ylabel(y_label)
    axis.set_xlabel('epoch')
    axis.legend(['train', 'validation'], loc='upper left')

# used to keep plots from overlapping each other
fig.tight_layout()