In [None]:
#My Answer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score

# Load the admissions dataset from the working directory.
data = pd.read_csv('Admission_Predict.csv')

# 'Serial No.' is only a row identifier, so it is removed before modelling.
data = data.drop(columns=['Serial No.'])

# The last column is used as the regression label; all remaining columns are features.
labels = data.iloc[:, -1]
features = data.iloc[:, :-1]

# Hold out 20% of the rows as a test set (fixed seed so the split is repeatable).
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Scale every feature column to zero mean / unit variance.
# StandardScaler is used rather than Normalizer on purpose: sklearn's Normalizer
# rescales each *row* (sample) to unit norm, which lets the largest-valued column
# dominate and does not put the columns on a common scale.
# The variable names keep their *_norm suffix because later cells refer to them.
ct = ColumnTransformer([('norm', StandardScaler(), features.columns)], remainder='passthrough')

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to the test rows so no test information leaks into training.
features_train_norm = ct.fit_transform(features_train)
features_test_norm = ct.transform(features_test)

# ColumnTransformer returns numpy arrays; wrap them back into labelled DataFrames.
features_train_norm = pd.DataFrame(features_train_norm, columns=features_train.columns)
features_test_norm = pd.DataFrame(features_test_norm, columns=features_test.columns)

####Create neural network to perform a regression analysis on the admission data####
model = Sequential()

# Input layer sized to the number of feature columns.
# Named input_layer (not `input`) so the builtin input() is not shadowed for
# every cell that runs after this one.
input_layer = layers.InputLayer(input_shape=(features.shape[1],))
model.add(input_layer)

# One hidden layer with 64 units and ReLU activation.
model.add(layers.Dense(64, activation='relu'))

# Output layer: a single neuron with no activation specified.
model.add(layers.Dense(1))

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

# Train against mean squared error and track mean absolute error as a metric.
opt = optimizers.Adam(learning_rate=0.01)
model.compile(loss='mse', metrics=['mae'], optimizer=opt)


####Hyperparameter tuning####
# Training-loop settings.
num_epochs = 80
batch_size = 5

# Early stopping watches the validation loss (lower is better) with a patience of 20 epochs.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    verbose=1,
)

# Train on the preprocessed training features; validation_split=0.2 asks Keras
# to reserve a fifth of the training data for validation.
history = model.fit(
    features_train_norm,
    labels_train,
    validation_split=0.2,
    epochs=num_epochs,
    batch_size=batch_size,
    callbacks=[es],
    verbose=1,
)


# Score the trained model on the preprocessed test set.
# The model was compiled with loss='mse' and metrics=['mae'], so evaluate()
# yields those two values in that order.
res_mse, res_mae = model.evaluate(x=features_test_norm, y=labels_test, verbose=0)

# Report the test-set mean squared error and mean absolute error.
for metric_label, metric_value in (('MSE: ', res_mse), ('MAE: ', res_mae)):
    print(metric_label, metric_value)

# R-squared of the test-set predictions against the true labels.
y_pred = model.predict(features_test_norm)
test_r2 = r2_score(labels_test, y_pred)
print('R^2: ', test_r2)

# Do extensions code below
# if you decide to do the Matplotlib extension, you must save your plot in the directory by uncommenting the line of code below

# fig.savefig('static/images/my_plots.png')

In [None]:
###Same code as above, but using design_model function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score

# Load the admissions dataset from the working directory.
data = pd.read_csv('Admission_Predict.csv')

# 'Serial No.' is only a row identifier, so it is removed before modelling.
data = data.drop(columns=['Serial No.'])

# The last column is used as the regression label; all remaining columns are features.
labels = data.iloc[:, -1]
features = data.iloc[:, :-1]

# Hold out 20% of the rows as a test set (fixed seed so the split is repeatable).
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Scale every feature column to zero mean / unit variance.
# StandardScaler is used rather than Normalizer on purpose: sklearn's Normalizer
# rescales each *row* (sample) to unit norm, which lets the largest-valued column
# dominate and does not put the columns on a common scale.
# The variable names keep their *_norm suffix because later cells refer to them.
ct = ColumnTransformer([('norm', StandardScaler(), features.columns)], remainder='passthrough')

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to the test rows so no test information leaks into training.
features_train_norm = ct.fit_transform(features_train)
features_test_norm = ct.transform(features_test)

# ColumnTransformer returns numpy arrays; wrap them back into labelled DataFrames.
features_train_norm = pd.DataFrame(features_train_norm, columns=features_train.columns)
features_test_norm = pd.DataFrame(features_test_norm, columns=features_test.columns)


# design deep learning model
def design_model(features, learning_rate):
    """Build and compile a small fully connected regression network.

    Args:
        features: Two-dimensional feature table. Only ``features.shape[1]``
            is read, to size the input layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model made of an input layer, Dense(64, relu),
        Dense(16, relu) and a Dense(1) output, using MSE as loss and MAE as metric.
    """
    n_features = features.shape[1]

    network = Sequential()
    network.add(layers.InputLayer(input_shape=(n_features,)))

    # Two hidden ReLU layers, widest first.
    for units in (64, 16):
        network.add(layers.Dense(units, activation='relu'))

    # Single-neuron output for the regression target.
    network.add(layers.Dense(1))

    network.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae'],
    )
    return network


# hyperparameters
learning_rate = 0.05
epochs = 100
batch_size = 5

# Build the network from the preprocessed training features.
model = design_model(features_train_norm, learning_rate)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

# initialize EarlyStopping that monitors the validation loss (patience of 20 epochs)
stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)

# fit the model; validation_split=0.2 reserves a fifth of the training data for validation
history = model.fit(
    features_train_norm,
    labels_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_split=0.2,
    callbacks=[stop],
)

# evaluate the model on the test data
# NOTE: despite the val_ prefix (kept so the names stay stable), these two
# values are computed on the held-out test set, not on the validation split.
val_mse, val_mae = model.evaluate(features_test_norm, labels_test, verbose=0)

# print test-set mean squared error and mean absolute error
print('MSE: ', val_mse)
print('MAE: ', val_mae)

# evaluate R-squared score on the test-set predictions
y_pred = model.predict(features_test_norm)
print('R^2: ', r2_score(labels_test, y_pred))

In [None]:
###Same code as above, but using design_model function and fit_model function **** NEED TO COMPLETE
def fit_model(f_train, l_train, learning_rate, num_epochs, batch_size=16):
    """Build a fresh model with design_model and train it on the given data.

    The data passed in by the caller is what gets used: ``f_train`` sizes the
    network's input layer and is the training input, ``l_train`` is the target.
    (No notebook-level variables are read for the data.)

    Args:
        f_train: Training features, forwarded to ``design_model`` and ``model.fit``.
        l_train: Training labels matching ``f_train``.
        learning_rate: Learning rate forwarded to ``design_model``.
        num_epochs: Maximum number of epochs; early stopping may end training sooner.
        batch_size: Mini-batch size for ``model.fit``. Defaults to 16.

    Returns:
        The history object returned by ``model.fit``.
    """
    # Build the network for the feature table that was actually passed in.
    model = design_model(f_train, learning_rate)
    # Stop when the validation loss stops improving (patience of 20 epochs).
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)
    # Train silently; a fifth of the training data is reserved for validation.
    history = model.fit(
        f_train,
        l_train,
        epochs=num_epochs,
        batch_size=batch_size,
        verbose=0,
        validation_split=0.2,
        callbacks=[es],
    )
    return history

# Settings for this training run.
learning_rate = 0.1
num_epochs = 500

# Train through fit_model so a fresh network is built for this run, and hand it
# the preprocessed features. Calling model.fit() directly here would continue
# training whatever `model` an earlier cell left behind, on unscaled inputs.
# fit_model sets up its own EarlyStopping callback.
history = fit_model(features_train_norm, labels_train, learning_rate, num_epochs)


In [None]:
#**** LOOK INTO USING GRID SEARCH OR RANDOMISED SEARCH FOR HYPERPARAMETER TUNING

#https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
#https://machinelearningmastery.com/hyperparameter-optimization-with-random-search-and-grid-search/

In [None]:
def do_grid_search():
  """Grid-search batch size, epoch count and learning rate for the design_model network.

  Reads the notebook-level ``features_train``, ``labels_train`` and
  ``design_model``. Prints the fitted search object, the best score with its
  parameters, and then the mean / standard deviation of the cross-validated
  test and train scores for every parameter combination. The scorer is mean
  squared error with ``greater_is_better=False``, so reported scores are
  negated MSE values (closer to zero is better).

  Returns:
      None. All results are printed.
  """
  # Imported locally because none of the notebook's import cells provide these names.
  from sklearn.metrics import make_scorer, mean_squared_error
  from sklearn.model_selection import GridSearchCV
  # NOTE(review): this wrapper module exists only in older TensorFlow releases;
  # newer ones dropped it in favour of the separate scikeras package. Confirm
  # against the installed TensorFlow version.
  from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

  def build_model(learning_rate=0.01):
    # The wrapper needs a function that returns a compiled model. Exposing
    # learning_rate as an argument is what lets it be used as a grid key.
    return design_model(features_train, learning_rate)

  batch_size = [6, 64]
  epochs = [10, 50]
  learning_rate = [0.1, 0.01, 0.001]
  model = KerasRegressor(build_fn=build_model)
  param_grid = dict(batch_size=batch_size, epochs=epochs, learning_rate=learning_rate)
  grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    return_train_score=True,
  )
  # verbose=0 is forwarded to the underlying Keras fit call.
  grid_result = grid.fit(features_train, labels_train, verbose=0)
  print(grid_result)
  print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  # Per-combination scores on the cross-validation test folds.
  means = grid_result.cv_results_['mean_test_score']
  stds = grid_result.cv_results_['std_test_score']
  params = grid_result.cv_results_['params']
  for mean, stdev, param in zip(means, stds, params):
      print("%f (%f) with: %r" % (mean, stdev, param))

  # Same report for the training folds (available because return_train_score=True).
  print("Training")
  means = grid_result.cv_results_['mean_train_score']
  stds = grid_result.cv_results_['std_train_score']
  for mean, stdev, param in zip(means, stds, params):
      print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print a banner so the grid-search report is easy to locate in the cell output,
# then run the search (do_grid_search prints its own results and returns nothing useful).
print("-------------- GRID SEARCH --------------------")
do_grid_search()

In [None]:
###BELOW ARE ACTUAL ANSWERS FROM GITHUB

In [None]:
#Actual Answer from Github


import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping


# Read the admissions dataset into a DataFrame.
admissions = pd.read_csv('admissions_data.csv')

# Quick look at the data: column names, first rows, summary statistics.
for overview in (admissions.columns, admissions.head(), admissions.describe()):
    print(overview)

# The final column holds the labels to predict.
labels = admissions.iloc[:, -1]
# Everything between the first and the last column is used as a feature.
features = admissions.iloc[:, 1:-1]

# 80/20 train/test split with a fixed seed.
features_train, features_test, labels_train, labels_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=5,
)

# Standardise all feature columns through a ColumnTransformer.
scaling_step = ('scale', StandardScaler(), features.columns)
ct = ColumnTransformer([scaling_step])

# Fit the scaler on the training rows, then reuse the fitted scaler on the test rows.
features_train_scaled = ct.fit_transform(features_train)
features_test_scaled = ct.transform(features_test)

# design deep learning model
def design_model(features, learning_rate):
    """Build and compile a dropout-regularised regression network.

    Args:
        features: Two-dimensional feature table. Only ``features.shape[1]``
            is read, to size the input layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model: an input layer, three Dense/Dropout
        pairs (16 units / 0.1, 8 units / 0.2, 8 units / 0.1, all ReLU) and a
        Dense(1) output, using MSE as loss and MAE as metric.
    """
    n_features = features.shape[1]
    # (hidden units, dropout rate applied right after that hidden layer)
    hidden_spec = ((16, 0.1), (8, 0.2), (8, 0.1))

    network = Sequential()
    network.add(layers.InputLayer(input_shape=(n_features,)))

    for units, drop_rate in hidden_spec:
        network.add(layers.Dense(units, activation='relu'))
        network.add(layers.Dropout(drop_rate))

    # Single-neuron output for the regression target.
    network.add(layers.Dense(1))

    network.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae'],
    )
    return network

# hyperparameters
learning_rate = 0.001
epochs = 80
batch_size = 1

# Build the network from the scaled training features.
model = design_model(features_train_scaled, learning_rate)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

# initialize EarlyStopping that monitors the validation loss (patience of 20 epochs)
stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)

# fit the model; validation_split=0.2 reserves a fifth of the training data for validation
history = model.fit(
    features_train_scaled,
    labels_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_split=0.2,
    callbacks=[stop],
)

# evaluate the model on the test data
# NOTE: despite the val_ prefix (kept so the names stay stable), these two
# values are computed on the held-out test set, not on the validation split.
val_mse, val_mae = model.evaluate(features_test_scaled, labels_test, verbose=0)

# print test-set mean squared error and mean absolute error
print('MSE: ', val_mse)
print('MAE: ', val_mae)

# evaluate R-squared score on the test-set predictions
y_predicted = model.predict(features_test_scaled)
print('R^2: ', r2_score(labels_test, y_predicted))


# Learning curves: one panel for the loss and one for the MAE, each showing
# the training and the validation series recorded in `history`.
fig, axs = plt.subplots(1, 2, gridspec_kw={'hspace': 1, 'wspace': 0.5})
ax1, ax2 = axs

# Both panels share the same title, built from the learning rate in use.
panel_title = 'lrate=' + str(learning_rate)

# (axis, training-series key, validation-series key, y-axis label)
panels = (
    (ax1, 'loss', 'val_loss', "loss (mse)"),
    (ax2, 'mae', 'val_mae', "MAE"),
)
for axis, train_key, val_key, y_label in panels:
    axis.plot(history.history[train_key], label='train')
    axis.plot(history.history[val_key], label='validation')
    axis.set_title(panel_title)
    axis.legend(loc="upper right")
    axis.set_xlabel("# of epochs")
    axis.set_ylabel(y_label)

# Write the figure to disk in the working directory.
plt.savefig('my_plot.png')

In [None]:
#Another answer from Github

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import RMSprop

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score

# Load the admissions dataset from the working directory.
df = pd.read_csv('admissions_data.csv')

# The last column is the label to predict.
labels = df.iloc[:, -1]

# All other columns are candidate features, minus the two dropped below.
# drop() is chained here and returns a new frame: calling it with inplace=True
# on a slice of df mutates an object derived from df and can trigger pandas'
# SettingWithCopyWarning.
# NOTE(review): 'Serial No.' looks like a row identifier; the reason for also
# dropping 'University Rating' is not visible in this notebook - confirm.
features = df.iloc[:, :-1].drop(columns=['University Rating', 'Serial No.'])

# 80/20 train/test split with a fixed seed.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Standardise the features: fit on the training rows only, then apply the
# fitted scaler to the test rows.
scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)

# Network: input layer -> Dense(128, relu) -> Dense(1).
model = Sequential()
# Named input_layer (not `input`) so the builtin input() is not shadowed for
# any code that runs after this cell.
input_layer = layers.InputLayer(input_shape=(features.shape[1],))
model.add(input_layer)
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1))

# Train against mean squared error and track mean absolute error as a metric.
opt = Adam(learning_rate=0.05)
model.compile(loss='mse', metrics=['mae'], optimizer=opt)

# Early stopping on the validation loss with a patience of 40 epochs.
stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=40)

# Fit on the scaled training data; a fifth of it is reserved for validation.
history = model.fit(
    features_train_scaled,
    labels_train,
    epochs=500,
    batch_size=30,
    verbose=1,
    validation_split=0.2,
    callbacks=[stop],
)

# Test-set loss (MSE) and metric (MAE), in the order they were compiled.
res_mse, res_mae = model.evaluate(features_test_scaled, labels_test)
print(res_mse, res_mae)

# R-squared of the test-set predictions against the true labels.
y_pred = model.predict(features_test_scaled)
print('R^2: ', r2_score(labels_test, y_pred))


# Two stacked panels: MAE on top, loss below, each with the training and the
# validation series recorded in `history`.
fig = plt.figure()

# (subplot row, training-series key, validation-series key, title, y-axis label)
panels = (
    (1, 'mae', 'val_mae', 'model mae', 'MAE'),
    (2, 'loss', 'val_loss', 'model loss', 'loss'),
)
drawn_axes = []
for row, train_key, val_key, panel_title, y_label in panels:
    axis = fig.add_subplot(2, 1, row)
    axis.plot(history.history[train_key])
    axis.plot(history.history[val_key])
    axis.set_title(panel_title)
    axis.set_ylabel(y_label)
    axis.set_xlabel('epoch')
    axis.legend(['train', 'validation'], loc='upper left')
    drawn_axes.append(axis)

# Keep the individual axes available under their usual names.
ax1, ax2 = drawn_axes

# Tidy the spacing between the panels and write the figure to disk.
fig.tight_layout()
fig.savefig('static/images/my_plots.png')