## **Import necessary libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.optimizers import Adam

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score
from sklearn.compose import ColumnTransformer


## Load & Inspect Data

In [None]:
# Read the admissions CSV (path is relative to the notebook's working directory)
# and preview the first rows to sanity-check the parse.
dataset = pd.read_csv("admissions_data.csv")
dataset.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [None]:
# Summary statistics per column: a quick check of value ranges and scale
# differences between columns before any preprocessing.
dataset.describe()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,316.472,107.192,3.114,3.374,3.484,8.57644,0.56,0.72174
std,144.481833,11.295148,6.081868,1.143512,0.991004,0.92545,0.604813,0.496884,0.14114
min,1.0,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,125.75,308.0,103.0,2.0,2.5,3.0,8.1275,0.0,0.63
50%,250.5,317.0,107.0,3.0,3.5,3.5,8.56,1.0,0.72
75%,375.25,325.0,112.0,4.0,4.0,4.0,9.04,1.0,0.82
max,500.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [None]:
# Show the exact column labels. Note that some carry trailing whitespace
# (e.g. 'LOR ' and 'Chance of Admit '), which matters for label-based selection.
dataset.columns

Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')

## Split into features and labels

In [None]:
# The last column is the regression target. Column 0 (the serial number) is only
# a row identifier, so it is dropped; every column in between is a predictor.
labels = dataset.iloc[:, -1]
features = dataset.iloc[:, 1:-1]

In [None]:
# Split into train and test sets (test_size is left at scikit-learn's default).
# No one-hot encoding is required because every column is numeric.
# random_state pins the shuffle so the reported metrics are reproducible
# under Restart & Run All.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, random_state=42
)

## Scale data 

In [None]:
# Keep a handle on the feature column labels.
# NOTE(review): the later visible cells use `features.columns` directly, so this
# variable appears to be unused - confirm before removing.
features_columns = features.columns

In [None]:
# Apply a StandardScaler to every feature column. Because all columns are
# listed, the "passthrough" remainder has nothing left to pass through.
ct = ColumnTransformer(
    transformers=[("scale", StandardScaler(), features.columns)],
    remainder="passthrough",
)

In [None]:
# Fit the scaler on the training split only, then transform that same split.
features_train_scaled = ct.fit_transform(features_train)

In [None]:
# Transform the test split with the already-fitted transformer (no re-fitting),
# so test data does not influence the scaling statistics.
features_test_scaled = ct.transform(features_test)

## Create Model

In [None]:
def make_model(n_features=None):
  """Build and compile the feed-forward regression network.

  Architecture: input -> Dense(64, relu) -> Dropout(0.2) -> Dense(64, relu)
  -> Dropout(0.2) -> Dense(64, relu) -> Dense(1). Compiled with MSE loss,
  MAE as an extra metric, and Adam at learning rate 0.01.

  Args:
    n_features: Number of input features. When None (the default, and what a
      zero-argument caller such as a scikit-learn wrapper's build function
      gets), it is taken from the notebook-level `features_train`.

  Returns:
    A compiled `Sequential` model named "model".
  """
  if n_features is None:
    n_features = features_train.shape[1]

  model = Sequential(name="model")
  # The shape must be a tuple: `(n)` without the trailing comma is just an int.
  model.add(InputLayer(input_shape=(n_features,)))
  model.add(layers.Dense(64, activation="relu"))
  model.add(layers.Dropout(0.2))
  model.add(layers.Dense(64, activation="relu"))
  model.add(layers.Dropout(0.2))
  model.add(layers.Dense(64, activation="relu"))
  # Single linear output unit for the regression target.
  model.add(layers.Dense(1))
  opt = Adam(learning_rate=0.01)
  # Metrics are passed as a list, the form Keras documents and newer versions require.
  model.compile(loss="mse", metrics=["mae"], optimizer=opt)
  return model

### Early Stopping

In [None]:
# Halt training once the validation loss has gone 40 epochs without improving.
es = EarlyStopping(
    monitor="val_loss",
    patience=40,
    mode="min",
)

## Fit and Evaluate

In [None]:
# Seed TensorFlow before the model is built so that weight initialisation,
# dropout masks and batch shuffling repeat from run to run.
tf.random.set_seed(42)
model = make_model()
# Keep the returned History object instead of letting it fall out as cell
# output; its `.history` dict holds the per-epoch loss/metric curves.
# validation_split=0.2 holds out part of the training data for early stopping.
history = model.fit(
    features_train_scaled,
    labels_train,
    epochs=100,
    batch_size=4,
    validation_split=0.2,
    verbose=0,
    callbacks=[es],
)

<tensorflow.python.keras.callbacks.History at 0x7f8a29dcb9e8>

In [None]:
# Score the trained network on the held-out test split. The model was compiled
# with loss="mse" and a single "mae" metric, so evaluate() yields those two values.
mse, mae = model.evaluate(features_test_scaled, labels_test)



In [None]:
# Test-set mean squared error followed by mean absolute error.
print(mse, mae)

0.003947214689105749 0.04829179123044014


In [None]:
# Keras attaches the History callback to the model during fit(); the per-epoch
# curves live in that callback's `.history` dict, not on the callback itself.
# `model` must still be the fitted Keras network when this cell runs.
training_log = model.history.history
plt.plot(training_log["mae"], label="training")
plt.plot(training_log["val_mae"], label="validation")
plt.title("Model MAE")
plt.xlabel("Epochs")
plt.ylabel("MAE")
plt.legend()  # without this the line labels above are never displayed
plt.show()

AttributeError: ignored

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
from scipy.stats import randint as sp_randint

In [None]:
# Search space for the randomized search: integer batch sizes in [2, 16) and
# epoch counts in [10, 100).
# The key must be "epochs" (the actual fit() argument). The legacy "nb_epoch"
# name is tolerated by the wrapper's parameter check but never forwarded to
# fit(), so every candidate would silently train for the default single epoch.
param_grid = {"batch_size": sp_randint(2, 16), "epochs": sp_randint(10, 100)}

In [None]:
# Wrap the model factory so scikit-learn can drive it. A separate name is used
# so the already-trained Keras network bound to `model` is not overwritten;
# the evaluation and plotting cells rely on that object.
regressor = KerasRegressor(build_fn=make_model)
# greater_is_better=False makes the scorer return negated MSE, so the search
# (which maximises the score) ends up minimising the error.
# random_state fixes the sampled hyper-parameter candidates between runs.
grid = RandomizedSearchCV(
    estimator=regressor,
    param_distributions=param_grid,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    n_iter=20,
    random_state=42,
)

In [None]:
# Run the randomized search on the scaled training data; verbose=0 is handed
# through as an extra fit argument. Printing the result shows the search
# object's configuration rather than the scores.
grid_result = grid.fit(features_train_scaled, labels_train, verbose=0)
print(grid_result)

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7f8a29f5d2b0>,
                   iid='deprecated', n_iter=20, n_jobs=None,
                   param_distributions={'batch_size': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f8a279da438>,
                                        'nb_epoch': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f8a279da898>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False,
                   scoring=make_scorer(mean_squared_error, greater_is_better=False),
                   verbose=0)


In [None]:
# Best cross-validated score and the parameters that produced it. The score is
# negative because the scorer was built with greater_is_better=False (negated MSE).
print(grid_result.best_score_, grid_result.best_params_)

-0.010236461047456819 {'batch_size': 3, 'nb_epoch': 71}
