<a href="https://colab.research.google.com/github/chavalvit-k/Admissions-Prediction/blob/main/Admissions_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and Import Libraries

In [50]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [62]:
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyRegressor
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.optimizers import Adam

from scikeras.wrappers import KerasRegressor

# Import Dataset

In [2]:
# Read the admissions table from the CSV file and preview its first five rows.
df = pd.read_csv(filepath_or_buffer = "admissions_data.csv")
df.head(n = 5)

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


# Explore Dataset

In [3]:
# Summary statistics for every column except "Serial No.".
df.drop(columns = ["Serial No."]).describe()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,316.472,107.192,3.114,3.374,3.484,8.57644,0.56,0.72174
std,11.295148,6.081868,1.143512,0.991004,0.92545,0.604813,0.496884,0.14114
min,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,308.0,103.0,2.0,2.5,3.0,8.1275,0.0,0.63
50%,317.0,107.0,3.0,3.5,3.5,8.56,1.0,0.72
75%,325.0,112.0,4.0,4.0,4.0,9.04,1.0,0.82
max,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


# Split Training Set and Testing Set

In [4]:
# Positional split of the table: the first column is skipped, the last column
# is the regression target, and everything in between is a feature.
X, y = df.iloc[:, 1:-1], df.iloc[:, -1]

In [5]:
# Preview the feature columns.
X.head(n = 5)

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337,118,4,4.5,4.5,9.65,1
1,324,107,4,4.0,4.5,8.87,1
2,316,104,3,3.0,3.5,8.0,1
3,322,110,3,3.5,2.5,8.67,1
4,314,103,2,2.0,3.0,8.21,0


In [6]:
# Preview the target column.
y.head(n = 5)

Unnamed: 0,Chance of Admit
0,0.92
1,0.76
2,0.72
3,0.8
4,0.65


In [7]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  train_size = 0.8,
  random_state = 42,
)

In [8]:
# Learn the standardisation statistics from the training split only, then
# apply that same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build Regression Model

In [86]:
def build_model(neurons = 8):
  """Create and compile a one-hidden-layer regression network.

  The input width is read from the module-level X_train_scaled, so that
  array must exist before this function is called.

  Args:
    neurons: Number of units in the single ReLU hidden layer.

  Returns:
    A compiled Sequential model with one linear output unit, MSE loss,
    MAE as an extra metric, and an Adam optimizer (learning rate 0.01).
  """
  n_features = X_train_scaled.shape[1]

  network = Sequential()
  network.add(InputLayer(shape = (n_features,)))
  network.add(Dense(neurons, activation = "relu"))
  network.add(Dense(1))

  network.compile(
    loss = "mse",
    optimizer = Adam(learning_rate = 0.01),
    metrics = ["mae"],
  )

  return network

In [120]:
def build_best_model(seed = None):
  """Grid-search the network's hyperparameters, then train a model with the winners.

  Uses the module-level X_train_scaled / y_train arrays and the build_model
  factory. The search is a 3-fold cross-validation over batch size, number
  of epochs and hidden-layer width, scored by negated mean squared error
  (so the printed "Best score" is negative and closer to zero is better).

  Args:
    seed: Optional integer. When given, the Python, NumPy and Keras backend
      random generators are seeded before the search and again before the
      final fit, which makes repeated runs comparable. The default (None)
      leaves the random state untouched.

  Returns:
    The Keras model produced by build_model with the best hidden-layer
    width, fitted on the whole training split using the best batch size
    and epoch count.
  """
  if seed is not None:
    from tensorflow.keras.utils import set_random_seed
    set_random_seed(seed)

  model = KerasRegressor(model = build_model, verbose = 0)

  param_grid = {
    "batch_size": [1, 4],
    "epochs": [10, 20, 30],
    "model__neurons": [4, 8]
  }

  # refit = False: the default would train the best configuration on the whole
  # training split once more, but that estimator is never used here because
  # the final model is trained explicitly below.
  # Train-fold scores are not requested either: they would only be stored in
  # cv_results_, which this function never reads.
  grid = GridSearchCV(
    estimator = model,
    param_grid = param_grid,
    scoring = "neg_mean_squared_error",
    cv = 3,
    refit = False,
  )
  grid.fit(X_train_scaled, y_train, verbose = 0)

  print("Best score:", grid.best_score_)
  print("Best result:", grid.best_params_)

  best_params = grid.best_params_

  # Re-seed so the final fit does not depend on how many random draws the
  # search consumed.
  if seed is not None:
    set_random_seed(seed)

  best_model = build_model(neurons = best_params["model__neurons"])

  best_model.fit(X_train_scaled, y_train, batch_size = best_params["batch_size"], epochs = best_params["epochs"], verbose=1)

  return best_model

In [121]:
# Run the hyperparameter search and the final training, then print the
# layer / parameter table of the resulting Keras model.
best_model = build_best_model()
best_model.summary()

Best score: -0.004388642463013102
Best result: {'batch_size': 1, 'epochs': 20, 'model__neurons': 8}
Epoch 1/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.3727 - mae: 0.4060
Epoch 2/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0073 - mae: 0.0676
Epoch 3/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0050 - mae: 0.0556
Epoch 4/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0046 - mae: 0.0519
Epoch 5/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0055 - mae: 0.0563
Epoch 6/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0043 - mae: 0.0477
Epoch 7/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0046 - mae: 0.0517
Epoch 8/20
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0

# Evaluate Model

In [122]:
# The model was compiled with MSE as its loss and MAE as its only metric, so
# evaluate() on the held-out split yields those two values in that order.
mse, mae = best_model.evaluate(x = X_test_scaled, y = y_test)
print("MSE: ", mse)
print("MAE: ", mae)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0050 - mae: 0.0527  
MSE:  0.004604737740010023
MAE:  0.05091604217886925


In [125]:
# Baseline for comparison: a regressor that always predicts the mean of the
# training targets, scored with MAE on the test split.
dummy_regr = DummyRegressor(strategy = "mean").fit(X_train_scaled, y_train)
y_pred = dummy_regr.predict(X_test_scaled)
baseline_MAE = mean_absolute_error(y_true = y_test, y_pred = y_pred)
print("Baseline MAE:", baseline_MAE)

Baseline MAE: 0.116268


In [127]:
# Coefficient of determination of the trained network on the test split.
y_pred = best_model.predict(X_test_scaled)
r_square = r2_score(y_true = y_test, y_pred = y_pred)
print("R-Square:", r_square)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
R-Square: 0.7748294486105487
