# Training the model

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score
from math import sqrt

In [2]:
# Load the admissions dataset from the parent directory.
# A forward slash is used on purpose: the previous '..\A...' spelling contained
# the invalid escape sequence '\A' (a warning on recent Python versions) and
# only resolved on Windows. Forward slashes work on every platform.
df = pd.read_csv('../Admission_Predict.csv')
df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
495,496,332,108,5,4.5,4.0,9.02,1,0.87
496,497,337,117,5,5.0,5.0,9.87,1,0.96
497,498,330,120,5,4.5,5.0,9.56,1,0.93
498,499,312,103,4,4.0,5.0,8.43,0,0.73


In [3]:
# Target: the admission probability column. The trailing space is part of the
# column label as it is spelled in this frame, so it must be kept.
Y = df['Chance of Admit ']

# Features: everything except the running serial number and the target itself.
X = df.drop(columns=['Serial No.', 'Chance of Admit '])

In [5]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [11]:
# Fit a linear regression on the training split (fit() returns the estimator
# itself, so construction and fitting can be chained), then predict the
# held-out rows for evaluation.
model = LinearRegression().fit(X_train, Y_train)
Y_pred = model.predict(X_test)

In [21]:
# Regression quality on the held-out split.
mse = mean_squared_error(Y_test, Y_pred)
rmse = sqrt(mse)  # same units as the target, easier to read than MSE
r2 = r2_score(Y_test, Y_pred)

# Report each metric on its own line as "<label> <value>".
for label, value in (
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', rmse),
    ('R^2 Score:', r2),
):
    print(label, value)

Mean Squared Error: 0.0036342178012511005
Root Mean Squared Error: 0.06028447396511892
R^2 Score: 0.8256306039919492


In [25]:
# Explained variance of the test-set predictions, expressed as a percentage
# rounded to two decimals.
# NOTE(review): the value printed as "Accuracy" is an explained-variance score
# for a regression model, not a classification accuracy - consider relabelling.
EVS = explained_variance_score(Y_test, Y_pred)
accuracy = round(100 * EVS, 2)
print('Accuracy = {}%'.format(accuracy))

Accuracy = 82.73%


# Pickling

In [28]:
# Sanity-check the fitted model on two hand-written applicants.
# The rows are wrapped in a DataFrame that reuses X's column labels: the model
# was fitted on a DataFrame, and predicting on a bare list makes scikit-learn
# (1.0+) warn that "X does not have valid feature names" while silently relying
# on the values being in training-column order.
sample_applicants = pd.DataFrame(
    [
        [337, 118, 4, 4.5, 4.5, 9.65, 1],
        [324, 107, 4, 4.0, 4.5, 8.87, 1],
    ],
    columns=X.columns,
)
model.predict(sample_applicants)

array([0.95967505, 0.80190743])

In [30]:
import joblib

In [31]:
# Persist the fitted estimator to disk; the call returns the list of files
# written, which is shown as the cell output.
joblib.dump(value=model, filename='sklearn_LinearRegression.pkl')

['sklearn_LinearRegression.pkl']

In [32]:
# Restore the estimator from the file written by joblib.dump.
# joblib.load unpickles the file, so only load model files you trust.
model_from_joblib = joblib.load(filename='sklearn_LinearRegression.pkl')

In [33]:
# Round-trip check: the reloaded estimator should reproduce the predictions of
# the in-memory `model` for the same two applicants.
# The rows carry X's column labels so the feature order is explicit and
# scikit-learn (1.0+) does not warn about missing feature names when a model
# fitted on a DataFrame is given a bare list.
sample_applicants = pd.DataFrame(
    [
        [337, 118, 4, 4.5, 4.5, 9.65, 1],
        [324, 107, 4, 4.0, 4.5, 8.87, 1],
    ],
    columns=X.columns,
)
model_from_joblib.predict(sample_applicants)

array([0.95967505, 0.80190743])