In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df=pd.read_csv("Admission_Prediction.csv")

In [4]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,,104.0,3.0,3.0,3.5,8.0,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


#check for missing values

In [5]:
df.isnull().sum()

Serial No.            0
GRE Score            15
TOEFL Score          10
University Rating    15
SOP                   0
LOR                   0
CGPA                  0
Research              0
Chance of Admit       0
dtype: int64

In [18]:
df['University Rating'].mode()

0    3.0
Name: University Rating, dtype: float64

In [19]:
df['GRE Score'].fillna(df['GRE Score'].mean(), inplace = True)
df['TOEFL Score'].fillna(df['TOEFL Score'].mean(), inplace = True)
df['University Rating'].fillna(df['University Rating'].mode()[0], inplace = True)

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No.         500 non-null    int64  
 1   GRE Score          500 non-null    float64
 2   TOEFL Score        500 non-null    float64
 3   University Rating  500 non-null    float64
 4   SOP                500 non-null    float64
 5   LOR                500 non-null    float64
 6   CGPA               500 non-null    float64
 7   Research           500 non-null    int64  
 8   Chance of Admit    500 non-null    float64
dtypes: float64(7), int64(2)
memory usage: 35.3 KB


In [22]:
# seeing that after imputation no column has missing values
df.isna().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [23]:
x = df.drop(['Chance of Admit', 'Serial No.'], axis = 1)
y = df['Chance of Admit']

In [24]:
from sklearn.model_selection import train_test_split

In [25]:
train_x, test_x, train_y, test_y = train_test_split(x,y,test_size=0.33, random_state=33)

In [29]:
###Linear regression
from sklearn.linear_model import LinearRegression,RidgeCV, LassoCV, ElasticNet
regression = LinearRegression()
regression.fit(train_x, train_y)

LinearRegression()

In [32]:
predict_y = regression.predict(test_x)

In [33]:
from sklearn.metrics import mean_squared_error
mean_squared_error(test_y,predict_y)*100

0.33996235009316134

In [42]:
from sklearn.metrics import r2_score
score = r2_score(test_y, predict_y)
score

0.8120324156608805

# SVR implementation

In [36]:
from sklearn.svm import SVR

In [44]:
svr = SVR(kernel= "linear")

In [45]:
svr.fit(train_x, train_y)

SVR(kernel='linear')

In [46]:
predict_y_svr = svr.predict(test_x)

In [47]:
mean_squared_error(test_y, predict_y_svr)*100

0.4142904609310956

In [48]:
from sklearn.metrics import r2_score
score = r2_score(test_y, predict_y_svr)
score

0.7709358782388153

# Hyperparameter tuning for SVR

In [49]:
param_grid={'C':[0.1,1,10,50,100,500], 'gamma':[1,0.5,0.1,0.01,0.001],"kernel":['linear','rbf','ploy','sigmoid']}

In [51]:
from sklearn.model_selection import GridSearchCV

In [52]:
grid= GridSearchCV(SVR(),param_grid, verbose = 3)

In [53]:
grid.fit(train_x,train_y)

Fitting 5 folds for each of 120 candidates, totalling 600 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.760 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.836 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.636 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.805 total time=   0.0s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.092 total time=   0.0s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.126 total time=   0.0s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.079 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.089 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.054 total time=   0.0s
[CV 1/5] END .........C=0.1, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 2/5] END .........C=0.1, gamma=1, kernel=p

[CV 2/5] END .......C=1, gamma=1, kernel=linear;, score=0.773 total time=   0.0s
[CV 3/5] END .......C=1, gamma=1, kernel=linear;, score=0.835 total time=   0.0s
[CV 4/5] END .......C=1, gamma=1, kernel=linear;, score=0.639 total time=   0.0s
[CV 5/5] END .......C=1, gamma=1, kernel=linear;, score=0.821 total time=   0.0s
[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.124 total time=   0.0s
[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.123 total time=   0.0s
[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.093 total time=   0.0s
[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.111 total time=   0.0s
[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.064 total time=   0.0s
[CV 1/5] END ...........C=1, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 2/5] END ...........C=1, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 3/5] END ...........C=1, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 4/5] END ...........C=1,

[CV 4/5] END .....C=10, gamma=1, kernel=linear;, score=-2.039 total time=   0.4s
[CV 5/5] END .....C=10, gamma=1, kernel=linear;, score=-2.067 total time=   0.7s
[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.124 total time=   0.0s
[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.123 total time=   0.0s
[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.093 total time=   0.0s
[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.111 total time=   0.0s
[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.064 total time=   0.0s
[CV 1/5] END ..........C=10, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 2/5] END ..........C=10, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 3/5] END ..........C=10, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 4/5] END ..........C=10, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 5/5] END ..........C=10, gamma=1, kernel=ploy;, score=nan total time=   0.0s
[CV 1/5] END ....C=10, gamma

[CV 1/5] END ..C=50, gamma=0.5, kernel=linear;, score=-33.112 total time=   0.8s
[CV 2/5] END ..C=50, gamma=0.5, kernel=linear;, score=-41.054 total time=   0.8s
[CV 3/5] END ..C=50, gamma=0.5, kernel=linear;, score=-57.979 total time=   0.5s
[CV 4/5] END ..C=50, gamma=0.5, kernel=linear;, score=-44.312 total time=   0.8s
[CV 5/5] END ..C=50, gamma=0.5, kernel=linear;, score=-64.026 total time=   0.6s
[CV 1/5] END .......C=50, gamma=0.5, kernel=rbf;, score=0.281 total time=   0.0s
[CV 2/5] END .......C=50, gamma=0.5, kernel=rbf;, score=0.284 total time=   0.0s
[CV 3/5] END .......C=50, gamma=0.5, kernel=rbf;, score=0.239 total time=   0.0s
[CV 4/5] END .......C=50, gamma=0.5, kernel=rbf;, score=0.199 total time=   0.0s
[CV 5/5] END .......C=50, gamma=0.5, kernel=rbf;, score=0.144 total time=   0.0s
[CV 1/5] END ........C=50, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 2/5] END ........C=50, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 3/5] END ........C=50, g

[CV 3/5] END .C=100, gamma=0.5, kernel=linear;, score=-94.429 total time=   0.5s
[CV 4/5] END C=100, gamma=0.5, kernel=linear;, score=-474.236 total time=   0.5s
[CV 5/5] END C=100, gamma=0.5, kernel=linear;, score=-205.068 total time=   1.0s
[CV 1/5] END ......C=100, gamma=0.5, kernel=rbf;, score=0.281 total time=   0.0s
[CV 2/5] END ......C=100, gamma=0.5, kernel=rbf;, score=0.284 total time=   0.0s
[CV 3/5] END ......C=100, gamma=0.5, kernel=rbf;, score=0.239 total time=   0.0s
[CV 4/5] END ......C=100, gamma=0.5, kernel=rbf;, score=0.199 total time=   0.0s
[CV 5/5] END ......C=100, gamma=0.5, kernel=rbf;, score=0.144 total time=   0.0s
[CV 1/5] END .......C=100, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 2/5] END .......C=100, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 3/5] END .......C=100, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 4/5] END .......C=100, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 5/5] END .......C=100, g

[CV 4/5] END C=500, gamma=0.5, kernel=linear;, score=-12664.194 total time=   0.4s
[CV 5/5] END C=500, gamma=0.5, kernel=linear;, score=-6683.509 total time=   0.3s
[CV 1/5] END ......C=500, gamma=0.5, kernel=rbf;, score=0.281 total time=   0.0s
[CV 2/5] END ......C=500, gamma=0.5, kernel=rbf;, score=0.284 total time=   0.0s
[CV 3/5] END ......C=500, gamma=0.5, kernel=rbf;, score=0.239 total time=   0.0s
[CV 4/5] END ......C=500, gamma=0.5, kernel=rbf;, score=0.199 total time=   0.0s
[CV 5/5] END ......C=500, gamma=0.5, kernel=rbf;, score=0.144 total time=   0.0s
[CV 1/5] END .......C=500, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 2/5] END .......C=500, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 3/5] END .......C=500, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 4/5] END .......C=500, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 5/5] END .......C=500, gamma=0.5, kernel=ploy;, score=nan total time=   0.0s
[CV 1/5] END .C=500, gamm

150 fits failed out of a total of 600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
150 fits failed with the following error:
Traceback (most recent call last):
  File "c:\users\acer\appdata\local\programs\python\python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\acer\appdata\local\programs\python\python39\lib\site-packages\sklearn\svm\_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "c:\users\acer\appdata\local\programs\python\python39\lib\site-packages\sklearn\svm\_base.py", line 315, in _dense_fit
    ) = libsvm.fit(
  File "sklearn\svm\_libsvm.pyx", line 173, in sklea

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.1, 1, 10, 50, 100, 500],
                         'gamma': [1, 0.5, 0.1, 0.01, 0.001],
                         'kernel': ['linear', 'rbf', 'ploy', 'sigmoid']},
             verbose=3)

In [54]:
grid.best_estimator_

SVR(C=1, gamma=1, kernel='linear')

In [55]:
svr_new=SVR(C=1, gamma=1, kernel='linear')

In [56]:
svr_new.fit(train_x, train_y)

SVR(C=1, gamma=1, kernel='linear')

In [57]:
y_predict=svr_new.predict(test_x)

In [61]:
mean_squared_error(test_y,y_predict)*100

0.4142904609310956

# model 3

In [62]:
svr_new1=SVR(C=50, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma= 0.001, kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose =False)

In [63]:
svr_new1.fit(train_x, train_y)

SVR(C=50, gamma=0.001)

In [64]:
mean_squared_error(test_y,svr_new1.predict(test_x))*100

0.4599286932623946

In [66]:
score_new=r2_score(test_y, svr_new1.predict(test_x))

In [67]:
score_new

0.7457021772643664