In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge,Lasso,RidgeCV,LassoCV,LinearRegression,ElasticNet,ElasticNetCV
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import seaborn as sns

In [2]:
df=pd.read_csv("Admission_Prediction.csv")

In [3]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,,104.0,3.0,3.0,3.5,8.0,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [None]:
from pandas_profiling import ProfileReport

In [None]:
pf=ProfileReport(df)

In [None]:
pf.to_widgets()

In [4]:
#missing value
df.isnull().sum()

Serial No.            0
GRE Score            15
TOEFL Score          10
University Rating    15
SOP                   0
LOR                   0
CGPA                  0
Research              0
Chance of Admit       0
dtype: int64

In [5]:
df['GRE Score'].fillna(value=df['GRE Score'].mean(),inplace=True)
df['TOEFL Score'].fillna(value=df['TOEFL Score'].mean(),inplace=True)
df['University Rating'].fillna(value=df['University Rating'].mean(),inplace=True)

In [6]:
df.isnull().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [7]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,316.558763,104.0,3.0,3.0,3.5,8.0,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [8]:
#drop the column
df.drop(columns=['Serial No.'],axis=1,inplace=True)

In [9]:
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,316.558763,104.0,3.0,3.0,3.5,8.0,1,0.72
3,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GRE Score          500 non-null    float64
 1   TOEFL Score        500 non-null    float64
 2   University Rating  500 non-null    float64
 3   SOP                500 non-null    float64
 4   LOR                500 non-null    float64
 5   CGPA               500 non-null    float64
 6   Research           500 non-null    int64  
 7   Chance of Admit    500 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 31.4 KB


In [11]:
#feature(x) and label(y)
x=df.iloc[:,:-1]

In [12]:
y=df.iloc[:,-1]

In [13]:
scaler=StandardScaler()

In [14]:
arr=scaler.fit_transform(x)

In [15]:
df1=pd.DataFrame(arr)

In [16]:
df1.head()

Unnamed: 0,0,1,2,3,4,5,6
0,1.842741,1.788542,0.778906,1.13736,1.098944,1.776806,0.886405
1,0.6708143,-0.031058,0.778906,0.632315,1.098944,0.485859,0.886405
2,5.124333e-15,-0.527313,-0.107877,-0.377773,0.017306,-0.954043,0.886405
3,0.4905178,0.465197,-0.107877,0.127271,-1.064332,0.154847,0.886405
4,-0.2306679,-0.692731,-0.994659,-1.387862,-0.523513,-0.60648,-1.128152


In [17]:
#for Multicollinearity
from statsmodels.stats.outliers_influence import variance_inflation_factor
vif_df=pd.DataFrame()

In [18]:
vif_df['vif']=[variance_inflation_factor(arr,i) for i in range(arr.shape[1])]

In [19]:
vif_df['feature']=x.columns

In [20]:
#all score are less than 10 so there is no multicollinearity
vif_df

Unnamed: 0,vif,feature
0,4.153268,GRE Score
1,3.792866,TOEFL Score
2,2.508768,University Rating
3,2.77575,SOP
4,2.037308,LOR
5,4.65167,CGPA
6,1.459311,Research


In [21]:
x_train,x_test,y_train,y_test=train_test_split(arr,y,test_size=0.25,random_state=42)

In [22]:
x_train

array([[-0.41096436,  0.46519653, -0.99465886, ..., -0.52351283,
        -0.07686099, -1.12815215],
       [-0.05037148, -0.69273097, -0.1078766 , ..., -1.60515091,
        -1.48366203, -1.12815215],
       [ 1.5722965 ,  2.11937866,  1.66568791, ...,  1.63976333,
         2.14091948,  0.88640526],
       ...,
       [-1.31244657, -1.35440382, -1.88144112, ..., -1.60515091,
        -2.19533785, -1.12815215],
       [-0.68140903, -0.36189454, -0.99465886, ...,  0.55812525,
        -1.48366203, -1.12815215],
       [-0.23066792, -0.19647633, -0.99465886, ...,  0.01730621,
        -0.5402778 , -1.12815215]])

In [23]:
lr=LinearRegression()

In [24]:
lr.fit(x_train,y_train)

LinearRegression()

In [25]:
import pickle

In [26]:
pickle.dump(lr,open('admission_lr_model.pkl','wb'))

In [27]:
test1=scaler.transform(x)

In [28]:
model=pickle.load(open('admission_lr_model.pkl','rb'))

In [None]:
model.predict(test1)

In [29]:
#r^2 score
lr.score(x_test,y_test)

0.8175497115836481

In [30]:
#create a function to create adjusted R-Squared
def adt_r2(x,y):
    r2=lr.score(x,y)
    n=x.shape[0]
    p=x.shape[1]
    adjusted_r2=1-(1-r2)*(n-1)/(n-p-1)
    return adjusted_r2

In [31]:
adt_r2(x_test,y_test)

0.8066338823621569

In [32]:
lr.coef_

array([0.022218  , 0.01962107, 0.00252948, 0.00128856, 0.01538998,
       0.07278466, 0.01299267])

In [33]:
lr.intercept_

0.722535407687509

In [49]:
lassocv=LassoCV(alphas=None,cv=5,max_iter=20000000,normalize=True)

In [50]:
lassocv.fit(x_train,y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lasso())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 


LassoCV(cv=5, max_iter=20000000, normalize=True)

In [51]:
lassocv.alpha_

5.9937188109793197e-05

In [52]:
lasso=Lasso(alpha=lassocv.alpha_)
lasso.fit(x_train,y_train)

Lasso(alpha=5.9937188109793197e-05)

In [53]:
lasso.score(x_test,y_test)

0.8175868503894765

In [65]:
ridgecv=RidgeCV(alphas=np.random.uniform(0,10,50),cv=5,normalize=True)
ridgecv.fit(x_train,y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

RidgeCV(alphas=array([3.33044923, 7.58295712, 7.36472818, 0.78666577, 9.44734074,
       4.54755084, 7.38715367, 0.54446934, 1.14595231, 0.60084199,
       1.17381398, 2.09568778, 2.63790497, 9.36114039, 9.18992335,
       0.25537275, 5.56053526, 4.98573722, 5.93764836, 9.35786332,
       1.3675058 , 2.07695732, 1.12814414, 8.26406196, 0.15132688,
       3.7992662 , 6.37123378, 1.42371953, 4.19528212, 9.69681436,
       8.32407508, 0.39167576, 8.59220325, 4.11839644, 1.48880335,
       6.12601959, 0.99334134, 7.07648808, 1.9622393 , 5.55583914,
       9.24169379, 0.53424109, 4.02271791, 1.48393468, 3.96959379,
       6.61786672, 5.32129716, 1.1804406 , 4.94093427, 8.38658548]),
        cv=5, normalize=True)

In [66]:
ridgecv.alpha_

0.151326877117699

In [64]:
np.random.uniform(0,10,50)

array([7.54154298, 6.73038659, 2.23610312, 2.09162345, 1.06072022,
       1.37395939, 2.51458966, 8.38556614, 3.01004205, 6.44233388,
       1.59986593, 1.54567146, 3.70017574, 3.43352636, 1.17846799,
       2.27257591, 3.1209116 , 1.83463381, 4.52008712, 4.42140105,
       6.94089284, 8.41058257, 8.10828402, 3.70103063, 2.34958054,
       4.51119901, 2.18768199, 4.80807737, 8.01901252, 0.05348839,
       1.88506331, 4.43374305, 7.45537316, 3.649486  , 2.54716937,
       1.78391696, 3.16990409, 9.70574162, 2.52588656, 4.17065954,
       0.60252585, 2.37483088, 2.60998271, 9.14465981, 4.6654473 ,
       6.44729126, 2.40952347, 2.48352433, 0.78723462, 9.7669634 ])

In [67]:
ridge_lr=Ridge(alpha=ridgecv.alpha_)
ridge_lr.fit(x_train,y_train)

Ridge(alpha=0.151326877117699)

In [68]:
ridge_lr.score(x_test,y_test)

0.8175645543017234

In [71]:
elastic=ElasticNetCV(alphas=None,cv=5)
elastic.fit(x_train,y_train)

ElasticNetCV(cv=5)

In [72]:
elastic.alpha_

0.002008402769501015

In [73]:
elastic.l1_ratio_

0.5

In [74]:
elastic_lr=ElasticNet(alpha=elastic.alpha_,l1_ratio=elastic.l1_ratio_)

In [75]:
elastic_lr.fit(x_train,y_train)

ElasticNet(alpha=0.002008402769501015)

In [76]:
elastic_lr.score(x_test,y_test)

0.8180883251460256