# Linear Regression Multivariate Data: Admisions  

## Importing required libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm


## Loading the Data

In [2]:
df = pd.read_csv('Datasets/Admission_Predict.csv')

## Check the dimensions 

In [3]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [4]:
df.shape

(400, 9)

## Linear Regression 

### Split data

In [5]:
X = df.drop('Chance of Admit ', axis=1)
y = df['Chance of Admit ']

In [6]:
X.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,1,337,118,4,4.5,4.5,9.65,1
1,2,324,107,4,4.0,4.5,8.87,1
2,3,316,104,3,3.0,3.5,8.0,1
3,4,322,110,3,3.5,2.5,8.67,1
4,5,314,103,2,2.0,3.0,8.21,0


In [7]:
y.head()

0    0.92
1    0.76
2    0.72
3    0.80
4    0.65
Name: Chance of Admit , dtype: float64

### Train & Test data

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y,
                                                   test_size = 0.20,
                                                   random_state = 101)

In [10]:
X_train.shape, X_test.shape

((320, 8), (80, 8))

In [11]:
y_train.shape, y_test.shape

((320,), (80,))

### Use Statistics Statsmodels

In [13]:
x_train_with_intercept = sm.add_constant(X_train)
stats_model = sm.OLS(y_train, x_train_with_intercept)
fit_model = stats_model.fit()

In [14]:
fit_model.summary()

0,1,2,3
Dep. Variable:,Chance of Admit,R-squared:,0.819
Model:,OLS,Adj. R-squared:,0.814
Method:,Least Squares,F-statistic:,175.9
Date:,"Thu, 20 Jan 2022",Prob (F-statistic):,1.34e-110
Time:,18:43:28,Log-Likelihood:,443.48
No. Observations:,320,AIC:,-869.0
Df Residuals:,311,BIC:,-835.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.3134,0.134,-9.767,0.000,-1.578,-1.049
Serial No.,0.0002,3.07e-05,5.980,0.000,0.000,0.000
GRE Score,0.0021,0.001,3.320,0.001,0.001,0.003
TOEFL Score,0.0036,0.001,3.063,0.002,0.001,0.006
University Rating,0.0142,0.005,2.654,0.008,0.004,0.025
SOP,0.0011,0.006,0.185,0.853,-0.011,0.013
LOR,0.0183,0.006,3.073,0.002,0.007,0.030
CGPA,0.0956,0.014,6.906,0.000,0.068,0.123
Research,0.0196,0.009,2.254,0.025,0.002,0.037

0,1,2,3
Omnibus:,41.937,Durbin-Watson:,2.246
Prob(Omnibus):,0.0,Jarque-Bera (JB):,63.226
Skew:,-0.821,Prob(JB):,1.87e-14
Kurtosis:,4.43,Cond. No.,15500.0


### Train model

In [16]:
from sklearn.linear_model import LinearRegression

linear_model = LinearRegression(normalize=True).fit(X_train, y_train)

In [17]:
y_pred_train = linear_model.predict(X_train)

## Score

In [18]:
from sklearn.metrics import r2_score

r2_score(y_pred_train, y_train)

0.7789849683351178

In [19]:
y_pred_test = linear_model.predict(X_test)

In [20]:
r2_score(y_pred_test, y_test)

0.7276289374921789