In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
# Location of the admissions CSV, given as a bare relative file name.
csv_path = 'Admission_data.csv'
# Read the complete table; the other cells derive their frames from it.
admission_data_full = pd.read_csv(csv_path)

In [None]:
# Print a structural summary of the raw table (columns, non-null counts, dtypes).
admission_data_full.info()

In [None]:
# Build the working frame without the 'Serial No.' column; drop() returns a
# new frame, so admission_data_full itself is left unchanged.
admission_data = admission_data_full.drop(columns=['Serial No.'])

# Summary statistics of the remaining columns, shown as the cell output.
admission_data.describe()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

admission_data.hist(bins=10,figsize=(20,12))
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split
# repeatable across runs.
train_set, test_set = train_test_split(
    admission_data,
    test_size=0.2,
    random_state=42,
)

# GRE score against admission chance, training rows only.
# The target column name 'Chance of Admit ' is written with a trailing space.
train_set.plot(
    kind='scatter',
    x='GRE Score',
    y='Chance of Admit ',
    alpha=0.4,
)

In [None]:
# Cast every column of both splits to float in one pass.
train_set, test_set = (split.astype(float) for split in (train_set, test_set))

# Print the structure of the training split after the cast.
train_set.info()

In [None]:
from pandas.plotting import scatter_matrix

# Columns to compare pairwise in the scatter-matrix plot.
attributes = [
    'GRE Score',
    'TOEFL Score',
    'SOP',
    'CGPA',
    'Research',
]

# Pairwise scatter plots of the selected columns, drawn from the full working frame.
scatter_matrix(admission_data[attributes], figsize=(12, 8))

##### Preparing the data for the algorithms

In [None]:
# Name of the target column; the trailing space is part of the column name.
target_column = 'Chance of Admit '

# Training split: target values (as an independent copy) and the feature columns.
train_labels = train_set[target_column].copy()
train_xunt = train_set.drop(columns=[target_column])

# Test split, separated the same way.
test_labels = test_set[target_column].copy()
test_xunt = test_set.drop(columns=[target_column])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min-max scaling from the training features only, and apply it to them.
scaler1 = MinMaxScaler()
train_x = scaler1.fit_transform(train_xunt)

# Apply the training-fitted scaler to the test features so both sets share
# one scale and no statistic of the test data enters preprocessing.
# A second scaler fitted on the test features used to be created here; it was
# never applied to anything, so it has been removed.
test_x = scaler1.transform(test_xunt)

##### Linear regression approach

In [None]:
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training are chained.
lin_reg = LinearRegression().fit(train_x, train_labels)
# Echo the fitted estimator as the cell output.
lin_reg

In [None]:
# Linear-regression predictions for the scaled training features.
train_predicted_values = lin_reg.predict(train_x)

In [None]:
# Text styling shared by the title and both axis labels.
label_style = dict(fontweight='bold', fontsize='15', horizontalalignment='center')

# Predicted against real target values on the training set.
plt.figure(figsize=(16, 10))
plt.scatter(train_labels, train_predicted_values)
plt.title('Linear Regression', **label_style)
plt.xlabel('Real values', **label_style)
plt.ylabel('Train predicted values', **label_style)
plt.xticks(fontsize='12')
plt.yticks(fontsize='12')


In [None]:
# Linear-regression predictions for the scaled test features.
test_predicted_values = lin_reg.predict(test_x)

In [None]:
# Predicted against real target values on the test set.
plt.figure(figsize=(16,10))
plt.scatter(test_labels,test_predicted_values)
plt.ylabel('Test predicted values',fontweight='bold',  fontsize='15', horizontalalignment='center')
plt.xlabel('Real values',fontweight='bold',  fontsize='15', horizontalalignment='center')
# Title added to match the training-set figure, so this plot identifies its
# model when viewed on its own.
plt.title('Linear Regression',fontweight='bold',  fontsize='15', horizontalalignment='center')
plt.xticks( fontsize='12')
plt.yticks(fontsize='12')


In [None]:
# Training residuals: real target value minus linear-regression prediction.
train_labels_array = np.array(train_labels)
y_error_train = train_labels_array - train_predicted_values

# Test residuals, computed the same way.
test_labels_array = np.array(test_labels)
y_error_test = test_labels_array - test_predicted_values

In [None]:
# Text styling shared by both panel titles.
title_style = dict(fontweight='bold', fontsize='15', horizontalalignment='center')

# One figure with two stacked panels: training errors above, test errors below.
plt.figure(1, figsize=(16, 12))

# Upper panel: training errors plotted against their position in the array.
plt.subplot(2, 1, 1)
plt.title('Error values train data', **title_style)
plt.xticks(fontsize='12')
plt.yticks(fontsize='12')
plt.plot(y_error_train)

# Lower panel: test errors, drawn the same way.
plt.subplot(2, 1, 2)
plt.title('Error values test data', **title_style)
plt.xticks(fontsize='12')
plt.yticks(fontsize='12')
plt.plot(y_error_test)

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the linear-regression predictions on the test set.
mse_total = mean_squared_error(y_true=test_labels, y_pred=test_predicted_values)
print('Mean squared error per data point = ', mse_total, sep='')

### Let's try a Random Forest Regressor and determine whether it works better than linear regression

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forests are built with randomness, so an unseeded model gives
# different predictions on every run. Pin the seed (the same value used for
# the train/test split) so the results below are reproducible.
forest_reg = RandomForestRegressor(random_state=42)
forest_reg.fit(train_x, train_labels)

# Predictions for the scaled training features.
forest_train_predicted_values = forest_reg.predict(train_x)

In [None]:
# Predicted against real target values on the training set, random forest model.
plt.figure(figsize=(16,10))
plt.scatter(train_labels,forest_train_predicted_values)
plt.ylabel('Train predicted values',fontweight='bold',  fontsize='15', horizontalalignment='center')
plt.xlabel('Real values',fontweight='bold',  fontsize='15', horizontalalignment='center')
# The title previously read 'Linear Regression', carried over from the
# linear-regression figure; these points come from the random forest.
plt.title('Random Forest Regression',fontweight='bold',  fontsize='15', horizontalalignment='center')
plt.xticks( fontsize='12')
plt.yticks(fontsize='12')