In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('Admission_Predict.csv')

### Preparing the data

In [3]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [4]:
df.drop(['Serial No.'], axis=1, inplace=True)

In [5]:
df['Acceptance'] = np.where(df['Chance of Admit ']>=0.76, 1, 0)
# 1 = yes
# 0 = no

In [6]:
df.head(7)

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit,Acceptance
0,337,118,4,4.5,4.5,9.65,1,0.92,1
1,324,107,4,4.0,4.5,8.87,1,0.76,1
2,316,104,3,3.0,3.5,8.0,1,0.72,0
3,322,110,3,3.5,2.5,8.67,1,0.8,1
4,314,103,2,2.0,3.0,8.21,0,0.65,0
5,330,115,5,4.5,3.0,9.34,1,0.9,1
6,321,109,3,3.0,4.0,8.2,1,0.75,0


In [7]:
df.tail(7)

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit,Acceptance
393,317,104,2,3.0,3.0,8.76,0,0.77,1
394,329,111,4,4.5,4.0,9.23,1,0.89,1
395,324,110,3,3.5,3.5,9.04,1,0.82,1
396,325,107,3,3.0,3.5,9.11,1,0.84,1
397,330,116,4,5.0,4.5,9.45,1,0.91,1
398,312,103,3,3.5,4.0,8.78,0,0.67,0
399,333,117,4,5.0,4.0,9.66,1,0.95,1


# Build the model

Buiding the model:

1. Define: What type of model will it be? A decision tree?
2. Fit: Capture patterns from provided data.
3. Predict: Just what it sounds like
4. Evaluate: Determine how accurate the model's predictions are

### Define

In [8]:
y = df['Acceptance'].values

In [9]:
X = df.drop(labels=['Acceptance'], axis = 1)

In [10]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators = 10, random_state = 30)

### Fit

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, random_state=20)
model.fit(X_train, y_train)

RandomForestClassifier(n_estimators=10, random_state=30)

In [12]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Predict


In [13]:
prediction_test = model.predict(X_test)

#### Let’s check the difference between the actual and predicted values.

In [14]:
df=pd.DataFrame({'Actual':y_test, 'Predicted':prediction_test})
df

Unnamed: 0,Actual,Predicted
0,0,0
1,1,0
2,0,0
3,0,0
4,1,0
...,...,...
115,1,0
116,0,0
117,0,0
118,0,0


#### Let’s plot the difference between the actual and the predicted value.

### Evaluate

In [15]:
from sklearn import metrics
print("Accuracy= ", metrics.accuracy_score(y_test, prediction_test))

Accuracy=  0.6


In [16]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print(classification_report(y_test,prediction_test))

              precision    recall  f1-score   support

           0       0.60      1.00      0.75        72
           1       0.00      0.00      0.00        48

    accuracy                           0.60       120
   macro avg       0.30      0.50      0.37       120
weighted avg       0.36      0.60      0.45       120



  _warn_prf(average, modifier, msg_start, len(result))


### Predict new values

In [28]:
new_data = {
    'GRE':[500],
    'TOEFL':[118],
    'UR': [4],
    'SOP': [4.5],
    'LOR': [4.5],
    'CGPA':[9.65],
    'Research': [0.92]
}

new_df = pd.DataFrame(data=new_data)

#Acceptance_predict = model.predict(X)
#print(Acceptance_predict)
#GRE, TOEFL, UR, SOP, LOR, CGPA, Research

In [29]:
new_df

Unnamed: 0,GRE,TOEFL,UR,SOP,LOR,CGPA,Research
0,500,118,4,4.5,4.5,9.65,0.92


In [27]:
model.predict(new_data)

TypeError: float() argument must be a string or a number, not 'dict'

### Reconsider the selection of the features using (model.feature_importances_)

In [None]:
feature_list = list(X.columns)
feature_imp = pd.Series(model.feature_importances_, index=feature_list).sort_values(ascending=False)
print(feature_list)