In [None]:
#importing required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the admissions dataset from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer="Admission_Predict.csv")

In [None]:
# Preview the first five rows of the dataset

data.head(n=5)

In [None]:
# Preview the last five rows of the dataset

data.tail(n=5)

In [None]:
# Shape of the DataFrame as a (rows, columns) tuple

data.shape

In [None]:
# Column labels of the DataFrame.
# Note: some labels carry a trailing space (e.g. "Chance of Admit " and "LOR "),
# so later cells must spell them exactly that way.

data.columns

In [None]:
# Dropping the Id column ("Serial No."): it is a row identifier, not a feature.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this
# cell safe to re-run: a second execution no longer raises KeyError.

data = data.drop(columns="Serial No.", errors="ignore")


In [None]:
# Display the DataFrame to confirm the "Serial No." column is gone

data

In [None]:
# Turn "Chance of Admit " into a binary label: 1 when the chance is above 0.5, else 0.
# A vectorized comparison replaces the row-by-row apply(lambda ...); the result is
# the same 0/1 integer column.

data["Chance of Admit "] = data["Chance of Admit "].gt(0.5).astype("int64")

In [None]:
# Display the DataFrame to confirm "Chance of Admit " now holds 0/1 labels

data

In [None]:
# Count the missing values in each column
print("Missing values:\n")
data.isna().sum()

In [None]:
# Print a concise summary of the DataFrame via DataFrame.info()

data.info()

In [None]:
# Pairwise Pearson correlation between all columns

data.corr(method="pearson")

In [None]:
# Annotated heatmap of the pairwise column correlations

corr_matrix = data.corr()
plt.figure(figsize=(6, 6))
sns.heatmap(corr_matrix, cmap="Oranges", annot=True)
plt.show()

In [None]:
# Histogram of every column (50 bins each); the trailing ';' hides the axes repr
data.hist(figsize=(15, 11), bins=50);

In [None]:
# Split the rows by the binary target and report the size of each group

is_admitted = data['Chance of Admit '] == 1
is_not_admitted = data['Chance of Admit '] == 0
data_admit = data[is_admitted]
data_non_admit = data[is_not_admitted]
print("Admitted count       : ", len(data_admit))
print("Non - Admitted count : ", len(data_non_admit))

In [None]:
# Pie chart of the class balance in "Chance of Admit "

admit_counts = data['Chance of Admit '].value_counts()
admit_counts.plot.pie(figsize=(5, 5), autopct='%1.1f%%')
plt.title("Chance of Admit in total")
plt.show()


In [None]:
# Pie chart of how often each LOR value occurs

lor_counts = data['LOR '].value_counts()
lor_counts.plot.pie(figsize=(5, 5), autopct='%1.1f%%')
plt.title("LOR Point Chart")
plt.show()


In [None]:
# Pie chart of how often each SOP value occurs

sop_counts = data['SOP'].value_counts()
sop_counts.plot.pie(figsize=(5, 5), autopct='%1.1f%%')
plt.title("SOP Point Chart")
plt.show()


In [None]:
# Pie chart of how often each "University Rating" value occurs

rating_counts = data["University Rating"].value_counts()
rating_counts.plot.pie(figsize=(5, 5), autopct='%1.1f%%')
plt.title("University Rating Chart")
plt.show()


In [None]:
# Report the highest and the lowest GRE score in the dataset
gre_scores = data['GRE Score']
print("maximum GRE Score : ", gre_scores.max())
print("minimum GRE Score : ", gre_scores.min())

In [None]:
# Pairwise scatter plots of all columns, coloured by the "Research" column

sns.pairplot(data=data, hue="Research")

In [None]:
# Pairwise scatter plots of all columns, coloured by the "SOP" column

sns.pairplot(data=data, hue="SOP");

In [None]:
# Pairwise scatter plots of all columns, coloured by the "University Rating" column

sns.pairplot(data=data, hue="University Rating");

In [None]:
# Pairwise scatter plots of all columns, without hue grouping

sns.pairplot(data=data)

In [None]:
# Separate the target (dependent) column from the feature (independent) columns

target_column = "Chance of Admit "  # the trailing space is part of the column name
y = data[target_column]
X = data.drop(columns=[target_column])

In [None]:
# Number of distinct values in each feature column

X.nunique(axis=0)

In [None]:
# Split the dataset into train and test sets (80-20)

from sklearn.model_selection import train_test_split

# The fixed random_state keeps the split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Shapes of the train and test partitions (features, then target)
for features, target in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, target.shape)

In [None]:
# Decision Tree Classifier model
from sklearn.tree import DecisionTreeClassifier

# Instantiate the model with a fixed seed: without random_state the fitted tree
# (and every metric derived from it below) can differ between runs.
tree = DecisionTreeClassifier(random_state=42)

# Fit the model on the training split
tree.fit(X_train, y_train)

In [None]:
# Predict the target for both the training and the test samples

y_train_tree, y_test_tree = tree.predict(X_train), tree.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the decision tree on the training and the test split
acc_train_tree = accuracy_score(y_true=y_train, y_pred=y_train_tree)
acc_test_tree = accuracy_score(y_true=y_test, y_pred=y_test_tree)

print(f"Decision Tree : Accuracy on training Data: {acc_train_tree:.3f}")
print(f"Decision Tree : Accuracy on test Data: {acc_test_tree:.3f}")

In [None]:
from sklearn.metrics import classification_report

# Classification report of the test-set predictions
test_report = classification_report(y_true=y_test, y_pred=y_test_tree)
print(test_report)

In [None]:
# Horizontal bar chart of the importance the fitted tree assigns to each feature
feature_labels = X.columns
importances = tree.feature_importances_
plt.barh(feature_labels, importances)
plt.title("Feature Importances while constructing Tree")
plt.show()

In [None]:
# Visualization of the row-normalized confusion matrix
from sklearn.metrics import confusion_matrix

# Raw counts: rows are actual classes, columns are predicted classes
cm = confusion_matrix(y_test, y_test_tree)

# Normalize each row by the number of actual samples of that class.
# A row whose total is 0 (class predicted but never present in y_test) is left
# at 0 instead of producing NaN through a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cmn = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

# Draw on the axes created here rather than relying on the implicit current axes
fig, ax = plt.subplots(figsize=(4, 4))
sns.heatmap(cmn, annot=True, fmt='.2f', cmap='Oranges', ax=ax)
ax.set_title("Confusion Matrix")
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

In [None]:
# Sweep max_depth from 1 to 15 and record train/test accuracy at each depth.
# NOTE(review): the depths are compared on the test split; a validation split or
# cross-validation would avoid tuning on the data used for the final evaluation.
training_accuracy = []
test_accuracy = []
depth = range(1, 16)
for n in depth:
    # Fixed seed so repeated runs build the same trees and draw the same curve
    tree_test = DecisionTreeClassifier(max_depth=n, random_state=42)
    tree_test.fit(X_train, y_train)
    # record training set accuracy
    training_accuracy.append(tree_test.score(X_train, y_train))
    # record generalization accuracy
    test_accuracy.append(tree_test.score(X_test, y_test))

# Plot the training and test accuracy for max_depth from 1 to 15
plt.plot(depth, training_accuracy, label="training accuracy")
plt.plot(depth, test_accuracy, label="test accuracy")
plt.title("Accuracy vs max_depth")
plt.ylabel("Accuracy")
plt.xlabel("max_depth")
plt.legend()
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier, export_text

# Instantiate a shallow (max_depth=3) tree with a fixed seed for reproducibility.
# Note: this rebinds `tree`, replacing the unrestricted model fitted earlier.
tree = DecisionTreeClassifier(max_depth=3, random_state=42)

# Fit the model on the training split
tree.fit(X_train, y_train)

# Print the decision rules using the real column names instead of the generic
# feature_0, feature_1, ... placeholders; a plain list is passed because
# export_text does not accept a pandas Index in every scikit-learn version.
text_representation = export_text(tree, feature_names=list(X.columns))
print(text_representation)

In [None]:
# Visualization of the fitted tree

import sklearn.tree as tr

# Plain lists are passed for feature_names and class_names because some
# scikit-learn versions reject a pandas Index or a numpy array here.
# class_names follow the ascending order of the class labels (0, then 1).
fig, ax = plt.subplots(figsize=(20, 15))
tr.plot_tree(
    tree,
    feature_names=list(X.columns),
    class_names=["Non admit", "Admit"],
    filled=True,
    ax=ax,
)
plt.show()
