In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
import utils
# Seed NumPy's global RNG once, up front. scikit-learn estimators that are left at
# random_state=None draw from this global generator (see the scikit-learn glossary
# entry for random_state), so their results can depend on the order cells are run in.
np.random.seed(0)

# The dataset

In [None]:
# Load the admissions table; the first CSV column is used as the row index.
# Header names are stripped of surrounding whitespace because some widely
# distributed copies of this file ship headers with a trailing space
# (e.g. 'Chance of Admit '), which would make later column lookups raise KeyError.
# Stripping is a no-op when the headers are already clean.
data = pd.read_csv('Admission_Predict.csv', index_col=0).rename(columns=str.strip)
data

In [None]:
# Binarise the target: True when the reported chance of admission is at least 0.75.
data['Admitted'] = data['Chance of Admit'].ge(0.75)

In [None]:
# The boolean label was derived from this column, so keeping it among the
# predictors would leak the answer; remove it from the table.
data = data.drop(columns=['Chance of Admit'])

In [None]:
# Show the table again now that 'Chance of Admit' has been replaced by the
# boolean 'Admitted' column.
data

In [None]:
# Separate the target column from the predictor columns.
labels = data['Admitted']
features = data.drop(columns=['Admitted'])

In [None]:
# Inspect the predictor columns (every column except 'Admitted').
features

In [None]:
# Inspect the boolean target ('Admitted').
labels

# Training a decision tree

In [None]:
# Unconstrained tree (no depth or leaf-size limits).
# random_state is fixed so that ties between equally good splits are broken the
# same way on every run; without it the estimator draws from NumPy's global RNG
# and the fitted tree can change with the order in which cells are executed.
dt = DecisionTreeClassifier(random_state=0)

In [None]:
# Learn the tree from all rows; the fitted estimator is the cell's output.
dt.fit(X=features, y=labels)

In [None]:
# Predictions for the first five rows (selected by position).
dt.predict(features.iloc[:5])

In [None]:
# Accuracy on the same rows the tree was fitted on, i.e. training accuracy,
# not an estimate of performance on unseen data.
dt.score(X=features, y=labels)

In [None]:
# Visualise the unconstrained tree using the project-local helper.
# NOTE(review): utils.display_tree is defined outside this notebook; its output
# format is not visible here.
utils.display_tree(dt)

# Training a smaller tree that doesn't overfit

In [None]:
# Regularised tree: at most 3 levels and at least 10 samples in every leaf.
# NOTE: with min_samples_leaf=10 a node needs at least 20 samples before any
# split is admissible, so min_samples_split=10 is never the binding constraint;
# it is kept only to make the intent explicit.
# random_state is fixed so that ties between equally good splits are broken the
# same way on every run, independent of how much of NumPy's global RNG earlier
# cells have consumed.
dt_smaller = DecisionTreeClassifier(
    max_depth=3,
    min_samples_leaf=10,
    min_samples_split=10,
    random_state=0,
)

In [None]:
# Fit the constrained tree on the full table.
dt_smaller.fit(X=features, y=labels)

In [None]:
# Training accuracy of the constrained tree (scored on the rows it was fitted on).
dt_smaller.score(X=features, y=labels)

In [None]:
# Visualise the constrained tree using the project-local helper.
# NOTE(review): utils.display_tree is defined outside this notebook; its output
# format is not visible here.
utils.display_tree(dt_smaller)

### Using the tree to make predictions

In [None]:
# Classify one hypothetical applicant.
# The tree was fitted on a DataFrame, so the query is passed as a one-row
# DataFrame carrying the same column labels. A bare list makes scikit-learn
# (>= 1.0) warn that X has no valid feature names, and gives no protection
# against a wrong number of values. The values must be listed in the same
# order as features.columns.
dt_smaller.predict(
    pd.DataFrame(
        [[320,
          110,
          3,
          4.0,
          3.5,
          8.9,
          0]],
        columns=features.columns,
    )
)

In [None]:
# Same applicant as in the previous cell except that the sixth value is 8.0
# instead of 8.9. Per the original author's note, this applicant falls into the
# white (neutral) leaf of the displayed tree and is predicted False.
# The query is passed as a one-row DataFrame with the training column labels:
# the tree was fitted on a DataFrame, and a bare list makes scikit-learn (>= 1.0)
# warn that X has no valid feature names. The values must be listed in the same
# order as features.columns.
dt_smaller.predict(
    pd.DataFrame(
        [[320,
          110,
          3,
          4.0,
          3.5,
          8.0,
          0]],
        columns=features.columns,
    )
)

# Training a decision tree with only two features

In [None]:
# Keep just two predictors so the model can be drawn in the plane.
# (Swap 'TOEFL Score' for 'CGPA' to experiment with a different pair.)
exams = data.loc[:, ['GRE Score', 'TOEFL Score']]
exams

In [None]:
# Plot the two exam scores together with the labels using the project-local helper.
# NOTE(review): utils.plot_points is defined outside this notebook; how it draws
# the labels and interprets size_of_points is assumed, not verified here.
utils.plot_points(exams, labels, size_of_points=25)

### Fitting a tree of depth 2

In [None]:
# Two-level tree on the two exam scores.
# Alternative to try: additionally pass min_samples_leaf=10, min_samples_split=10.
# random_state is fixed so that ties between equally good splits are broken the
# same way on every run, independent of how much of NumPy's global RNG earlier
# cells have consumed.
dt_exams = DecisionTreeClassifier(max_depth=2, random_state=0)

In [None]:
# Fit the depth-2 tree on the two-column table.
dt_exams.fit(X=exams, y=labels)

In [None]:
# Plot the points together with the depth-2 model using the project-local helper.
# NOTE(review): utils.plot_model is defined outside this notebook; presumably it
# draws the model's decision regions behind the points — confirm in utils.
utils.plot_model(exams, labels, dt_exams, size_of_points=25)

In [None]:
# Visualise the depth-2 tree using the project-local helper.
# NOTE(review): utils.display_tree is defined outside this notebook; its output
# format is not visible here.
utils.display_tree(dt_exams)

### Fitting a tree of depth 1

In [None]:
# A depth-1 tree: a single split on one of the two exam scores.
# random_state is fixed so that a tie between equally good splits is broken the
# same way on every run, independent of how much of NumPy's global RNG earlier
# cells have consumed.
simpler_dt_exams = DecisionTreeClassifier(max_depth=1, random_state=0)
simpler_dt_exams.fit(exams, labels)

In [None]:
# Plot the points together with the depth-1 model using the project-local helper.
# NOTE(review): utils.plot_model is defined outside this notebook; presumably it
# draws the model's decision regions behind the points — confirm in utils.
utils.plot_model(exams, labels, simpler_dt_exams, size_of_points=25)

In [None]:
# Visualise the depth-1 tree using the project-local helper.
# NOTE(review): utils.display_tree is defined outside this notebook; its output
# format is not visible here.
utils.display_tree(simpler_dt_exams)

### Fitting a tree of unbounded depth (overfitting)

In [None]:
# No depth or leaf-size limits: the tree is free to keep splitting, which is
# what this section uses to illustrate overfitting.
# random_state is fixed so that ties between equally good splits are broken the
# same way on every run, independent of how much of NumPy's global RNG earlier
# cells have consumed.
crazy_dt_exams = DecisionTreeClassifier(random_state=0)
crazy_dt_exams.fit(exams, labels)

In [None]:
# Plot the points together with the unconstrained model using the project-local helper.
# NOTE(review): utils.plot_model is defined outside this notebook; presumably it
# draws the model's decision regions behind the points — confirm in utils.
utils.plot_model(exams, labels, crazy_dt_exams, size_of_points=25)

In [None]:
# Visualise the unconstrained tree using the project-local helper.
# NOTE(review): utils.display_tree is defined outside this notebook; its output
# format is not visible here.
utils.display_tree(crazy_dt_exams)