# We optimize the parameters of the decision tree classifier in order to obtain the best results, using a Grid Search


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Remote location of the admission CSV, kept in a constant so it is easy to spot and change.
ADMISSION_CSV_URL = 'https://raw.githubusercontent.com/BriceNW/datasets/main/Admission.csv'

# Load the raw data into a DataFrame.
df = pd.read_csv(ADMISSION_CSV_URL)

In [None]:
# Remove the 'Serial No.' column before modelling.
# A non-inplace drop with errors='ignore' keeps this cell idempotent: re-running it
# after the column is already gone no longer raises a KeyError.
df = df.drop(columns=['Serial No.'], errors='ignore')
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [None]:
# Normalise the column headers first: this cell originally addressed the target as
# 'Chance of Admit ' (trailing space) while the later cells use 'Chance of Admit',
# so one of the two lookups had to fail. After stripping, every cell can rely on the
# clean name.
df.columns = df.columns.str.strip()

# Turn the admission probability into a binary label: 1 when >= 0.75, else 0.
# The vectorised comparison gives the same labels as the element-wise lambda
# (missing values compare False and therefore map to 0, as before).
df['Chance of Admit'] = (df['Chance of Admit'] >= 0.75).astype(int)

In [None]:
# Cast 'University Rating' to object dtype so that pd.get_dummies later treats it
# as a categorical column instead of a numeric one.
df = df.astype({'University Rating': object})

In [None]:
# Name of the label column, without surrounding whitespace.
TARGET_COLUMN = 'Chance of Admit'

# An earlier cell refers to the target with a trailing space ('Chance of Admit ')
# while this cell uses the clean name. Selecting from a whitespace-normalised view
# of the frame makes the lookup succeed whichever spelling the CSV header uses.
df_normalised = df.rename(columns=str.strip)

# Features: every column except the label. Target: the label column.
X = df_normalised.drop(columns=[TARGET_COLUMN])
y = df_normalised[TARGET_COLUMN]

In [None]:
# One-hot encode the categorical feature columns; drop_first=True omits the first
# level of each encoded column to avoid a redundant indicator.
X = pd.get_dummies(data=X, drop_first=True)

In [None]:
from sklearn.model_selection import train_test_split

# stratify=y preserves the proportion of 1s and 0s of y in both subsets, since the
# two classes are not equally represented. 30% of the rows are held out for testing
# and the seed is fixed so the split is reproducible.
split_parts = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=63,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features.
scaler = StandardScaler()

# Learn the mean / standard deviation on the training set only, then scale it.
X_train = scaler.fit_transform(X_train)

# Re-use the training statistics for the test set. Calling fit_transform here (as
# the cell did before) would refit the scaler on the test data: that leaks test-set
# information and scales the two sets with different parameters.
X_test = scaler.transform(X_test)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Candidate class weights: 10 evenly spaced values from 0.05 to 0.95 (both included).
weights = np.linspace(start=0.05, stop=0.95, num=10)
weights

array([0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95])

In [None]:
# We create a dictionary of the parameters to optimise and wrap the tree in a grid search.
from sklearn.model_selection import GridSearchCV

# Candidate weights for class 0; class 1 receives the complement (1 - w).
weights = np.linspace(0.05,0.95,10)

# Seed the estimator: the tree randomly permutes the features when looking for a
# split, so without a fixed random_state equally good splits can be chosen
# differently from one run to the next and the search result is not reproducible.
tree = DecisionTreeClassifier(random_state=63)
params={
    'class_weight' : [{0 : w, 1 : 1-w} for w in weights],
    'criterion' : ['gini','entropy'],
    'max_depth' : np.arange(1,11),
    'min_samples_leaf' : np.arange(1,5)
}

# Exhaustive search over every combination (10 x 2 x 10 x 4 = 800 candidates),
# each evaluated with 10-fold cross-validation.
grid_tree = GridSearchCV(tree, param_grid=params, cv=10)

In [None]:
# Run the grid search on the training data only (the test set stays untouched).
grid_tree.fit(X=X_train, y=y_train)

GridSearchCV(cv=10, estimator=DecisionTreeClassifier(),
             param_grid={'class_weight': [{0: 0.05, 1: 0.95},
                                          {0: 0.15, 1: 0.85},
                                          {0: 0.25, 1: 0.75},
                                          {0: 0.35, 1: 0.65},
                                          {0: 0.44999999999999996, 1: 0.55},
                                          {0: 0.5499999999999999,
                                           1: 0.45000000000000007},
                                          {0: 0.65, 1: 0.35},
                                          {0: 0.75, 1: 0.25},
                                          {0: 0.85, 1: 0.15000000000000002},
                                          {0: 0.95, 1: 0.050000000000000044}],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]),
                         'min_samples_leaf': array([1, 2, 3

In [None]:
# Display the parameter combination selected by the grid search.
grid_tree.best_params_

{'class_weight': {0: 0.05, 1: 0.95},
 'criterion': 'gini',
 'max_depth': 1,
 'min_samples_leaf': 1}

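The grid search has selected the best hyperparameter combination (shown above). We now instantiate a decision tree with these parameters.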

In [None]:
# Build a fresh (unfitted) tree from the parameters selected by the grid search.
# Reading them from grid_tree.best_params_ instead of hand-copying the printed values
# keeps this cell in sync with the search whenever the grid or the data changes.
best_tree = DecisionTreeClassifier(**grid_tree.best_params_)