## Decision Tree

In [1]:
# Importing the required packages and libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier



In [2]:
# Reading test CSV file directly from Web, and store it in a pandas DataFrame:
test_df=pd.read_csv("D:/Fauzan/Study PhD/Semester 1/Machine Learning/Homework/HW4/spooky-author-identification/spooky-author-identification/test/test.csv")

# Print first 5 rows
test_df.head()

Unnamed: 0,id,text
0,id02310,"Still, as I urged our leaving Ireland with suc..."
1,id24541,"If a fire wanted fanning, it could readily be ..."
2,id00134,And when they had broken down the frail door t...
3,id27757,While I was thinking how I should possibly man...
4,id04081,I am not sure to what limit his knowledge may ...


In [3]:
# Find unique value counts of each author
test_df.value_counts()

id       text                                                                                                                                                                                                         
id00008  And then there was 'The Man in the Bell,' a paper by the by, Miss Zenobia, which I cannot sufficiently recommend to your attention.                                                                              1
id18869  "Good God" I whispered, "can you do that for any time?"                                                                                                                                                          1
id18864  "While I improved in speech, I also learned the science of letters as it was taught to the stranger, and this opened before me a wide field for wonder and delight.                                              1
id18863  It is possible indeed it is far more than probable that he was innocent of all participation in the bloody transacti

In [4]:
# Reading train CSV file directly from Web, and store it in a pandas DataFrame:
train_df = pd.read_csv('D:/Fauzan/Study PhD/Semester 1/Machine Learning/Homework/HW4/spooky-author-identification/spooky-author-identification/train/train.csv')

# Print first 5 rows
train_df.head()

Unnamed: 0,id,text,author
0,id26305,"This process, however, afforded me no means of...",EAP
1,id17569,It never once occurred to me that the fumbling...,HPL
2,id11008,"In his left hand was a gold snuff box, from wh...",EAP
3,id27763,How lovely is spring As we looked from Windsor...,MWS
4,id12958,"Finding nothing else, not even gold, the Super...",HPL


In [5]:
# Find unique value counts of each author
train_df['author'].value_counts()

EAP    7900
MWS    6044
HPL    5635
Name: author, dtype: int64

In [6]:
# Split into features and labels
X_train = train_df['text']
y_train = train_df['author']
X_test = test_df['text']

In [7]:
# Transform text data into vectors to run it through machine learning model
vectorizer = CountVectorizer(stop_words='english')
corpus = pd.concat([train_df['text'], test_df['text']])
vectorizer.fit(corpus)

CountVectorizer(stop_words='english')

In [8]:
# Randomly splitting the original dataset into training set and testing set
X_tr, X_tt, y_tr, y_tt = train_test_split(X_train, y_train, test_size=0.3, random_state=2)

X_tr_new = vectorizer.transform(X_tr)
X_tt_new = vectorizer.transform(X_tt)

### Experiment 1

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import fbeta_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RepeatedStratifiedKFold

param_distribution = {
    'max_depth': np.arange(1, 15),
}

scoring = {    
    'Accuracy': make_scorer(accuracy_score),
    'F1_Score': make_scorer(fbeta_score, beta=1, average='micro'),    
}

In [11]:
result = []
kf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2)
for fold, (train_index, test_index) in enumerate(kf.split(X_tr, y_tr)):
    X_tr, X_tst = X_tr_new[train_index], X_tr_new[test_index]
    y_tr, y_tst = y_tr[train_index], y_tr[test_index]

    for i in range(1, 20):
        # train
        pca = PCA(i)
        X_t = pca.fit_transform(X_tr)
        search_cv = RandomizedSearchCV(DecisionTreeClassifier(), param_distribution,
                                       scoring=scoring, n_jobs=-1, 
                                       cv=RepeatedStratifiedKFold(n_splits=2, n_repeats=2), 
                                       refit='F1_Score') 
        search_cv.fit(X_t, y_tr)
        model = search_cv.best_estimator_        

        # test
        X_t = pca.transform(X_tst)
        y_pred = model.predict(X_t)

        # model evaluation
        f1 = fbeta_score(y_tst, y_pred, beta=1)
        acc = accuracy_score(y_tst, y_pred)
        
        print(f"fold: {fold} - cp:{i} train: {search_cv.best_score_} test: f1={f1}, acc={acc}")

        result.append((fold, i, acc, f1, pca, model))

KeyError: '[2, 4, 5, 8, 19, 31, 33, 49, 58, 59, 72, 77, 86, 87, 88, 94, 106, 113, 127, 135, 141, 145, 148, 152, 166, 167, 175, 176, 193, 202, 203, 208, 228, 234, 239, 241, 245, 261, 275, 278, 282, 290, 291, 301, 319, 324, 337, 351, 363, 364, 367, 371, 379, 383, 395, 405, 424, 435, 436, 441, 447, 454, 463, 464, 466, 473, 477, 484, 487, 502, 504, 517, 519, 524, 529, 548, 551, 554, 559, 562, 565, 570, 577, 582, 590, 594, 595, 608, 609, 612, 616, 625, 626, 629, 639, 641, 648, 652, 662, 676, 678, 682, 683, 684, 689, 692, 699, 712, 723, 727, 734, 736, 737, 741, 742, 744, 749, 761, 763, 781, 783, 786, 801, 807, 810, 811, 816, 821, 824, 825, 840, 844, 849, 851, 862, 866, 874, 878, 879, 884, 909, 911, 924, 932, 943, 945, 946, 948, 959, 966, 970, 975, 980, 986, 998, 999, 1001, 1006, 1012, 1013, 1022, 1033, 1035, 1037, 1040, 1048, 1053, 1061, 1069, 1071, 1072, 1082, 1093, 1116, 1118, 1128, 1140, 1142, 1147, 1151, 1156, 1157, 1169, 1172, 1185, 1189, 1190, 1195, 1196, 1198, 1204, 1205, 1210, 1213, 1219, 1222, 1226, 1230, 1239, 1250, 1254, 1256, 1259, 1286, 1288, 1290, 1309, 1313, 1321, 1324, 1327, 1343, 1351, 1352, 1353, 1358, 1362, 1366, 1368, 1371, 1385, 1394, 1395, 1396, 1410, 1415, 1429, 1431, 1434, 1442, 1451, 1457, 1463, 1482, 1495, 1496, 1497, 1499, 1515, 1526, 1527, 1528, 1545, 1546, 1570, 1576, 1583, 1594, 1608, 1610, 1617, 1619, 1625, 1641, 1642, 1652, 1654, 1655, 1656, 1657, 1685, 1686, 1690, 1691, 1699, 1703, 1710, 1711, 1712, 1717, 1718, 1722, 1724, 1728, 1734, 1743, 1749, 1754, 1755, 1760, 1768, 1776, 1783, 1784, 1787, 1789, 1792, 1796, 1797, 1800, 1801, 1812, 1849, 1856, 1864, 1879, 1881, 1894, 1904, 1905, 1923, 1924, 1928, 1929, 1934, 1939, 1943, 1975, 1981, 1984, 1988, 2015, 2019, 2022, 2025, 2026, 2029, 2032, 2035, 2037, 2038, 2052, 2054, 2055, 2063, 2066, 2069, 2072, 2074, 2078, 2079, 2080, 2085, 2088, 2093, 2095, 2113, 2123, 2125, 2128, 2131, 2132, 2140, 2141, 2151, 2155, 2158, 2159, 2162, 2164, 2166, 2171, 2177, 2179, 2192, 2193, 2196, 2199, 2206, 2217, 2232, 2235, 2239, 2244, 2249, 2254, 2259, 2265, 2268, 2272, 2284, 2287, 2300, 2305, 2308, 2309, 2313, 2324, 2325, 2330, 2343, 2350, 2357, 2373, 2380, 2383, 2386, 2394, 2397, 2413, 2416, 2433, 2441, 2456, 2476, 2477, 2496, 2499, 2510, 2513, 2521, 2535, 2537, 2538, 2546, 2547, 2552, 2555, 2558, 2562, 2569, 2574, 2578, 2610, 2624, 2631, 2634, 2638, 2646, 2648, 2652, 2656, 2664, 2666, 2684, 2688, 2693, 2698, 2704, 2722, 2727, 2741, 2745, 2753, 2754, 2758, 2787, 2788, 2796, 2806, 2815, 2827, 2832, 2838, 2842, 2848, 2850, 2862, 2868, 2877, 2880, 2884, 2900, 2908, 2910, 2919, 2922, 2924, 2928, 2929, 2932, 2935, 2939, 2958, 2959, 2961, 2980, 2984, 2987, 2989, 2998, 3004, 3014, 3019, 3021, 3025, 3030, 3036, 3039, 3041, 3043, 3044, 3056, 3064, 3084, 3091, 3107, 3114, 3119, 3125, 3129, 3132, 3134, 3144, 3145, 3153, 3154, 3158, 3180, 3185, 3193, 3194, 3203, 3236, 3243, 3258, 3259, 3260, 3269, 3272, 3273, 3281, 3284, 3290, 3296, 3298, 3302, 3305, 3313, 3320, 3322, 3323, 3325, 3327, 3344, 3363, 3366, 3371, 3373, 3380, 3395, 3401, 3408, 3421, 3423, 3424, 3425, 3445, 3457, 3459, 3463, 3468, 3488, 3491, 3497, 3506, 3509, 3511, 3521, 3536, 3542, 3545, 3551, 3558, 3559, 3566, 3568, 3571, 3577, 3589, 3591, 3594, 3601, 3602, 3608, 3609, 3616, 3623, 3636, 3652, 3653, 3660, 3670, 3671, 3683, 3684, 3693, 3701, 3705, 3723, 3728, 3758, 3773, 3777, 3783, 3791, 3797, 3798, 3807, 3809, 3816, 3821, 3822, 3825, 3831, 3842, 3847, 3848, 3849, 3852, 3858, 3866, 3875, 3876, 3886, 3890, 3897, 3899, 3904, 3910, 3915, 3927, 3932, 3936, 3941, 3947, 3948, 3969, 3976, 3977, 3978, 3981, 3990, 3999, 4013, 4019, 4020, 4029, 4030, 4041, 4047, 4053, 4064, 4067, 4082, 4084, 4086, 4090, 4093, 4095, 4096, 4100, 4109, 4117, 4122, 4136, 4142, 4143, 4158, 4211, 4213, 4225, 4226, 4232, 4234, 4238, 4242, 4244, 4248, 4257, 4264, 4270, 4275, 4277, 4278, 4284, 4286, 4289, 4291, 4296, 4308, 4323, 4328, 4348, 4361, 4373, 4380, 4381, 4384, 4405, 4411, 4413, 4426, 4442, 4445, 4455, 4456, 4458, 4466, 4468, 4473, 4482, 4497, 4498, 4500, 4514, 4525, 4529, 4531, 4536, 4545, 4551, 4552, 4553, 4556, 4558, 4563, 4567, 4572, 4575, 4582, 4600, 4601, 4604, 4610, 4619, 4622, 4634, 4652, 4663, 4668, 4671, 4678, 4687, 4688, 4690, 4714, 4717, 4728, 4729, 4740, 4745, 4754, 4757, 4761, 4773, 4791, 4797, 4798, 4802, 4814, 4815, 4816, 4817, 4838, 4860, 4870, 4887, 4888, 4891, 4892, 4896, 4897, 4901, 4906, 4907, 4916, 4919, 4923, 4924, 4935, 4937, 4940, 4948, 4950, 4980, 4985, 4996, 4998, 5020, 5021, 5026, 5031, 5034, 5041, 5049, 5060, 5080, 5087, 5093, 5098, 5102, 5108, 5118, 5135, 5136, 5141, 5143, 5147, 5149, 5151, 5152, 5157, 5159, 5170, 5176, 5193, 5201, 5206, 5209, 5213, 5219, 5234, 5242, 5246, 5248, 5260, 5266, 5272, 5273, 5300, 5312, 5314, 5324, 5356, 5364, 5376, 5377, 5381, 5385, 5387, 5404, 5411, 5412, 5413, 5433, 5437, 5443, 5446, 5463, 5465, 5469, 5477, 5484, 5493, 5499, 5509, 5523, 5524, 5536, 5541, 5547, 5553, 5555, 5560, 5567, 5570, 5571, 5581, 5589, 5602, 5609, 5613, 5616, 5618, 5619, 5627, 5628, 5638, 5642, 5653, 5656, 5659, 5660, 5665, 5667, 5672, 5690, 5700, 5711, 5714, 5716, 5719, 5723, 5724, 5725, 5726, 5731, 5741, 5745, 5748, 5761, 5768, 5770, 5778, 5782, 5783, 5787, 5791, 5797, 5803, 5805, 5815, 5822, 5835, 5837, 5844, 5860, 5881, 5883, 5886, 5891, 5899, 5906, 5913, 5918, 5922, 5925, 5926, 5928, 5931, 5932, 5934, 5944, 5948, 5952, 5963, 5964, 5968, 5969, 5974, 5985, 6005, 6014, 6017, 6020, 6025, 6026, 6035, 6036, 6040, 6042, 6049, 6052, 6064, 6075, 6078, 6084, 6088, 6100, 6103, 6109, 6123, 6142, 6155, 6159, 6177, 6191, 6198, 6201, 6212, 6218, 6219, 6226, 6230, 6234, 6235, 6238, 6253, 6260, 6261, 6262, 6274, 6276, 6278, 6282, 6283, 6286, 6291, 6307, 6314, 6315, 6336, 6337, 6361, 6373, 6376, 6377, 6394, 6400, 6401, 6402, 6407, 6408, 6410, 6415, 6416, 6417, 6424, 6437, 6438, 6448, 6449, 6450, 6473, 6474, 6482, 6483, 6499, 6516, 6517, 6520, 6529, 6543, 6545, 6568, 6571, 6572, 6573, 6580, 6597, 6598, 6600, 6602, 6613, 6621, 6626, 6628, 6629, 6635, 6641, 6644, 6655, 6667, 6673, 6675, 6683, 6689, 6691, 6711, 6713, 6728, 6730, 6731, 6737, 6738, 6742, 6764, 6770, 6771, 6772, 6774, 6783, 6784, 6788, 6789, 6790, 6802, 6804, 6825, 6845, 6854, 6855, 6857, 6867, 6868, 6869, 6872, 6876, 6877, 6879, 6883, 6887, 6895, 6897, 6907, 6908, 6915, 6941, 6949, 6966, 6971, 6984, 6992, 6994, 7003, 7021, 7022, 7032, 7037, 7038, 7046, 7051, 7053, 7058, 7069, 7074, 7076, 7077, 7083, 7094, 7099, 7101, 7107, 7127, 7143, 7162, 7166, 7168, 7184, 7185, 7188, 7199, 7208, 7217, 7222, 7224, 7238, 7240, 7241, 7245, 7248, 7255, 7265, 7309, 7313, 7318, 7320, 7321, 7322, 7323, 7335, 7339, 7343, 7352, 7362, 7363, 7377, 7387, 7409, 7412, 7421, 7425, 7434, 7435, 7444, 7458, 7475, 7484, 7490, 7496, 7502, 7504, 7512, 7519, 7520, 7523, 7527, 7528, 7535, 7544, 7546, 7550, 7572, 7576, 7588, 7594, 7597, 7600, 7601, 7604, 7606, 7610, 7621, 7628, 7629, 7649, 7654, 7655, 7660, 7693, 7700, 7704, 7706, 7718, 7724, 7732, 7755, 7756, 7766, 7769, 7783, 7786, 7810, 7814, 7816, 7822, 7831, 7843, 7846, 7849, 7866, 7868, 7883, 7888, 7889, 7892, 7901, 7903, 7907, 7916, 7923, 7927, 7938, 7941, 7945, 7953, 7956, 7957, 7964, 7966, 7986, 7999, 8003, 8007, 8020, 8023, 8024, 8025, 8039, 8042, 8047, 8048, 8049, 8050, 8064, 8074, 8087, 8089, 8112, 8125, 8135, 8138, 8144, 8145, 8147, 8152, 8162, 8173, 8186, 8194, 8196, 8201, 8207, 8212, 8227, 8229, 8246, 8248, 8266, 8270, 8291, 8292, 8297, 8298, 8299, 8308, 8312, 8313, 8314, 8317, 8327, 8338, 8356, 8358, 8366, 8372, 8377, 8385, 8386, 8396, 8403, 8405, 8408, 8414, 8417, 8422, 8441, 8444, 8451, 8465, 8471, 8476, 8483, 8486, 8501, 8516, 8518, 8519, 8521, 8522, 8533, 8547, 8551, 8555, 8561, 8570, 8580, 8589, 8590, 8591, 8596, 8599, 8606, 8607, 8610, 8618, 8623, 8642, 8648, 8650, 8667, 8671, 8672, 8684, 8714, 8751, 8753, 8771, 8774, 8783, 8787, 8793, 8800, 8818, 8824, 8826, 8827, 8834, 8835, 8840, 8859, 8863, 8865, 8881, 8899, 8907, 8927, 8931, 8934, 8936, 8943, 8944, 8948, 8965, 8985, 8986, 8988, 8993, 8997, 8998, 9003, 9013, 9014, 9017, 9028, 9031, 9043, 9047, 9048, 9052, 9054, 9055, 9062, 9064, 9082, 9090, 9095, 9096, 9105, 9135, 9138, 9152, 9155, 9162, 9167, 9169, 9170, 9203, 9205, 9216, 9223, 9236, 9244, 9251, 9255, 9262, 9272, 9301, 9305, 9315, 9317, 9341, 9346, 9353, 9358, 9360, 9366, 9367, 9372, 9378, 9380, 9391, 9393, 9408, 9412, 9413, 9415, 9431, 9435, 9443, 9461, 9468, 9474, 9495, 9505, 9507, 9515, 9525, 9538, 9551, 9552, 9559, 9567, 9575, 9581, 9594, 9597, 9603, 9626, 9632, 9643, 9645, 9647, 9676, 9678, 9683, 9693, 9705, 9710, 9717, 9718, 9721, 9722, 9725, 9726, 9730, 9731, 9738, 9754, 9755, 9760, 9761, 9768, 9780, 9783, 9793, 9799, 9806, 9807, 9813, 9814, 9822, 9825, 9827, 9830, 9835, 9844, 9852, 9853, 9857, 9864, 9865, 9881, 9898, 9908, 9909, 9917, 9918, 9931, 9934, 9938, 9951, 9963, 9970, 9974, 9975, 9983, 9985, 9986, 9994, 9997, 10003, 10008, 10010, 10020, 10032, 10043, 10049, 10053, 10063, 10069, 10073, 10076, 10079, 10084, 10092, 10097, 10099, 10110, 10126, 10127, 10131, 10133, 10141, 10143, 10144, 10147, 10148, 10157, 10158, 10159, 10170, 10175, 10176, 10179, 10183, 10184, 10191, 10194, 10200, 10202, 10205, 10209, 10221, 10224, 10228, 10230, 10241, 10246, 10248, 10250, 10251, 10267, 10279, 10304, 10312, 10323, 10331, 10332, 10344, 10369, 10373, 10375, 10382, 10387, 10399, 10401, 10408, 10425, 10426, 10454, 10456, 10465, 10468, 10473, 10474, 10475, 10483, 10487, 10527, 10531, 10536, 10544, 10553, 10578, 10580, 10581, 10586, 10596, 10597, 10614, 10622, 10623, 10629, 10631, 10677, 10680, 10682, 10689, 10695, 10697, 10704, 10705, 10706, 10713, 10732, 10738, 10740, 10741, 10745, 10746, 10757, 10763, 10764, 10767, 10772, 10781, 10783, 10791, 10794, 10796, 10798, 10802, 10804, 10809, 10814, 10820, 10825, 10833, 10837, 10847, 10855, 10865, 10866, 10868, 10879, 10880, 10894, 10902, 10904, 10933, 10945, 10952, 10954, 10963, 10964, 10966, 10967, 10970, 10983, 10989, 10996, 11002, 11013, 11014, 11022, 11026, 11040, 11042, 11046, 11102, 11110, 11118, 11130, 11140, 11156, 11164, 11165, 11169, 11181, 11187, 11190, 11196, 11203, 11214, 11217, 11239, 11266, 11291, 11299, 11300, 11306, 11311, 11312, 11319, 11326, 11331, 11334, 11335, 11336, 11337, 11338, 11342, 11356, 11357, 11363, 11369, 11386, 11394, 11395, 11406, 11407, 11411, 11417, 11431, 11432, 11434, 11447, 11457, 11463, 11470, 11474, 11478, 11488, 11493, 11495, 11496, 11510, 11516, 11517, 11521, 11525, 11526, 11531, 11534, 11555, 11561, 11566, 11582, 11593, 11594, 11596, 11602, 11609, 11614, 11616, 11618, 11621, 11623, 11626, 11627, 11629, 11632, 11638, 11641, 11647, 11650, 11660, 11664, 11681, 11690, 11691, 11704, 11707, 11717, 11719, 11723, 11726, 11732, 11736, 11740, 11750, 11755, 11769, 11773, 11782, 11785, 11789, 11796, 11799, 11801, 11806, 11807, 11813, 11820, 11829, 11832, 11835, 11838, 11839, 11842, 11843, 11865, 11868, 11876, 11882, 11892, 11893, 11919, 11924, 11927, 11934, 11946, 11950, 11957, 11966, 11972, 11980, 11986, 11992, 11999, 12000, 12007, 12015, 12030, 12031, 12037, 12038, 12046, 12048, 12055, 12063, 12070, 12072, 12073, 12080, 12081, 12083, 12086, 12088, 12089, 12092, 12097, 12118, 12131, 12133, 12140, 12144, 12148, 12155, 12173, 12183, 12184, 12188, 12191, 12194, 12195, 12210, 12216, 12221, 12229, 12231, 12240, 12249, 12258, 12259, 12269, 12274, 12277, 12279, 12290, 12295, 12299, 12300, 12301, 12306, 12311, 12318, 12322, 12331, 12352, 12370, 12377, 12382, 12398, 12421, 12425, 12428, 12446, 12450, 12451, 12457, 12463, 12468, 12470, 12485, 12499, 12511, 12525, 12548, 12558, 12566, 12571, 12572, 12573, 12577, 12582, 12585, 12595, 12598, 12605, 12608, 12610, 12614, 12615, 12617, 12624, 12626, 12634, 12658, 12664, 12668, 12675, 12685, 12691, 12694, 12695, 12698, 12708, 12712, 12715, 12734, 12740, 12742, 12746, 12748, 12754, 12762, 12769, 12771, 12779, 12780, 12787, 12788, 12810, 12821, 12830, 12832, 12837, 12840, 12845, 12846, 12856, 12860, 12863, 12885, 12891, 12898, 12902, 12910, 12914, 12915, 12924, 12939, 12942, 12951, 12954, 12967, 12970, 12972, 12976, 12981, 12995, 13004, 13006, 13024, 13031, 13040, 13041, 13051, 13057, 13065, 13080, 13101, 13105, 13111, 13115, 13117, 13119, 13121, 13133, 13135, 13139, 13144, 13145, 13152, 13154, 13161, 13164, 13165, 13166, 13205, 13207, 13210, 13214, 13222, 13228, 13229, 13236, 13251, 13254, 13255, 13257, 13259, 13262, 13264, 13271, 13274, 13300, 13317, 13327, 13333, 13344, 13346, 13349, 13351, 13352, 13355, 13357, 13360, 13362, 13368, 13371, 13373, 13384, 13404, 13410, 13417, 13419, 13449, 13450, 13453, 13455, 13464, 13471, 13473, 13478, 13500, 13502, 13513, 13525, 13527, 13541, 13548, 13549, 13551, 13554, 13558, 13560, 13565, 13568, 13572, 13573, 13575, 13579, 13590, 13596, 13599, 13602, 13605, 13611, 13615, 13626, 13631, 13640, 13645, 13649, 13651, 13653, 13663, 13664, 13666, 13678, 13686, 13687, 13688, 13689, 13690, 13704] not in index'

In [None]:
# "my_decisiontree" is instantiated as an "object" of DecisionTreeClassifier "class" 
dt = DecisionTreeClassifier()
dt.fit(X_tr_new, y_tr)

In [None]:
# Predicting the Binary Label:
y_predict_dt = dt.predict(X_tt_new)
print(y_predict_dt)

In [None]:
# Estimating the probability (likelihood) of Each Label: 
y_pred_proba_dt = dt.predict_proba(X_tt_new)
y_pred_proba_dt

In [None]:
# Find accuracy score
score_dt = accuracy_score(y_tt, y_predict_dt)
print("Decision tree accuracy:", score_dt)

### Experiment 2: Hyperparameter Tuning from GridSearch

In [None]:
# Creating a dictionary of parameters to use in GridSearchCV
from sklearn.model_selection import GridSearchCV

params = {
    'criterion':  ['gini', 'entropy'],
    'max_depth':  [None, 2, 4, 6, 8, 10],
    'max_features': [None, 'sqrt', 'log2', 0.2, 0.4, 0.6, 0.8],
    'splitter': ['best', 'random']
}

clf = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=params,
    cv=5,
    n_jobs=5,
    verbose=1,
)

clf.fit(X_tr_new, y_tr)
print(clf.best_params_)

In [None]:
# Predicting the Binary Label:
y_predict_dt_gs = clf.predict(X_tt_new)
print(y_predict_dt_gs)

In [None]:
# Find accuracy score
score_dt_gs1 = accuracy_score(y_tt, y_predict_dt_gs)
print("Decision tree accuracy:", score_dt_gs1)

### Experiment 3: Hyperparameter Tuning from GridSearch with Certain Parameters

In [None]:
# Using the Parameters from GridSearchCV
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

clf_gs2 = DecisionTreeClassifier(max_depth=4, criterion='entropy', max_features=0.6, splitter='best')
clf_gs2.fit(X_tr_new, y_tr)

In [None]:
predictions = clf_gs2.predict(X_tt_new)
print(y_predict_dt_gs)

In [None]:
score_dt_gs2 = accuracy_score(y_tt, predictions)
print("Decision tree accuracy:", score_dt_gs2)