# Notebook Imports

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
# Read the semicolon-delimited CSV into a DataFrame, then preview its
# first five rows as plain text.
dataset = pd.read_csv('takingawalk_dataset.csv', delimiter=';')
print(dataset.head(n=5))

   Week Outlook Humidity Wind  Label
0     1   Rainy     High  Yes      0
1     2   Sunny   Normal   No      1
2     3   Sunny   Normal  Yes      1
3     4   Sunny     High  Yes      0
4     5   Rainy   Normal  Yes      0


In [3]:
# Target vector: the "Label" column.
y = dataset["Label"]

# Feature matrix: every column except "Label" and "Week", with the remaining
# categorical columns expanded into one indicator column per category value.
X = pd.get_dummies(dataset.drop(columns=["Label", "Week"]))
print(X.columns)

Index(['Outlook_Rainy', 'Outlook_Sunny', 'Humidity_High', 'Humidity_Normal',
       'Wind_No', 'Wind_Yes'],
      dtype='object')


In [4]:
# Hold out 30% of the rows for evaluation. The rows are shuffled before the
# split, and the fixed seed makes that shuffle repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    shuffle=True,
)

In [5]:
# Fit a gradient-boosting classifier (100 boosting stages, trees limited to
# depth 3, fixed seed) on the training split. fit() returns the estimator
# itself, so the fitted model can be bound to `clf` in the same statement.
clf = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows and tabulate true labels against predictions.
y_pred = clf.predict(X_test)

print('Confusion Matrix:\n', confusion_matrix(y_true=y_test, y_pred=y_pred))

Confusion Matrix:
 [[10  1]
 [ 0  5]]


In [6]:
# Fraction of held-out rows whose predicted label matches the true label;
# left as the cell's final expression so the notebook displays the value.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9375

In [7]:
# Pair each training column name with the fitted model's importance value,
# then order the entries from most to least important.
feature_score = pd.Series(data=clf.feature_importances_, index=X_train.columns)
feature_score = feature_score.sort_values(ascending=False)
print("Feature Scores:\n", feature_score)

Feature Scores:
 Wind_No            0.196622
Outlook_Rainy      0.188464
Outlook_Sunny      0.187038
Wind_Yes           0.185601
Humidity_Normal    0.129333
Humidity_High      0.112943
dtype: float64
