# F20DL Lab 8

## Setup

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare the leading numeric components as integers. Comparing the raw
# version strings is lexicographic, so e.g. "0.9" >= "0.20" would be True.
assert tuple(map(int, re.findall(r"\d+", sklearn.__version__)[:2])) >= (0, 20)

# Common imports
import math
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=8)
mpl.rc('xtick', labelsize=8)
mpl.rc('ytick', labelsize=8)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [2]:
# Read the image array and the label array from their .npy files
images, labels = (
    np.load("./datasets/smiley_X.npy"),
    np.load("./datasets/smiley_Y.npy"),
)

In [3]:
# Collapse every image into a single 1-D row, then cast pixels and labels to integers
X = np.asarray([image.flatten() for image in images])
X = X.astype(int)
y = labels.astype(int)

In [4]:
# X is an array of all images. Each image is represented as a flattened row of pixel values.
# To get pixel (r, c) of image i (i, r and c all counted from 1), use X[i-1][9*(r-1) + (c-1)]
# Or use images[i-1][r-1][c-1][0]
# NOTE(review): the factor 9 assumes each image row holds 9 pixels — confirm against images.shape.

# X[0] returns a row of pixel values representing the 1st image.
# X[0][24] and images[0][2][6][0] return the value of the pixel in the 3rd row and 7th column of the 1st image 
# (r=3, c=7).
# y[0] returns the label for the 1st image.
images[0][0][0][0]

0.0

In [5]:
def show_smiley(smiley):
    """Render one smiley as an image on a new 2x2-inch figure.

    `smiley` should be an element of `images`. It is drawn with the
    "Greys" colormap. Nothing is returned.
    """
    figure_size = (2, 2)
    plt.figure(figsize=figure_size)
    plt.imshow(smiley, cmap="Greys")

In [6]:
def shift_instances(d_train, d_test, t_train, t_test, percent):
    """Move the leading share of the training set into the test set.

    Parameters
    ----------
    d_train, d_test : numpy.ndarray
        Training and test data, one instance per row.
    t_train, t_test : numpy.ndarray
        Targets aligned row-for-row with `d_train` / `d_test`.
    percent : float
        Fraction of the training instances to move, given as a value in
        [0, 1] (e.g. 0.3 for 30%), not as 0-100. The resulting count is
        rounded up to a whole instance.

    Returns
    -------
    tuple
        `(new_d_train, new_d_test, new_t_train, new_t_test)`. The shifted
        instances are appended after the existing test instances. The inputs
        are not modified; the new training arrays are slices of the originals.

    Raises
    ------
    ValueError
        If `percent` is outside [0, 1] (a negative value would otherwise turn
        into a from-the-end slice and silently return the wrong split), or if
        `d_train` and `t_train` have different lengths.
    """
    if not 0 <= percent <= 1:
        raise ValueError("percent must be a fraction between 0 and 1, got %r" % (percent,))

    n_train = d_train.shape[0]
    if len(t_train) != n_train:
        raise ValueError(
            "d_train and t_train must have the same length (%d != %d)"
            % (n_train, len(t_train))
        )

    # No. of instances to shift. Round away float noise before taking the
    # ceiling: 0.07 * 100 evaluates to 7.000000000000001, which a bare
    # math.ceil would turn into 8 instead of 7.
    shift_count = math.ceil(round(percent * n_train, 9))

    # Shift the first `shift_count` samples to the end of the test set
    new_d_train = d_train[shift_count:]
    new_d_test = np.concatenate((d_test, d_train[:shift_count]))
    new_t_train = t_train[shift_count:]
    new_t_test = np.concatenate((t_test, t_train[:shift_count]))
    return (new_d_train, new_d_test, new_t_train, new_t_test)

## Decision Trees

In [7]:
### Tree 0

In [8]:
# Hold out 30% of the samples for testing; the remaining 70% are used for training
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.3
)
print("Training samples: %i, Testing samples: %i" % (len(X_train), len(X_test)))

Training samples: 100, Testing samples: 44


In [9]:
# Build a decision tree classifier and train it on the training split
classifier = DecisionTreeClassifier().fit(X_train, y_train)

# Predict labels for the training data itself (Tree 0 is evaluated on the data it was fitted on)
y_pred = classifier.predict(X_train)

In [10]:
# Fraction of training instances that Tree 0 labels correctly
accuracy_score(y_true=y_train, y_pred=y_pred)

1.0

In [11]:
# 10-fold cross-validation scores of the decision tree on the training split
cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

### Tree 1

In [12]:
# Build a decision tree classifier and train it on the original training split
classifier1 = DecisionTreeClassifier().fit(X_train, y_train)

# Predict labels for the original test split
y1_pred = classifier1.predict(X_test)

In [13]:
# Fraction of test instances that Tree 1 labels correctly
accuracy_score(y_true=y_test, y_pred=y1_pred)

1.0

### Tree 2

In [14]:
# Move the first 30% of the original training instances over to the test set
X2_train, X2_test, y2_train, y2_test = shift_instances(
    X_train, X_test, y_train, y_test, percent=0.3
)
print("Training samples: %i, Testing samples: %i" % (len(X2_train), len(X2_test)))

Training samples: 70, Testing samples: 74


In [15]:
# Build a decision tree classifier and train it on the reduced (70-sample) training split
classifier2 = DecisionTreeClassifier().fit(X2_train, y2_train)

# Predict labels for the enlarged test split
y2_pred = classifier2.predict(X2_test)

In [16]:
# Fraction of test instances that Tree 2 labels correctly
accuracy_score(y_true=y2_test, y_pred=y2_pred)

1.0

### Tree 3

In [17]:
# Move the first 60% of the original training instances over to the test set
X3_train, X3_test, y3_train, y3_test = shift_instances(
    X_train, X_test, y_train, y_test, percent=0.6
)
print("Training samples: %i, Testing samples: %i" % (len(X3_train), len(X3_test)))

Training samples: 40, Testing samples: 104


In [18]:
# Build a decision tree classifier and train it on the reduced (40-sample) training split
classifier3 = DecisionTreeClassifier().fit(X3_train, y3_train)

# Predict labels for the enlarged test split
y3_pred = classifier3.predict(X3_test)

In [19]:
# Fraction of test instances that Tree 3 labels correctly
accuracy_score(y_true=y3_test, y_pred=y3_pred)

1.0

## Random Forest Trees

### Tree 1

In [20]:
# Train a random forest with default settings on the original training split.
# RandomForestClassifier is imported with the other scikit-learn imports in the
# setup cell, so the notebook's dependencies are all declared in one place.
RFClassifier = RandomForestClassifier()
RFClassifier = RFClassifier.fit(X_train, y_train)

# Predict labels for the original test split
RFy_pred = RFClassifier.predict(X_test)

In [21]:
# Fraction of test instances that the first random forest labels correctly
accuracy_score(y_true=y_test, y_pred=RFy_pred)

1.0

### Tree 2

In [22]:
# Train a random forest on the reduced (70-sample) training split
RFClassifier2 = RandomForestClassifier().fit(X2_train, y2_train)

# Predict labels for the enlarged test split
RFy2_pred = RFClassifier2.predict(X2_test)

In [23]:
# Fraction of test instances that the second random forest labels correctly
accuracy_score(y_true=y2_test, y_pred=RFy2_pred)

1.0

### Tree 3

In [24]:
# Train a random forest on the reduced (40-sample) training split
RFClassifier3 = RandomForestClassifier().fit(X3_train, y3_train)

# Predict labels for the enlarged test split
RFy3_pred = RFClassifier3.predict(X3_test)

In [25]:
# Fraction of test instances that the third random forest labels correctly
accuracy_score(y_true=y3_test, y_pred=RFy3_pred)

1.0