Prepocessing Of Data(Use Same AS given on Kaggle)

In [593]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from subprocess import check_call
from PIL import Image, ImageDraw, ImageFont

# Loading the data
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Store our test passenger IDs for easy access
PassengerId = test['PassengerId']

# Showing overview of the train dataset
train.head(3)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


Cleaning And Feature Extracting

In [594]:
# Copy original dataset in case we need it later when digging into interesting features
# WARNING: Beware of actually copying the dataframe instead of just referencing it
# "original_train = train" will create a reference to the train variable (changes in 'train' will apply to 'original_train')
original_train = train.copy() # Using 'copy()' allows to clone the dataset, creating a different object with the same values

# Feature engineering steps taken from Sina and Anisotropic, with minor changes to avoid warnings
full_data = [train, test]

# Feature that tells whether a passenger had a cabin on the Titanic
train['Has_Cabin'] = train["Cabin"].apply(lambda x: 0 if type(x) == float else 1)
test['Has_Cabin'] = test["Cabin"].apply(lambda x: 0 if type(x) == float else 1)

# Create new feature FamilySize as a combination of SibSp and Parch
for dataset in full_data:
    dataset['FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1
# Create new feature IsAlone from FamilySize
for dataset in full_data:
    dataset['IsAlone'] = 0
    dataset.loc[dataset['FamilySize'] == 1, 'IsAlone'] = 1
# Remove all NULLS in the Embarked column
for dataset in full_data:
    dataset['Embarked'] = dataset['Embarked'].fillna('S')
# Remove all NULLS in the Fare column
for dataset in full_data:
    dataset['Fare'] = dataset['Fare'].fillna(train['Fare'].median())

# Remove all NULLS in the Age column
for dataset in full_data:
    age_avg = dataset['Age'].mean()
    age_std = dataset['Age'].std()
    age_null_count = dataset['Age'].isnull().sum()
    age_null_random_list = np.random.randint(age_avg - age_std, age_avg + age_std, size=age_null_count)
    # Next line has been improved to avoid warning
    dataset.loc[np.isnan(dataset['Age']), 'Age'] = age_null_random_list
    dataset['Age'] = dataset['Age'].astype(int)

# Define function to extract titles from passenger names
def get_title(name):
    title_search = re.search(' ([A-Za-z]+)\.', name)
    # If the title exists, extract and return it.
    if title_search:
        return title_search.group(1)
    return ""

for dataset in full_data:
    dataset['Title'] = dataset['Name'].apply(get_title)
# Group all non-common titles into one single grouping "Rare"
for dataset in full_data:
    dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col','Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')

    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')

for dataset in full_data:
    # Mapping Sex
    dataset['Sex'] = dataset['Sex'].map( {'female': 0, 'male': 1} ).astype(int)
    
    # Mapping titles
    title_mapping = {"Mr": 1, "Master": 2, "Mrs": 3, "Miss": 4, "Rare": 5}
    dataset['Title'] = dataset['Title'].map(title_mapping)
    dataset['Title'] = dataset['Title'].fillna(0)

    # Mapping Embarked
    dataset['Embarked'] = dataset['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2} ).astype(int)
    
    # Mapping Fare
    dataset.loc[ dataset['Fare'] <= 7.91, 'Fare'] 						        = 0
    dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
    dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare']   = 2
    dataset.loc[ dataset['Fare'] > 31, 'Fare'] 							        = 3
    dataset['Fare'] = dataset['Fare'].astype(int)
    
    # Mapping Age
    dataset.loc[ dataset['Age'] <= 16, 'Age'] 					       = 0
    dataset.loc[(dataset['Age'] > 16) & (dataset['Age'] <= 32), 'Age'] = 1
    dataset.loc[(dataset['Age'] > 32) & (dataset['Age'] <= 48), 'Age'] = 2
    dataset.loc[(dataset['Age'] > 48) & (dataset['Age'] <= 64), 'Age'] = 3
    dataset.loc[ dataset['Age'] > 64, 'Age'] 


Feature selection: remove variables no longer containing relevant information


In [595]:
drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp']
train = train.drop(drop_elements, axis = 1)
test  = test.drop(drop_elements, axis = 1)

Title VS Sex

In [596]:
train[['Title', 'Survived']].groupby(['Title'], as_index=False).agg(['mean', 'count', 'sum'])


Unnamed: 0_level_0,Survived,Survived,Survived
Unnamed: 0_level_1,mean,count,sum
Title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0.156673,517,81
2,0.575,40,23
3,0.793651,126,100
4,0.702703,185,130
5,0.347826,23,8


In [597]:
train[['Sex', 'Survived']].groupby(['Sex'], as_index=False).agg(['mean', 'count', 'sum'])


Unnamed: 0_level_0,Survived,Survived,Survived
Unnamed: 0_level_1,mean,count,sum
Sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,0.742038,314,233
1,0.188908,577,109


In [598]:
title_and_sex = original_train.copy()[['Name', 'Sex']]

# Create 'Title' feature
title_and_sex['Title'] = title_and_sex['Name'].apply(get_title)

# Map 'Sex' as binary feature
title_and_sex['Sex'] = title_and_sex['Sex'].map( {'female': 0, 'male': 1} ).astype(int)

# Table with 'Sex' distribution grouped by 'Title'
title_and_sex[['Title', 'Sex']].groupby(['Title'], as_index=False).agg(['mean', 'count', 'sum'])

# Since Sex is a binary feature, this metrics grouped by the Title feature represent:
    # MEAN: percentage of men
    # COUNT: total observations
    # SUM: number of men


Unnamed: 0_level_0,Sex,Sex,Sex
Unnamed: 0_level_1,mean,count,sum
Title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Capt,1.0,1,1
Col,1.0,2,2
Countess,0.0,1,0
Don,1.0,1,1
Dr,0.857143,7,6
Jonkheer,1.0,1,1
Lady,0.0,1,0
Major,1.0,2,2
Master,1.0,40,40
Miss,0.0,182,0


Create Numpy arrays of train, test and target (Survived) dataframes to feed into our models


In [599]:
y_train = train['Survived']
x_train = train.drop(['Survived'], axis=1).values 
x_test = test.values

adaboost_x_train = x_train
adaboost_x_test = x_test
adaboost_y_train = y_train
x_test.shape
adaboost_x_test.shape

(418, 10)

In [600]:
class Node:
    """This is the node class, having left and right children to store the value, values to be store at leaf node"""
    def __init__(self, feature_idx=None, split=None, predicted_class=None,num_samples=None):
        self.feature_idx = feature_idx   # index of feature used for splitting
        self.split = split               # value of feature used for splitting
        self.predicted_class = predicted_class   # majority class in the node
        self.left = None                 # left child node
        self.right = None                # right child node
        self.is_leaf = False             # flag indicating if the node is a leaf
        self.num_samples = num_samples
        # self.value = None

    def set_params(self, feature_idx, split): # setting the parameter 
        self.feature_idx = feature_idx
        self.split = split
    
    def set_children(self, left, right): # set children of the given parent node
        self.left = left
        self.right = right


In [601]:
class DecissionTree:
  def __init__(self, criterion, max_depth,min_samples_split,min_samples_leaf):
    """
    criterion - Either misclassification rate, Gini impurity, or entropy.
    max_depth - The maximum depth the tree should grow.
    min_samples_split - The minimum number of samples required to split.
    min_samples_leaf - The minimum number of samples required for a leaf node.
    """
    self.criterion = criterion
    self.max_depth= max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.root = None

  def fit(self,X,y):
    """Fit method to initialize X value and y to get the no. of classes, unique classes and to build the tree"""
    self.n_classes_ = len(np.unique(y)) # getting total no. of value
    self.classes_ = np.unique(y) # get all unique classes
    n_samples, self.n_features_ = X.shape
    self.root = self.grow_decission_tree(X, y) #building the tree
    return self

  def grow_decission_tree(self,X,y,depth = 0):
    num_samples_per_class = np.bincount(y, minlength=self.n_classes_) # geting no. of samples per class
    predicted_class = np.argmax(num_samples_per_class) # take the predicted class
    node = Node(predicted_class=predicted_class, num_samples=y.size) # Initialize the node
    # True condition depth is less than max_depth and if the size of y is greater than  min samples split 
    stopping_condition = depth < self.max_depth and y.size >= self.min_samples_split 
    if stopping_condition:
      best_gain = 0.0 # initialize the gain and criteria and best sets
      best_criteria = None
      best_sets = None
      for feature_idx in range(self.n_features_): # iterating through all feature_s
        feature_values = X[:, feature_idx] 
        possible_splits = self.split_the_data(feature_values) #getting best possible spit
        for split in possible_splits: 
          left_indices = feature_values <= split # getting indices for left child
          right_indices = feature_values > split # getting indices for right child
          if np.sum(left_indices) > 0 and np.sum(right_indices) > 0: # checking fot the cornner cases
            y_left = y[left_indices]
            y_right = y[right_indices]
            gain = self._criterion_gain(y, y_left, y_right) # getting criterion gain for the given criterion options are gini entropy misclassification errot
            if gain > best_gain:  # getting the best gain and selecting that splits from all the best possible split
              best_gain = gain
              best_criteria = (feature_idx, split)
              best_sets = (left_indices, right_indices)
      if best_gain > 0.0: # if some gain than take the left anf right child
        left = self.grow_decission_tree(X[best_sets[0]], y[best_sets[0]], depth + 1)  # grow left tree
        right = self.grow_decission_tree(X[best_sets[1]], y[best_sets[1]], depth + 1) # grow right tree
        node.set_params(best_criteria[0],best_criteria[1]) # setting parameters
        node.set_children(left,right) # setting children 
        node.num_samples = None
      else:
        node.is_leaf = True # its the leaf tree
        node.set_children(None,None)
    return node

  def split_the_data(self, feature_array):
    """This is a Python function that splits an input feature array into intervals. It first finds the unique values in the feature array using NumPy. If there is only one unique value, it returns an empty list. If there are multiple unique values, the function calculates the midpoints between consecutive values by taking the average of each value with the next value in the sorted list of unique values
    """
    data = np.unique(feature_array)
    if len(data) == 1:
        return [] 
    mp = (data[:-1] + data[1:]) / 2
    return mp

  def _criterion_gain(self, y, y_left, y_right):
    """
    Common function for all criterian"""
    parent_score = self.criterion_function(y)
    left_score = self.criterion_function(y_left)
    right_score = self.criterion_function(y_right)
    fl = y_left.size / y.size
    fr = y_right.size / y.size
    gain = parent_score - (fl * left_score + fr * right_score)
    return gain

  def criterion_function(self, y):
    """This function calculates the criterion score for a dataset based on a specified criterion, which can be 'gini', 'entropy', or 'misclassification error'.
    It takes one input array y representing the target variable and returns the calculated score."""
    if self.criterion == 'gini':
      _, l = np.unique(y, return_counts=True)
      probs = l / y.size
      cal_score = 1.0 - np.sum(probs ** 2)
    elif self.criterion == 'entropy':
      _, l = np.unique(y, return_counts=True)
      probs = l / y.size
      cal_score = -np.sum(probs * np.log2(probs))
    elif self.criterion == 'misclassification error':
      _, l = np.unique(y, return_counts=True)
      probs = l / y.size
      cal_score = 1.0 - np.max(probs)
    else:
        raise ValueError("No Allowed values: 'gini', 'entropy', 'misclassification error' is passed.")
    return cal_score
  
  def predict(self, X):
    """This function makes predictions using a trained decision tree on new data. It takes an input feature matrix `X`, 
    uses a for loop to traverse each sample, and returns an array of predicted class labels for each sample in `X`."""
    n_samples = X.shape[0]
    y_pred = np.zeros((n_samples,))
    for i in range(n_samples): # Traversing the whole tree
      node = self.root
      while not node.is_leaf:
        if node.split is not None: # for corner case if no split possible
          if X[i][node.feature_idx] <= node.split:
            node = node.left
          else:
            node = node.right
        else:
          break
      y_pred[i] = node.predicted_class
    return y_pred


In [602]:
import numpy as np

# create a decision tree and fit it to the data
tree = DecissionTree(criterion='gini', max_depth=6 , min_samples_split=2, min_samples_leaf=1)
tree.fit(x_train, y_train)


y_pred = tree.predict(x_train)
accuracy = np.mean(y_pred == y_train)

print('Training Accuracy:', accuracy)
y_pred = tree.predict(x_test)
print("Predicted Class Label:", y_pred)


Training Accuracy: 0.8619528619528619
Predicted Class Label: [0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 1. 0.
 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0.
 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0.
 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0.
 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 1.
 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0.
 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0.
 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0.
 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1.
 1. 1. 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0.
 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0.

# Random Forests

In [603]:
import random
from collections import Counter

class RandomForest:
    """
    for replacement and extracting features 
    https://towardsdatascience.com/understanding-sampling-with-and-without-replacement-python-7aff8f47ebe4
    for random sampling
    https://pynative.com/python-random-sample/
    use of max for most common
    https://www.geeksforgeeks.org/python-max-function/
    """
    def __init__(self, classifier, num_trees, min_features):
        """
        classifier - decission tree object
        num_trees - No. of trees.
        min_features - Minimum features to take.
        """
        self.classifier = classifier
        self.num_trees = num_trees
        self.min_features = min_features
        self.feature_indices = None # to store randomly selected features
    def sample_with_replacement(self,X):
        return [random.randint(0, len(X)-1) for _ in range(len(X))] # Replacing randome
    def random_subset_feature(self,nf):
        return random.sample(range(nf), random.randint(self.min_features, nf)) # choosing random no. of features ranging from min_features to total no. of features
    def fit(self, X, y):
        """
        Making n trees, selecting sample with replacement and taking random feature subser=t
        """
        # Create num_trees decision trees
        self.trees = []
        for _ in range(self.num_trees): # making n no. of trees
            random.seed(123)
            sample_indices = self.sample_with_replacement(X) # Sample with replacement
            data = X[sample_indices] # get data
            class_pred = y[sample_indices] # getting y values after replacement

            feature_indices = self.random_subset_feature(X.shape[1])  # Select a random subset of features
            self.feature_indices = feature_indices
            data = data[:, feature_indices] # get data after random selecting

            tree = self.classifier # Fit a decision tree using the sample and feature subset
            tree.fit(data, class_pred)
            self.trees.append(tree) # append object of tree after training

    def predict(self, X):
        """
        Getting most common from all trees"""
        predictions = self.get_predictions_from_tree(X)
        # Find the most common prediction for each sample
        y_pred = []
        for i in range(len(X)):
            sample_predictions = [pred[i] for pred in predictions]
            most_common = max(set(sample_predictions), key=sample_predictions.count)
            y_pred.append(most_common)
        return y_pred
    
    def get_predictions_from_tree(self,X):
        """
        Get predictions from All trees"""
        predictions = []
        for tree in self.trees:
            predictions.append(tree.predict(X))
        return predictions


In [604]:
rf = RandomForest(DecissionTree(criterion='entropy', max_depth=6, min_samples_split=2, min_samples_leaf=1), num_trees=5, min_features=4)
rf.fit(x_train,y_train)
y_pred = rf.predict(x_train)
accuracy = np.mean(y_pred == y_train)

print('Training Accuracy:', accuracy)
y_pred = rf.predict(x_test)
print("Predicted Class Label:", y_pred)

Training Accuracy: 0.3153759820426487
Predicted Class Label: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0

# Adaboost

In [605]:
class AdaBoost:
    def __init__(self, weak_learner, num_learners, learning_rate):
        self.weak_learner = weak_learner
        self.num_learners = num_learners
        self.learning_rate = learning_rate
        self.learners = []
        self.weights = []
    def initializing_the_weights(self,n):
        """
        initializing weight
        """
        gen_weights = np.full(n, 1/n)
        return gen_weights
    def fit(self, X, y):
        self.weights = self.initializing_the_weights(X.shape[0]) # Initialize weights

        for i in range(self.num_learners):
            # Weight training data by sample weights
            # print(f"x={X.shape},w={self.weights.shape}")
            reshape_weight_array= np.array(self.weights).reshape(-1, 1) #reshaping for manage shape
            weighted_X = np.multiply(X, reshape_weight_array) #calculated weighted array 
            # print(f"y={y.shape},wx={weighted_X.shape}")
            learner = self.weak_learner.fit(weighted_X, y) # Train weak learner (Decission tree) on weighted data
            y_pred = learner.predict(X) # Predict on training data
            incorrect = [1 if y_pred[i] != y[i] else 0 for i in range(len(y))] 
            weighted_error = sum([w * incorrect[i] for i, w in enumerate(self.weights)]) / sum(self.weights) # and calculate error

            if weighted_error == 0:
                learner_weight = float('inf')
            else:
                learner_weight = np.log((1 - weighted_error) / weighted_error) * self.learning_rate
            exponent = np.multiply(y, y_pred)
            self.weights *= np.exp(-learner_weight * np.where(exponent < 0, 1, 0))

            self.learners.append(learner)
            self.weights /= np.sum(self.weights) # add weights

            if weighted_error == 0: # stoppoing early if no error change
                break

    def predict(self, X):
        # Predict using weighted ensemble of learners
        # print(f"x={X.shape},w={self.weights.shape}")
        learner_preds = [learner.predict(X) for learner in self.learners]

        weights = np.tile(np.array(self.weights)[:, np.newaxis], (1, len(self.learners)))
        # print(f"x={np.array(learner_preds).shape},w={weights.shape}")
        learner_preds_trans = np.array(learner_preds).T
        ensemble_pred = np.sign(np.sum(weights * learner_preds_trans, axis=1))
        return ensemble_pred

In [606]:
# Create AdaBoost classifier with DecisionTree weak learner
adaboost = AdaBoost(weak_learner=DecissionTree(criterion='misclassification error', max_depth=18, min_samples_split=2, min_samples_leaf=1),
                    num_learners=50,
                    learning_rate=0.1)

# Train AdaBoost classifier on training data
adaboost.fit(adaboost_x_train, adaboost_y_train)
y_pred = adaboost.predict(adaboost_x_train)
accuracy = np.mean(y_pred == adaboost_y_train)

print('Training Accuracy:', accuracy)
y_pred = adaboost.predict(adaboost_x_train)
print("Predicted Class Label:", y_pred)

Training Accuracy: 0.24242424242424243
Predicted Class Label: [1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1.
 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1.
 1. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0.
 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1.
 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0.
 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1.
 1. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0.
 0. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1.
 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1.
 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1

# Accuracy 

| Model | Accuracy(%) |
| --- | ----------- |
| Decission Tree | 83.05274971941639% |
| Random Forest | 40.62401795735129% |
| Adaboost | 24.464646464646464% |