# Comparison of K-Fold Cross-Validation Implementations

## Import Statements

In [1]:
from cold_crossval import *

In [2]:
import numpy as np
import pandas as pd
import time
import timeit
import line_profiler
import memory_profiler
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error

## Data Loading

In [3]:
# Read the pokedex CSV through the project helper `data_reduction`, then keep
# only the columns listed here. Their order fixes the column positions of the
# NumPy array built below (e.g. index 1 is "status", 6..12 are the stat columns).
overall_poke = data_reduction("../pokedex.csv", ["status"])[
    [
        "name", "status", "type_number",
        "height_m", "weight_kg", "abilities_number",
        "total_points", "hp", "attack",
        "defense", "sp_attack", "sp_defense",
        "speed", "egg_type_number",
    ]
]

# Discard every row that has a missing value, then convert to a plain array.
overall_poke_np = overall_poke.dropna(axis = "index").to_numpy()

In [4]:
%load_ext memory_profiler

## Cold Implementation

In [5]:
%%timeit -r7 -n15
cold_kfoldcv(data = overall_poke_np,
         col_names = ["name", "status", "type_number", "height_m", "weight_kg", "abilities_number", "total_points", "hp", "attack", "defense", "sp_attack", "sp_defense", "speed", "egg_type_number"], 
         inputs = ["total_points", "hp", "attack", "defense", "sp_attack", "sp_defense", "speed"], 
         output = "status", 
         k = 10)

12.8 ms ± 263 µs per loop (mean ± std. dev. of 7 runs, 15 loops each)


In [6]:
%%time
cold_kfoldcv(data = overall_poke_np,
         col_names = ["name", "status", "type_number", "height_m", "weight_kg", "abilities_number", "total_points", "hp", "attack", "defense", "sp_attack", "sp_defense", "speed", "egg_type_number"], 
         inputs = ["total_points", "hp", "attack", "defense", "sp_attack", "sp_defense", "speed"], 
         output = "status", 
         k = 10)

CPU times: total: 15.6 ms
Wall time: 14 ms


17.547991623834

In [7]:
%memit cold_kfoldcv(data = overall_poke_np, col_names = ["name", "status", "type_number", "height_m", "weight_kg", "abilities_number", "total_points", "hp", "attack", "defense", "sp_attack", "sp_defense", "speed", "egg_type_number"], inputs = ["total_points", "hp", "attack", "defense", "sp_attack", "sp_defense", "speed"], output = "status", k = 10)

peak memory: 137.57 MiB, increment: 0.05 MiB


In [19]:
# Hold out 10% of the rows; the seed fixes which rows land in each split.
train, test = train_test_split(overall_poke_np, train_size = .9, random_state = 2022)

# Columns 6:13 are total_points .. speed and column 1 is status, following the
# column order used to build overall_poke_np.
# random_state pins the tree's internal feature shuffling so that tied splits
# are resolved the same way on every run and the score below is reproducible.
tree = DecisionTreeClassifier(ccp_alpha = 0.001, max_depth = 3, random_state = 2022)
# Labels are cast to int for scikit-learn -- presumably because the array is
# object-dtype (it also carries the "name" column); confirm if that changes.
tree.fit(train[:, 6:13], train[:, 1].astype(int))

preds = tree.predict(test[:, 6:13])

# Score the hold-out predictions with the project's own metric helper; left as
# the last expression so the value is displayed.
cold_classification(preds, test[:, 1])

0.02912621359223301

## Sklearn Implementation

In [11]:
%%timeit -r7 -n15

tree = DecisionTreeClassifier(ccp_alpha = 0.001, max_depth = 3)
cv_result = cross_val_score(tree, overall_poke_np[:, 6:13], overall_poke_np[:, 1].astype(int), cv = 10, scoring = 'accuracy')

mean_cv = np.mean(cv_result)
# print(1 - mean_cv)

12.7 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 15 loops each)


In [13]:
%%time

tree = DecisionTreeClassifier(ccp_alpha = 0.001, max_depth = 3)
cv_result = cross_val_score(tree, overall_poke_np[:, 6:13], overall_poke_np[:, 1].astype(int), cv = 10, scoring = "accuracy")

mean_cv = np.mean(cv_result)

CPU times: total: 15.6 ms
Wall time: 15.4 ms


In [14]:
def fold_func(data):
    """Run scikit-learn 10-fold cross-validation of a depth-3 decision tree.

    Wraps the scikit-learn workflow in one callable so it can be profiled as
    a single statement (e.g. with ``%memit``).

    Parameters
    ----------
    data : 2-D array
        Columns 6:13 are used as features and column 1 as the class label;
        the label column is cast to ``int`` before fitting.

    Returns
    -------
    float
        Mean accuracy over the 10 folds.
    """
    tree = DecisionTreeClassifier(ccp_alpha = 0.001, max_depth = 3)
    cv_result = cross_val_score(tree, data[:, 6:13], data[:, 1].astype(int), cv = 10, scoring = "accuracy")
    # Return the mean score instead of discarding it, so callers can inspect it.
    return np.mean(cv_result)

In [15]:
# Peak-memory measurement of the scikit-learn cross-validation wrapper, for
# comparison with the %memit figure reported for cold_kfoldcv.
%memit fold_func(overall_poke_np)

peak memory: 138.09 MiB, increment: 0.00 MiB
