# Comparison of KMeans Implementations

## Import Statements

In [1]:
from cold_kmeans import *

In [2]:
import numpy as np
import pandas as pd
import time
import timeit
import line_profiler
import memory_profiler
from sklearn.cluster import KMeans

## Data Loading

In [3]:
# Read the Pokedex CSV and immediately narrow it to the columns kept for
# this notebook (identifier, categorical counts, and stat columns).
pokemon = pd.read_csv("../pokedex.csv", sep=",")[
    [
        "name", "status", "type_number",
        "height_m", "weight_kg", "abilities_number",
        "total_points", "hp", "attack",
        "defense", "sp_attack", "sp_defense",
        "speed", "egg_type_number",
    ]
]

# Two-column NumPy matrix (height_m, total_points) handed to the sklearn cells.
pokemon_np = pokemon.loc[:, ["height_m", "total_points"]].to_numpy()

In [6]:
# Load the memory_profiler IPython extension; it provides the %memit magic
# used by the memory measurements in this notebook.
%load_ext memory_profiler

## Cold Implementation

In [4]:
%%timeit -r7 -n15

# Repeated timing of the cold implementation: 7 runs of 15 loops each.
# NOTE(review): positional args are presumably (csv path, n_clusters=3,
# seed=22, feature columns, iteration cap=10) -- confirm against cold_kmeans.
# NOTE(review): cold_means is handed a CSV path rather than an array, so this
# timing presumably includes reading the file -- confirm.
sa, sb = cold_means("../pokedex.csv", 3, 22, ["total_points", "height_m"], 10)

9 ms ± 687 µs per loop (mean ± std. dev. of 7 runs, 15 loops each)


In [5]:
%%time

# Single execution of the same cold_means call, reporting CPU and wall time.
# The two returned values are bound to sa and sb but not inspected here.
sa, sb = cold_means("../pokedex.csv", 3, 22, ["total_points", "height_m"], 10)

CPU times: total: 31.2 ms
Wall time: 11 ms


In [7]:
# Peak memory and increment for one cold_means call (requires the
# memory_profiler extension to be loaded first).
%memit cold_means("../pokedex.csv", 3, 22, ["total_points", "height_m"], 10)

peak memory: 140.77 MiB, increment: 0.05 MiB


## Sklearn Implementation

In [8]:
%%timeit -r7 -n15

# Repeated timing of sklearn KMeans (construct + fit): 7 runs of 15 loops each.
# NOTE(review): max_iter=200 here, while the cold_means calls pass 10 as their
# final argument -- if that is an iteration cap, the two benchmarks do not run
# under the same budget; confirm.
# NOTE(review): n_init is left at the sklearn default, which per the sklearn
# docs means several initialisations for init="random" -- confirm for the
# installed version.
# The input is the pre-loaded pokemon_np array, so no file reading is timed.
km_alg = KMeans(n_clusters=3, init="random", random_state = 22, max_iter = 200)
fit = km_alg.fit(pokemon_np)

25.5 ms ± 805 µs per loop (mean ± std. dev. of 7 runs, 15 loops each)


In [9]:
%%time

# Single execution of the same KMeans construct + fit, reporting CPU and wall
# time. fit() is called on the pre-loaded pokemon_np array.
km_alg = KMeans(n_clusters=3, init="random", random_state = 22, max_iter = 200)
fit = km_alg.fit(pokemon_np)

CPU times: total: 203 ms
Wall time: 32 ms


In [10]:
def skmeans(data, n_clusters=3, random_state=22, max_iter=200):
    """Fit sklearn's KMeans on ``data`` so the whole run can be profiled as one call.

    The function exists to give ``%memit`` a single expression to measure; the
    fitted estimator is intentionally discarded and nothing is returned.

    Parameters
    ----------
    data
        Samples passed unchanged to ``KMeans.fit``.
    n_clusters : int, default 3
        Forwarded to ``KMeans(n_clusters=...)``.
    random_state : int, default 22
        Forwarded to ``KMeans(random_state=...)``.
    max_iter : int, default 200
        Forwarded to ``KMeans(max_iter=...)``.

    Returns
    -------
    None
    """
    # init="random" is kept fixed: it is the initialisation used by every
    # sklearn benchmark cell in this notebook.
    KMeans(
        n_clusters=n_clusters,
        init="random",
        random_state=random_state,
        max_iter=max_iter,
    ).fit(data)

In [11]:
# Peak memory and increment for one sklearn KMeans fit, via the skmeans
# wrapper (requires the memory_profiler extension to be loaded first).
%memit skmeans(pokemon_np)

peak memory: 141.59 MiB, increment: 0.00 MiB
