### Imports

In [52]:
import pandas as pd
import numpy as np
import cv2
from sklearn.svm import NuSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import os
from PIL import Image
import numpy as np
import pywt
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

### Semilla

In [2]:
random_seed = 42  # Fixed seed value; presumably passed as `random_seed` to the k-fold helpers for reproducible splits (confirm at call sites)

### Hacer resize y guardar una imagen

In [36]:
def resize_and_save_img(src, destination_path, new_image_width=60, new_image_height=60):
    """Center an image on a white canvas of fixed size and save it.

    The image at ``src`` is pasted, centered, onto a white
    ``new_image_height`` x ``new_image_width`` canvas. Images that do not fit
    are first shrunk (keeping their aspect ratio) so that they do.

    Args:
        src: Path of the image to read.
        destination_path: Path the padded image is written to. The containing
            directory must already exist.
        new_image_width: Canvas width in pixels (defaults to 60).
        new_image_height: Canvas height in pixels (defaults to 60).

    Raises:
        OSError: If ``src`` cannot be read as an image.
    """
    original_img = cv2.imread(src)
    if original_img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {src}")

    old_image_height, old_image_width, channels = original_img.shape
    color = (255, 255, 255)

    # An image larger than the canvas would produce negative offsets and a
    # shape mismatch on assignment, so shrink it to fit first.
    scale = min(new_image_width / old_image_width,
                new_image_height / old_image_height)
    if scale < 1:
        old_image_width = min(new_image_width,
                              max(1, round(old_image_width * scale)))
        old_image_height = min(new_image_height,
                               max(1, round(old_image_height * scale)))
        # cv2.resize takes the target size as (width, height).
        original_img = cv2.resize(original_img,
                                  (old_image_width, old_image_height),
                                  interpolation=cv2.INTER_AREA)

    result = np.full((new_image_height, new_image_width, channels), color, dtype=np.uint8)

    x_center = (new_image_width - old_image_width) // 2
    y_center = (new_image_height - old_image_height) // 2

    # Center the image. OpenCV loads pixels in BGR order while PIL interprets
    # the array as RGB, so reverse the channel axis to keep colors correct.
    result[y_center:y_center + old_image_height,
           x_center:x_center + old_image_width] = original_img[:, :, ::-1]

    Image.fromarray(result).save(destination_path)

### Unificar data de Train y Test

In [37]:
def generate_new_data():
    """Merge the Train and Test images into one per-class directory tree.

    Every image under ``Data/Train/<class>/`` is padded with
    ``resize_and_save_img`` and written to ``Data_preprocesada/<class>/``.
    Every image under ``Data/Test/`` is written to the folder of the class
    listed in the ``ClassId`` column of ``Data/Test.csv``.

    Destination class folders are created on demand, so the function also
    works on a fresh checkout where ``Data_preprocesada/`` does not exist yet.
    """
    train_dir = "Data/Train/"
    test_dir = "Data/Test/"
    destination_dir = "Data_preprocesada/"

    for class_dir in os.listdir(train_dir):
        class_source = os.path.join(train_dir, class_dir)
        class_destination = os.path.join(destination_dir, class_dir)
        os.makedirs(class_destination, exist_ok=True)
        for train_img in os.listdir(class_source):
            resize_and_save_img(os.path.join(class_source, train_img),
                                os.path.join(class_destination, train_img))

    test_info = pd.read_csv("Data/Test.csv")
    # NOTE(review): the i-th file name (in sorted order) is paired with the
    # i-th CSV row; this assumes the directory holds only the test images and
    # that their sorted names follow the CSV row order -- confirm.
    for i, test_img in enumerate(sorted(os.listdir(test_dir))):
        class_destination = os.path.join(destination_dir, str(test_info.ClassId[i]))
        os.makedirs(class_destination, exist_ok=True)
        resize_and_save_img(os.path.join(test_dir, test_img),
                            os.path.join(class_destination, test_img))

# generate_new_data()  # Run only once (writes the preprocessed images to Data_preprocesada/)

In [1]:
def get_vector_from_image(image, iterations):
    """Return the flattened Haar approximation coefficients of an image.

    A 2-D Haar wavelet transform is applied to ``image`` and then re-applied
    to the resulting approximation band until ``iterations`` levels have been
    computed. The detail bands are discarded at every level. At least one
    level is always computed, even when ``iterations`` is less than 1.

    NOTE(review): ``pywt.dwt2`` is called with its default axes; if ``image``
    has a channel dimension, confirm that this is the intended behaviour.

    Args:
        image: Array-like image accepted by ``pywt.dwt2``.
        iterations: Number of decomposition levels.

    Returns:
        1-D array with the approximation coefficients of the last level.
    """
    approximation, _details = pywt.dwt2(image, 'haar')
    remaining_levels = iterations - 1
    while remaining_levels > 0:
        approximation, _details = pywt.dwt2(approximation, 'haar')
        remaining_levels -= 1
    return approximation.flatten()

#### Obtener data (estratificada y no estratificada)

In [4]:
def get_data(src_dir, iterations):
    """Load every image under ``src_dir`` as a wavelet feature vector.

    ``src_dir`` must contain one sub-directory per class, named with the
    integer class id. Each image is converted to a feature vector with
    ``get_vector_from_image``.

    Directories and files are visited in sorted order so that the sample
    order (and therefore any seeded split built on it) is the same on every
    run and every file system.

    Args:
        src_dir: Root directory holding the per-class sub-directories.
        iterations: Number of wavelet decomposition levels per image.

    Returns:
        Tuple ``(x, y)``: array of feature vectors and array of integer
        class labels, in matching order.

    Raises:
        ValueError: If a sub-directory name is not an integer.
    """
    x = []
    y = []

    for class_dir in sorted(os.listdir(src_dir)):
        class_path = os.path.join(src_dir, class_dir)
        label = int(class_dir)
        for train_img in sorted(os.listdir(class_path)):
            # The context manager closes the file handle once the pixels
            # have been consumed by the transform.
            with Image.open(os.path.join(class_path, train_img)) as img:
                fv = get_vector_from_image(img, iterations)
            x.append(fv)
            y.append(label)
    return np.asarray(x), np.asarray(y)

In [3]:
def normalization(data):
    """Min-max scale every column of a 2-D data set to the range [0, 1].

    Each column is mapped with ``(value - min) / (max - min)``. A constant
    column (``max == min``) would divide by zero, so such columns are mapped
    to 0 instead of producing NaN/inf values.

    Args:
        data: 2-D array-like of shape ``(n_samples, n_features)``.

    Returns:
        Floating-point array with the same shape as ``data``.
    """
    values = np.asarray(data)
    if not np.issubdtype(values.dtype, np.floating):
        # Integer input must be promoted so the division is not truncated
        # and unsigned subtraction cannot wrap around.
        values = values.astype(float)
    if values.size == 0:
        return values

    minimum = values.min(axis=0)
    value_range = values.max(axis=0) - minimum
    # Replace zero ranges by 1 so constant columns become 0 rather than NaN.
    safe_range = np.where(value_range == 0, 1, value_range)
    return (values - minimum) / safe_range

In [5]:
def unison_shuffled_copies(a, b):
    """Return shuffled copies of two arrays using one shared permutation.

    The same random ordering (drawn from NumPy's global random state) is
    applied to both inputs, so elements that shared an index before the
    shuffle still share one afterwards.

    Args:
        a: First array.
        b: Second array; must have the same length as ``a``.

    Returns:
        Tuple ``(shuffled_a, shuffled_b)``.
    """
    size = len(a)
    assert size == len(b)
    order = np.random.permutation(size)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

### K-fold Cross Validation Estratificado

In [44]:
def get_stratified_k_fold_cross_validation(X, y, number_of_folds, random_seed):
    """Split a data set into stratified, shuffled cross-validation folds.

    Args:
        X: Array of samples, indexable with an array of integer indices.
        y: Array of class labels aligned with ``X``; used for stratification.
        number_of_folds: Number of folds to create.
        random_seed: Seed for the shuffling, making the split reproducible.

    Returns:
        List with one dict per fold, holding the keys ``'X_train'``,
        ``'X_test'``, ``'y_train'`` and ``'y_test'``.
    """
    skf = StratifiedKFold(n_splits=number_of_folds, shuffle=True, random_state=random_seed)

    # split() yields one (train indices, test indices) pair per fold.
    return [
        {
            'X_train': X[train_index],
            'X_test': X[test_index],
            'y_train': y[train_index],
            'y_test': y[test_index],
        }
        for train_index, test_index in skf.split(X, y)
    ]

### K-fold Cross Validation no Estratificado

In [50]:
def get_non_stratified_k_fold_cross_validation(X, y, number_of_folds, random_seed):
    """Split a data set into shuffled (non-stratified) cross-validation folds.

    Args:
        X: Array of samples, indexable with an array of integer indices.
        y: Array of labels aligned with ``X``.
        number_of_folds: Number of folds to create.
        random_seed: Seed for the shuffling, making the split reproducible.

    Returns:
        List with one dict per fold, holding the keys ``'X_train'``,
        ``'X_test'``, ``'y_train'`` and ``'y_test'``.
    """
    kf = KFold(n_splits=number_of_folds, shuffle=True, random_state=random_seed)

    # split() yields one (train indices, test indices) pair per fold.
    return [
        {
            'X_train': X[train_index],
            'X_test': X[test_index],
            'y_train': y[train_index],
            'y_test': y[test_index],
        }
        for train_index, test_index in kf.split(X)
    ]