In [None]:
import numpy as np
from src.perceptron import Perceptron
from src.trainer import TrainerConfig, train_perceptron, evaluate_perceptron, Scaler
from src.readers import read_csv
from src.plotter import plot_arrays, plot_bars
from src.theta_funcs import ThetaFunction
from src.error_funcs import cost_average
from src.dataset_selectors import split_random_by_percentage, handpick_diverse_training_dataset
from src.runner import run_n_times, run_iteration

In [None]:
# Exercise-2 data set: read_csv returns the inputs and their expected outputs.
dataset, dataset_outputs = read_csv('TP3-ej2-conjunto.csv')

# One trainer configuration per activation function, loaded in the same order
# as before (linear, tanh, logistic).
config_linear, config_tanh, config_logistic = (
    TrainerConfig.from_file("ejercicio2-lineal-config.json"),
    TrainerConfig.from_file("ejercicio2-tanh-config.json"),
    TrainerConfig.from_file("ejercicio2-logistic-config.json"),
)

In [None]:
def split_random_by_percentage_and_theta(percentage: float, theta: ThetaFunction, theta_name: str, config: TrainerConfig):
    """Split the module-level dataset by percentage and prepare outputs for one activation.

    Delegates the split to ``split_random_by_percentage`` using the
    module-level ``dataset`` / ``dataset_outputs``. Unless ``theta_name`` is
    ``"lineal"``, both expected-output sets are passed through
    ``config.scaler.scale``.

    Args:
        percentage: Value forwarded to ``split_random_by_percentage``; the
            label treats it as the training fraction (presumably in [0, 1]).
        theta: Activation function object; not used by this helper.
        theta_name: Activation name; ``"lineal"`` disables output scaling.
        config: Trainer configuration whose ``scaler`` is applied to outputs.

    Returns:
        Tuple ``(train_inputs, train_outputs, test_inputs, test_outputs, name)``
        where ``name`` is a descriptive label for the split.
    """
    train_inputs, train_outputs, test_inputs, test_outputs = split_random_by_percentage(
        dataset, dataset_outputs, percentage
    )

    # Only the non-linear activations get their expected outputs rescaled.
    is_linear = theta_name == "lineal"
    if not is_linear:
        train_outputs = config.scaler.scale(train_outputs)
        test_outputs = config.scaler.scale(test_outputs)

    label = f"{percentage*100}% Training {100-percentage*100}% Test para theta {theta_name}"
    return train_inputs, train_outputs, test_inputs, test_outputs, label

def split_handpicked_by_theta(theta: ThetaFunction, theta_name: str, config: TrainerConfig):
    """Split the module-level dataset with the hand-picked selector for one activation.

    Delegates the split to ``handpick_diverse_training_dataset`` using the
    module-level ``dataset`` / ``dataset_outputs``. Unless ``theta_name`` is
    ``"lineal"``, both expected-output sets are passed through
    ``config.scaler.scale``.

    Args:
        theta: Activation function object; not used by this helper.
        theta_name: Activation name; ``"lineal"`` disables output scaling.
        config: Trainer configuration whose ``scaler`` is applied to outputs.

    Returns:
        Tuple ``(train_dataset, train_dataset_outputs, test_dataset,
        test_dataset_outputs, name)`` where ``name`` is a descriptive label.
    """
    train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs = handpick_diverse_training_dataset(dataset, dataset_outputs)

    # Only the non-linear activations get their expected outputs rescaled.
    if theta_name != "lineal":
        train_dataset_outputs = config.scaler.scale(train_dataset_outputs)
        test_dataset_outputs = config.scaler.scale(test_dataset_outputs)

    # Build the label unconditionally: it used to be assigned only inside the
    # branch above, so a "lineal" call hit UnboundLocalError at the return.
    name = f"Handpick para theta {theta_name}"

    return train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, name

In [None]:
# Learning vs. generalization capacity for TANH.
# For each training proportion: split the data, then collect whatever
# run_iteration returns (its error histories are read by the plotting cell).

proportions = [0.1, 0.5, 0.9]
results = []

for proportion in proportions:
    # theta_name "tanh" makes the helper scale the expected outputs with
    # config_tanh.scaler.
    train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, name = (
        split_random_by_percentage_and_theta(proportion, config_tanh.theta, "tanh", config_tanh)
    )

    results.append(
        run_iteration(train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, config_tanh)
    )

In [None]:
# One figure per split: training-error history vs. test-error history.
# results[i] is the run obtained with proportions[i].
for i, train_fraction in enumerate(proportions):
    train_percentage = train_fraction * 100
    test_percentage = 100 - train_percentage

    outcome = results[i]
    error_curves = [outcome.result.error_history, outcome.test_error_history]
    curve_labels = ["Error de entrenamiento", "Error de test"]
    title = f"Error de entrenamiento y test para {train_percentage}% entrenamiento y {test_percentage}% test para TANH"

    plot_arrays(error_curves, curve_labels, title, "Epocas", "Error")


In [None]:
# Learning vs. generalization capacity for TANH (training proportions 0.75-0.95).
# For each training proportion: split the data, then collect whatever
# run_iteration returns (its error histories are read by the plotting cell).

proportions = [0.75, 0.85, 0.95]
results = []

for proportion in proportions:
    # theta_name "tanh" makes the helper scale the expected outputs with
    # config_tanh.scaler.
    train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, name = (
        split_random_by_percentage_and_theta(proportion, config_tanh.theta, "tanh", config_tanh)
    )

    results.append(
        run_iteration(train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, config_tanh)
    )

In [None]:
# One figure per split: training-error history vs. test-error history.
# results[i] is the run obtained with proportions[i].
for i, train_fraction in enumerate(proportions):
    train_percentage = train_fraction * 100
    test_percentage = 100 - train_percentage

    outcome = results[i]
    error_curves = [outcome.result.error_history, outcome.test_error_history]
    curve_labels = ["Error de entrenamiento", "Error de test"]
    title = f"Error de entrenamiento y test para {train_percentage}% entrenamiento y {test_percentage}% test para TANH"

    plot_arrays(error_curves, curve_labels, title, "Epocas", "Error")


In [None]:
# Learning vs. generalization capacity for TANH (training proportions 0.8-0.95).
# For each training proportion: split the data, then collect whatever
# run_iteration returns (its error histories are read by the plotting cell).

proportions = [0.8, 0.85, 0.9, 0.95]
results = []

for proportion in proportions:
    # theta_name "tanh" makes the helper scale the expected outputs with
    # config_tanh.scaler.
    train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, name = (
        split_random_by_percentage_and_theta(proportion, config_tanh.theta, "tanh", config_tanh)
    )

    results.append(
        run_iteration(train_dataset, train_dataset_outputs, test_dataset, test_dataset_outputs, config_tanh)
    )

In [None]:
# One figure per split: training-error history vs. test-error history.
# results[i] is the run obtained with proportions[i].
for i, train_fraction in enumerate(proportions):
    train_percentage = train_fraction * 100
    test_percentage = 100 - train_percentage

    outcome = results[i]
    error_curves = [outcome.result.error_history, outcome.test_error_history]
    curve_labels = ["Error de entrenamiento", "Error de test"]
    title = f"Error de entrenamiento y test para {train_percentage}% entrenamiento y {test_percentage}% test para TANH"

    plot_arrays(error_curves, curve_labels, title, "Epocas", "Error")