## Instalar dependencias

In [6]:
!pip install -r 'app/requirements.txt' -qq
print('Se han instalado las dependencias')

Collecting absl-py==2.1.0 (from -r app/requirements.txt (line 1))
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting anyio==4.8.0 (from -r app/requirements.txt (line 2))
  Downloading anyio-4.8.0-py3-none-any.whl.metadata (4.6 kB)
Collecting argon2-cffi==23.1.0 (from -r app/requirements.txt (line 3))
  Downloading argon2_cffi-23.1.0-py3-none-any.whl.metadata (5.2 kB)
Collecting argon2-cffi-bindings==21.2.0 (from -r app/requirements.txt (line 4))
  Downloading argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting arrow==1.3.0 (from -r app/requirements.txt (line 5))
  Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)
Collecting asttokens==3.0.0 (from -r app/requirements.txt (line 6))
  Downloading asttokens-3.0.0-py3-none-any.whl.metadata (4.7 kB)
Collecting astunparse==1.6.3 (from -r app/requirements.txt (line 7))
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting as

In [10]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

## Funciones de preprocesamiento

In [15]:
def load_and_clean(path, filename, feature_selection):
    """Read a CSV file, keep the selected columns and drop incomplete rows.

    Args:
        path: Directory containing the file. A trailing separator is optional.
        filename: Name of the CSV file inside ``path``.
        feature_selection: List of column names to keep.

    Returns:
        A new DataFrame restricted to ``feature_selection`` in which every
        row that had a missing value in any of those columns was removed.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If a requested column is not present in the file.
    """
    # os.path.join adds the separator only when it is missing, so both
    # "data" and "data/" resolve to the same file.
    df_lter = pd.read_csv(os.path.join(path, filename))

    # Column selection with a list already yields a new object, so the raw
    # frame is never modified and no explicit copy is required.
    df_lter_sample = df_lter[feature_selection].dropna()

    # Sanity check reported to the user; after dropna() this is expected to
    # always take the first branch.
    print("No hay valores perdidos" if df_lter_sample.isna().sum().sum() == 0 else "Existe valores perdidos")

    return df_lter_sample

def transform_variables(df_lter_sample, variables_categoricas, variables_continuas):
    """Label-encode categorical columns and min-max scale continuous columns.

    The transformation is applied to a copy, so the frame passed in by the
    caller is left unchanged.

    Args:
        df_lter_sample: DataFrame containing the columns to transform.
        variables_categoricas: Column names to replace with integer codes.
        variables_continuas: Column names to rescale with ``MinMaxScaler``.

    Returns:
        A new DataFrame with the listed columns transformed; any other
        column is carried over as is.

    Note:
        The fitted encoders and scaler are local to this function and are not
        returned, so the exact same mapping cannot be re-applied to new data
        from the result alone.
    """
    # Work on a copy: assigning into the argument would silently modify the
    # caller's frame (and can trigger SettingWithCopyWarning on a slice).
    df_transformed = df_lter_sample.copy()

    # One encoder per column; each column gets its own independent mapping.
    for variable in variables_categoricas:
        df_transformed[variable] = LabelEncoder().fit_transform(df_transformed[variable])

    # Scale all continuous columns together; skipped when there is nothing to
    # scale because the scaler rejects an input with zero columns.
    if len(variables_continuas) > 0:
        scaler = MinMaxScaler()
        df_transformed[variables_continuas] = scaler.fit_transform(df_transformed[variables_continuas])

    return df_transformed


In [16]:
def data_split_model(data_path, size_filename, features, variables_categoricas,
                     variables_continuas, data_output, test_size=0.30, random_state=50):
    """Load, clean and transform the dataset, then split it into train and test sets.

    Args:
        data_path: Directory containing the CSV file.
        size_filename: Name of the CSV file.
        features: Columns to keep when loading. Must include ``'Species'`` and
            the four measurement columns selected below.
        variables_categoricas: Columns to label-encode.
        variables_continuas: Columns to min-max scale.
        data_output: Not used by this function; kept so existing calls keep working.
        test_size: Fraction of rows assigned to the test set (default 0.30).
        random_state: Seed for the shuffle performed by the split (default 50).

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The ``y`` parts are
        single-column DataFrames holding the ``'Species'`` column.

    Note:
        The transformation runs on the full dataset before the split, so the
        scaling is fitted on rows that later end up in the test set.
    """
    # Load and clean
    df = load_and_clean(data_path, size_filename, features)
    # Transform
    df = transform_variables(df, variables_categoricas, variables_continuas)

    # Target and predictors are fixed column names of the penguins dataset.
    y = df[['Species']]
    X = df[['Culmen Length (mm)', 'Culmen Depth (mm)',
            'Flipper Length (mm)', 'Body Mass (g)']]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        random_state=random_state,
        test_size=test_size,
    )

    return X_train, X_test, y_train, y_test

## Preprocesar datos

In [17]:
# Parameters: input location, output location and the role of each column
data_path = "data/"
size_filename = "penguins_lter.csv"
data_output = "app/models/"

variables_categoricas = ['Species']
variables_continuas = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]

# Columns to load: the four measurements followed by the target
features = variables_continuas + variables_categoricas

# Run the whole preprocessing pipeline and keep the four resulting splits
X_train, X_test, y_train, y_test = data_split_model(
    data_path,
    size_filename,
    features,
    variables_categoricas,
    variables_continuas,
    data_output,
)

No hay valores perdidos


## Entrenamiento de Modelos

In [None]:
# Train a one-vs-rest logistic regression model
model = linear_model.LogisticRegression(multi_class='ovr', solver='liblinear')
# y_train is a single-column DataFrame; flatten it to the 1-D target that
# scikit-learn expects (avoids the column-vector DataConversionWarning).
model.fit(X_train, y_train.to_numpy().ravel())

# Persist the model, creating the output directory first if it is missing,
# then reload it so the evaluation below runs on the serialized artifact.
if data_output:
    os.makedirs(data_output, exist_ok=True)
model_path = os.path.join(data_output, "model_logreg.pkl")
joblib.dump(model, model_path)
logreg_model = joblib.load(model_path)

y_pred = logreg_model.predict(X_test)

# LabelEncoder assigns codes in sorted order of the class values, so the
# names must be listed alphabetically: 0=Adelie, 1=Chinstrap, 2=Gentoo.
# NOTE(review): assumes the raw Species values start with these names -
# confirm against the CSV.
target_names = ['Adelie', 'Chinstrap', 'Gentoo']

print(classification_report(y_test, y_pred, target_names=target_names))
print('-------------------------')
print('Entrenamiento de modelo finalizado')