In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from src.evaluation.evaluation import calculate_metrics, export_model
from sklearn.metrics import mean_squared_error
from sklearn import tree, ensemble
import numpy as np
import pickle
from src.modelization.models_utils import get_pipeline
from src.constants import BASE_PATH_EXPERIMENTS, PATH_EVALUATION_DF_WITH_METRICS_CSV, PATH_EVALUATION_CSV, PATH_TRAIN, PATH_TEST
from datetime import datetime
# Render floats with two decimals whenever pandas displays a frame or series
pd.set_option('display.float_format', '{:.2f}'.format)

In [2]:
# Load the CSVs found at PATH_TRAIN and PATH_TEST
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (PATH_TRAIN, PATH_TEST))

# Load the existing evaluation tables; this notebook appends to them and
# writes them back to the same paths at the end
evaluation_df_with_metrics, evaluation = (
    pd.read_csv(csv_path)
    for csv_path in (PATH_EVALUATION_DF_WITH_METRICS_CSV, PATH_EVALUATION_CSV)
)

In [3]:
# Columns removed from both splits before modelling
columns_to_drop = [
    'precio_unitario_m2',
    'geometry',
    'precio_logaritmico',
    'barrio_id',
    'barrio',
]
# NOTE: df_train / df_test are rebound here, so running this cell a second time
# raises KeyError because the columns are already gone
df_train = df_train.drop(labels=columns_to_drop, axis=1)
df_test = df_test.drop(labels=columns_to_drop, axis=1)

In [5]:
# Baseline: score the 'precio_mean_barrio' column as if it were a prediction of 'precio'
baseline_model = 'Precio medio por barrio'
metrics_df, df_test_with_metrics = calculate_metrics(
    df_test['precio'],
    df_test['precio_mean_barrio'],
    df_test,
    baseline_model,
)

# Tag the per-row results with the baseline's name
df_test_with_metrics['model_name'] = baseline_model

# Append the baseline results to the tables loaded from disk
evaluation_df_with_metrics = pd.concat(
    [evaluation_df_with_metrics, df_test_with_metrics],
    ignore_index=True,
)
evaluation = pd.concat([evaluation, metrics_df], ignore_index=True)

In [5]:
# Separate the target ('precio') from the remaining columns for each split
y_train = df_train['precio']
X_train = df_train.drop('precio', axis=1)

y_test = df_test['precio']
X_test = df_test.drop('precio', axis=1)

In [23]:
# Fixed seed so the forest is reproducible between runs; without it every
# execution yields different metrics and a different exported model
RANDOM_STATE = 42

# Models to evaluate, keyed by the name recorded in the evaluation tables
models_to_test = {
    'RandomForest': ensemble.RandomForestRegressor(random_state=RANDOM_STATE),
}

for model_name, model in models_to_test.items():
    # Wrap the estimator in the project's preprocessing pipeline
    pipeline = get_pipeline(
        base_model=model,
        impute=True,
        scale=True,
        encode=True,
        # NOTE(review): every column of X_train is passed as a numeric feature even
        # though encode=True — confirm this is what get_pipeline expects
        num_features=X_train.columns.to_list()
    )

    # Fit on the training split and predict the test split
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # Compute aggregate metrics and the per-row test frame with metrics
    metrics_df, df_test_with_metrics = calculate_metrics(y_test, y_pred, df_test, model_name)

    # Tag the per-row results with the model name and an experiment folder label
    df_test_with_metrics['model_name'] = model_name
    # NOTE(review): this label is built from its own timestamp and is not derived from
    # the `output_folder` returned by export_model below — verify that both agree
    df_test_with_metrics['model_folder'] = f"experiment_{model_name}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"

    # Append this model's per-row results to the accumulated evaluation frame
    evaluation_df_with_metrics = pd.concat([evaluation_df_with_metrics, df_test_with_metrics], ignore_index=True)

    # Append this model's aggregate metrics to the accumulated metrics table
    evaluation = pd.concat([evaluation, metrics_df], ignore_index=True)

    # Export the model together with the datasets used
    # NOTE(review): the bare `model` is exported, not the fitted `pipeline`; if
    # export_model simply serialises its argument, the saved artifact will lack the
    # impute/scale/encode steps — confirm against export_model
    output_folder = export_model(
        model=model,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        base_path=BASE_PATH_EXPERIMENTS,
        save_model=True,
        save_datasets=True,
        zip_files=True
    )

# Write the updated tables back to the same CSV files they were loaded from.
# NOTE: re-running the notebook appends the same models' rows again.
evaluation.to_csv(PATH_EVALUATION_CSV, index=False)
evaluation_df_with_metrics.to_csv(PATH_EVALUATION_DF_WITH_METRICS_CSV, index=False)


