In [1]:
# data_modeling.py
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
import holidays
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns

def load_data(path=r'C:\Users\iazuaz\PyCharmMiscProject\model_RRSS\data\cleaned_data.pkl'):
    """Load the cleaned dataset from a pickle file.

    Args:
        path: Location of the pickle file. Defaults to the project's
            ``cleaned_data.pkl`` so existing no-argument calls keep working.

    Returns:
        Whatever object was pickled into the file (the rest of this script
        treats it as a pandas DataFrame).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only point this at files produced by a trusted pipeline.
    with open(path, 'rb') as f:
        return pickle.load(f)

def create_date_features(df):
    """Add calendar-based feature columns derived from ``Fecha``.

    The columns are assigned directly on ``df`` (the argument is modified in
    place) and the same object is returned.

    Columns written: ``dia_semana``, ``fin_de_semana``, ``dia_mes``,
    ``semana_mes``, ``mes``, ``trimestre``, ``año``, ``es_feriado``,
    ``dia_laboral``, ``dias_desde_inicio`` and, only when it is not already
    present, ``Es_Cyber``.

    Args:
        df: DataFrame with a ``Fecha`` column that ``pd.to_datetime`` can
            parse.

    Returns:
        ``df`` with the feature columns added.
    """
    # Make sure Fecha is a datetime column before using the .dt accessor.
    df['Fecha'] = pd.to_datetime(df['Fecha'])
    fecha = df['Fecha']

    # Calendar components.
    df['dia_semana'] = fecha.dt.dayofweek  # 0=Monday, 6=Sunday
    df['fin_de_semana'] = (df['dia_semana'] >= 5).astype(int)
    df['dia_mes'] = fecha.dt.day
    # Week-of-month bucket: days 1-7 -> 1, 8-14 -> 2, ..., 29-31 -> 5.
    # Vectorized instead of a per-row Python lambda.
    df['semana_mes'] = (fecha.dt.day - 1) // 7 + 1
    df['mes'] = fecha.dt.month
    df['trimestre'] = fecha.dt.quarter
    df['año'] = fecha.dt.year

    # Chilean public holidays.
    # NOTE(review): newer `holidays` releases document `country_holidays` as
    # the replacement for `CountryHoliday` — confirm against the installed
    # version before switching.
    chile_holidays = holidays.CountryHoliday('CL')
    df['es_feriado'] = fecha.apply(lambda day: day in chile_holidays).astype(int)

    # Working day: neither weekend nor holiday.
    df['dia_laboral'] = (
        (df['fin_de_semana'] == 0) & (df['es_feriado'] == 0)
    ).astype(int)

    # Linear time trend, in days since the earliest date in this frame.
    # NOTE(review): the origin is relative to the data passed in, so a frame
    # with a different first date yields different values — confirm this is
    # handled wherever the model is used for prediction.
    df['dias_desde_inicio'] = (fecha - fecha.min()).dt.days

    # Special-event flag (Cyber): 0 is only a default. Keep any flag that the
    # input already carries instead of silently wiping it out.
    if 'Es_Cyber' not in df.columns:
        df['Es_Cyber'] = 0

    return df

def train_model(df):
    """Fit a scaler + random-forest pipeline that predicts ``Consultas_Recibidas``.

    Every column except ``Consultas_Recibidas`` and ``Fecha`` is used as a
    feature. The split is not shuffled, so the last 20% of rows form the
    hold-out set.
    NOTE(review): this presumably expects ``df`` to be ordered by date so the
    hold-out is the most recent period — the function does not sort; confirm
    with the data preparation step.

    Args:
        df: DataFrame containing ``Consultas_Recibidas``, ``Fecha`` and the
            feature columns.

    Returns:
        Tuple ``(pipeline, X_test, y_test)``: the fitted pipeline plus the
        hold-out features and target.
    """
    # Separate predictors from the target.
    features = df.drop(columns=['Consultas_Recibidas', 'Fecha'])
    target = df['Consultas_Recibidas']

    # Ordered (non-shuffled) 80/20 split.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=42,
        shuffle=False,
    )

    # Assemble the estimator steps, then the pipeline.
    scaler = StandardScaler()
    forest = RandomForestRegressor(
        n_estimators=200,
        max_depth=15,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1,
    )
    pipeline = Pipeline(steps=[('scaler', scaler), ('model', forest)])

    # Fit on the training portion only.
    pipeline.fit(X_train, y_train)

    return pipeline, X_test, y_test

def evaluate_model(model, X_test, y_test):
    """Score ``model`` on the hold-out set.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Hold-out features passed to ``model.predict``.
        y_test: True target values for ``X_test``.

    Returns:
        Tuple ``(metrics, y_pred)`` where ``metrics`` is a dict with the keys
        ``'MAE'``, ``'MSE'`` and ``'RMSE'``, and ``y_pred`` is the model's
        predictions for ``X_test``.
    """
    y_pred = model.predict(X_test)

    abs_error = mean_absolute_error(y_test, y_pred)
    sq_error = mean_squared_error(y_test, y_pred)
    # RMSE is the square root of the MSE already computed above.
    root_sq_error = np.sqrt(sq_error)

    metrics = {
        'MAE': abs_error,
        'MSE': sq_error,
        'RMSE': root_sq_error,
    }
    return metrics, y_pred

def save_model(model, version='v1',
               model_dir='C:/Users/iazuaz/PyCharmMiscProject/model_RRSS/model'):
    """Pickle the trained model to ``<model_dir>/consultas_model_<version>.pkl``.

    Args:
        model: Object to serialize (the fitted pipeline in this script).
        version: Tag embedded in the file name.
        model_dir: Directory that receives the file. Defaults to the project's
            model folder so existing calls keep writing to the same place. It
            is created if it does not exist yet.

    Returns:
        None. The destination path is printed to stdout.
    """
    # Local import keeps this block self-contained; the file does not import
    # ``os`` at module level.
    import os

    # Avoid a FileNotFoundError on a fresh checkout with no model folder.
    os.makedirs(model_dir, exist_ok=True)

    # Joined with '/' so the default path and the printed message stay
    # identical to the previous hard-coded string.
    model_path = f'{model_dir}/consultas_model_{version}.pkl'
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    print(f"Modelo guardado en {model_path}")

def main():
    """Run the whole workflow: load, add features, train, evaluate, save."""
    # Load the cleaned data and derive the date features.
    dataset = create_date_features(load_data())

    # Fit the model; the hold-out split comes back for scoring.
    fitted, X_holdout, y_holdout = train_model(dataset)

    # Report hold-out metrics (the predictions themselves are not used here).
    scores, _ = evaluate_model(fitted, X_holdout, y_holdout)
    print("Métricas de evaluación:")
    for label in scores:
        print(f"{label}: {scores[label]:.2f}")

    # Persist the fitted model.
    save_model(fitted, version='v1')

# Run the training workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Métricas de evaluación:
MAE: 178.27
MSE: 79410.87
RMSE: 281.80
Modelo guardado en C:/Users/iazuaz/PyCharmMiscProject/model_RRSS/model/consultas_model_v1.pkl
