In [14]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
import joblib
import holidays

class VisitPredictor:
    def __init__(self):
        self.model = None
        self.le_canal = LabelEncoder()
        self.sucursales = []
        self.horario_min = 9
        self.horario_max = 21

    def load_data(self, filepath):
        # Cargar datos
        df = pd.read_csv(r'/sucursales\data\mar-abr.csv', sep=',')
        df['FECHA'] = pd.to_datetime(df['FECHA'], format='%d-%m-%Y')

        # Filtrar horas relevantes (9am a 9pm)
        df = df[(df['HORA'] >= self.horario_min) & (df['HORA'] <= self.horario_max)]

        # Codificar canal
        df['CANAL_2'] = self.le_canal.fit_transform(df['CANAL_2'])

        # Extraer sucursales únicas
        self.sucursales = df['COD_SUC'].unique().tolist()

        return df

    def preprocess_data(self, df):
        # Crear características temporales
        df['DIA_SEMANA'] = df['FECHA'].dt.dayofweek
        df['DIA_MES'] = df['FECHA'].dt.day
        df['MES'] = df['FECHA'].dt.month
        df['ES_FINDE'] = df['FECHA'].dt.dayofweek // 5

        # Agregar festivos de Argentina
        ar_holidays = holidays.Argentina()
        df['ES_FESTIVO'] = df['FECHA'].apply(lambda x: x in ar_holidays).astype(int)

        # Agrupar por fecha, hora y sucursal para contar visitas
        grouped = df.groupby(['COD_SUC', 'FECHA', 'HORA']).size().reset_index(name='VISITAS')
        features = df.groupby(['COD_SUC', 'FECHA', 'HORA']).first().reset_index()
        features = features[['COD_SUC', 'FECHA', 'HORA', 'DIA_SEMANA', 'DIA_MES', 'MES', 'ES_FINDE', 'ES_FESTIVO']]

        final_df = pd.merge(grouped, features, on=['COD_SUC', 'FECHA', 'HORA'])

        return final_df

    def train_model(self, df):
        # Preparar datos para entrenamiento
        X = df[['COD_SUC', 'HORA', 'DIA_SEMANA', 'DIA_MES', 'MES', 'ES_FINDE', 'ES_FESTIVO']]
        y = df['VISITAS']

        # Dividir datos
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, shuffle=False)

        # Entrenar modelo XGBoost
        self.model = xgb.XGBRegressor(
            objective='reg:squarederror',
            n_estimators=1000,
            learning_rate=0.05,
            max_depth=6,
            subsample=0.9,
            colsample_bytree=0.7,
            early_stopping_rounds=50,
            random_state=42
        )

        self.model.fit(
            X_train, y_train,
            eval_set=[(X_test, y_test)],
            verbose=False
        )

        # Evaluar modelo
        preds = self.model.predict(X_test)
        mae = mean_absolute_error(y_test, preds)
        print(f"MAE del modelo: {mae:.2f}")

        return self.model

    def predict_next_week(self, sucursal):
        # Generar fechas para los próximos 7 días
        last_date = datetime.now().date()
        dates = [last_date + timedelta(days=i) for i in range(1, 8)]

        # Crear dataframe de predicción
        pred_data = []
        for date in dates:
            for hora in range(self.horario_min, self.horario_max + 1):
                dia_semana = date.weekday()
                dia_mes = date.day
                mes = date.month
                es_finde = 1 if dia_semana >= 5 else 0

                # Verificar si es festivo
                ar_holidays = holidays.Argentina()
                es_festivo = 1 if date in ar_holidays else 0

                pred_data.append([
                    sucursal, hora, dia_semana, dia_mes, mes, es_finde, es_festivo
                ])

        pred_df = pd.DataFrame(pred_data, columns=[
            'COD_SUC', 'HORA', 'DIA_SEMANA', 'DIA_MES', 'MES', 'ES_FINDE', 'ES_FESTIVO'
        ])

        # Realizar predicciones
        pred_df['VISITAS_PRED'] = self.model.predict(pred_df)
        pred_df['FECHA'] = [dates[h//(self.horario_max-self.horario_min+1)] for h in range(len(pred_df))]

        return pred_df[['COD_SUC', 'FECHA', 'HORA', 'VISITAS_PRED']]

    def save_model(self, path):
        joblib.dump({
            'model': self.model,
            'le_canal': self.le_canal,
            'sucursales': self.sucursales
        }, path)

    def load_model(self, path):
        data = joblib.load(path)
        self.model = data['model']
        self.le_canal = data['le_canal']
        self.sucursales = data['sucursales']
        return self

if __name__ == "__main__":
    predictor = VisitPredictor()

    # Cargar y preprocesar datos
    df = predictor.load_data('mar-abr.csv')
    processed_df = predictor.preprocess_data(df)

    # Entrenar modelo
    predictor.train_model(processed_df)

    # Guardar modelo
    predictor.save_model('visit_predictor_model.pkl')

    # Ejemplo de predicción para sucursal 0
    preds = predictor.predict_next_week(0)
    print("\nPredicciones para los próximos 7 días (Sucursal 0):")
    print(preds.head(10))

KeyError: 'FECHA'

In [5]:
    !pip install prophet




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
!pip install sklearn.preprocessing
!pip install sklearn.ensemble

Collecting sklearn.preprocessing
  Downloading sklearn_preprocessing-0.1.0-py3-none-any.whl.metadata (70 bytes)
Downloading sklearn_preprocessing-0.1.0-py3-none-any.whl (10 kB)
Installing collected packages: sklearn.preprocessing
Successfully installed sklearn.preprocessing-0.1.0



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: Could not find a version that satisfies the requirement sklearn.ensemble (from versions: none)

[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for sklearn.ensemble
