# 📈 Feature Engineering

In [1]:
import pandas as pd
import numpy as np
import holidays

In [2]:
chain_campaigns = pd.read_csv('/content/drive/MyDrive/LDSSA/Capstone project/chain_campaigns.csv')
product_prices = pd.read_csv('/content/drive/MyDrive/LDSSA/Capstone project/product_prices_leaflets.csv')
product_structures = pd.read_csv('/content/drive/MyDrive/LDSSA/Capstone project/product_structures_sales.csv')

chain_campaigns['start_date'] = pd.to_datetime(chain_campaigns['start_date'])
chain_campaigns['end_date'] = pd.to_datetime(chain_campaigns['end_date'])
product_prices['time_key'] = pd.to_datetime(product_prices['time_key'].astype(str), format='%Y%m%d')
# product_structures['time_key'] = pd.to_datetime(product_structures['time_key'].astype(str), format='%Y%m%d')

In [None]:
def create_features(competitor, product_prices, chain_campaigns, product_structures, other_competitors=['competitorB', 'chain']):
    """
    Cria todas as features para um competidor específico
    Args:
        competitor: Nome do competidor (ex: 'competitorA')
        product_prices: DataFrame com preços dos produtos
        chain_campaigns: DataFrame com campanhas
        product_structures: DataFrame com estruturas dos produtos
        other_competitors: Lista de concorrentes para incluir como features
    Returns:
        DataFrame com features e target
    """
    # 1. Filtrar dados do competidor
    df = filter_competitor_data(product_prices, competitor)

    # 2. Engenharia de features temporais
    df = add_temporal_features(df)

    # 3. Features de séries temporais
    df = add_time_series_features(df)

    # 4. Codificar leaflet_type
    df = encode_leaflet(df)

    # 5. Features de campanha
    df = add_campaign_features(df, chain_campaigns, competitor)

    # 6. Adicionar categoria do produto
    df = add_product_category(df, product_structures)

    # 7. Adicionar preços dos concorrentes
    df = add_competitor_prices(df, product_prices, other_competitors)

    # 8. Definir target e tratar dados faltantes
    df = df.dropna(subset=['pvp_was']).reset_index(drop=True)

    return df

def filter_competitor_data(df, competitor):
    """Filtra dados do competidor e prepara estrutura básica"""
    return (
        df[df['competitor'] == competitor]
        .copy()
        .sort_values(['sku', 'time_key'])
        .assign(time_key=lambda x: pd.to_datetime(x['time_key']))
    )

def add_temporal_features(df):
    """Adiciona features relacionadas ao calendário"""
    pt_holidays = holidays.Portugal()

    return df.assign(
        day_of_month=lambda x: x['time_key'].dt.day,
        day_of_week=lambda x: x['time_key'].dt.dayofweek,
        month=lambda x: x['time_key'].dt.month,
        holiday_flag=lambda x: x['time_key'].isin(pt_holidays).astype(int)
    )

def add_time_series_features(df):
    """Calcula lags e estatísticas móveis"""
    grouper = df.groupby('sku')

    # Lags
    #for lag in [7, 14, 30]:
    #    df[f'lag_{lag}'] = grouper['pvp_was'].shift(lag)

    # Rolling statistics
    #windows = [1, 7, 14, 30]
    #for w in windows:
    #    df[f'rolling_mean_{w}'] = grouper['pvp_was'].transform(lambda x: x.rolling(w, min_periods=1).mean())

    for w in [7, 14, 30]:
        df[f'rolling_std_{w}'] = grouper['pvp_was'].transform(lambda x: x.rolling(w, min_periods=1).std())

    return df

def encode_leaflet(df):
    """Codifica o leaflet_type de forma segura"""
    leaflet_mapping = {
        'themed': 1,
        'weekly': 2,
        'short': 3
    }
    return df.assign(
        leaflet=lambda x: x['leaflet'].map(leaflet_mapping).fillna(0).astype('category')
    )

def add_campaign_features(df, chain_campaigns, competitor):
    """Adiciona features relacionadas a campanhas"""
    # Filtra campanhas do competidor e remove duplicatas
    campaigns = (
        chain_campaigns[chain_campaigns['competitor'] == competitor]
        [['start_date', 'end_date', 'chain_campaign']]
        .drop_duplicates()
    )
    # Cria um DataFrame com todas as datas de campanha
    date_ranges = []
    for _, row in campaigns.iterrows():
        dates = pd.date_range(row['start_date'], row['end_date'], freq='D')
        date_ranges.extend([(date, row['chain_campaign']) for date in dates])

    campaign_dates = pd.DataFrame(date_ranges, columns=['time_key', 'campaign_code'])

    # Merge com o DataFrame principal
    df = df.merge(
        campaign_dates,
        on='time_key',
        how='left'
    )

    # Cria flags e codificação
    df['campaign_active'] = df['campaign_code'].notna().astype(int)

    # Codifica os tipos de campanha (A1 -> 1, A2 -> 2, C1 -> 3, etc.)
    df['campaign_type'] = (
        df['campaign_code']
        .str.extract(r'([A-Za-z]+)')[0]  # Extrai a letra (A, C, etc.)
        .astype('category')
        .cat.codes
        .add(1)
        .fillna(0)
        .astype(int)
    )

    return df.drop(columns=['campaign_code'])

def add_product_category(df, product_structures):
    """Adiciona informações da estrutura do produto"""
    sku_map = product_structures.set_index('sku')['structure_level_2'].to_dict()
    return df.assign(structure_level_2=lambda x: x['sku'].map(sku_map))

def add_competitor_prices(df, product_prices, other_competitors):
    """Adiciona preços dos concorrentes como features"""
    for comp in other_competitors:
        comp_prices = (
            product_prices[product_prices['competitor'] == comp]
            .rename(columns={'pvp_was': f'pvp_was_{comp}'})
            [['time_key', 'sku', f'pvp_was_{comp}']]
        )
        df = df.merge(comp_prices, on=['time_key', 'sku'], how='left')

    return df

In [5]:
# Para o Competitor A
df_competitorA = create_features(
    competitor='competitorA',
    product_prices=product_prices,
    chain_campaigns=chain_campaigns,
    product_structures=product_structures,
    other_competitors=['competitorB', 'chain']
)

In [6]:
# Para o Competitor B
df_competitorB = create_features(
    competitor='competitorB',
    product_prices=product_prices,
    chain_campaigns=chain_campaigns,
    product_structures=product_structures,
    other_competitors=['competitorA', 'chain']
)

In [7]:
def additional_features(df, product_prices, current_competitor):
    """
    Adiciona features avançadas de comparação de preços para qualquer competidor

    Args:
        df: DataFrame principal com dados do competidor atual
        product_prices: DataFrame completo com preços de todos os concorrentes
        current_competitor: Nome do competidor atual ('competitorA' ou 'competitorB')

    Returns:
        DataFrame com as novas features adicionadas
    """
    # 1. Identifica os outros competidores automaticamente
    all_competitors = product_prices['competitor'].unique()
    other_competitors = [c for c in all_competitors if c != current_competitor and c in ['chain', 'competitorA', 'competitorB']]

    # 2. Adiciona deltas para cada concorrente
    for competitor in other_competitors:
        col_name = f'pvp_was_{competitor}'
        delta_col = f'delta_price_{competitor}'

        # Ensure the competitor price column exists before calculating delta
        if col_name in df.columns:
            df[delta_col] = df['pvp_was'] - df[col_name]
        else:
            # Optionally, handle cases where competitor price column is missing
            # print(f"Warning: Column '{col_name}' not found in df. Skipping delta calculation for {competitor}.")
            pass # Or df[delta_col] = np.nan if you want the column

    # 3. Lags de preço dos outros concorrentes
    for competitor in other_competitors:
        # Select data for the current competitor being processed
        comp_data = product_prices[product_prices['competitor'] == competitor].copy()

        # Ensure time_key is datetime and sort
        comp_data['time_key'] = pd.to_datetime(comp_data['time_key'])
        comp_data = comp_data.sort_values(['sku', 'time_key'])

        # Calculate lags using transform
        comp_data[f'lag1_price_{competitor}'] = comp_data.groupby('sku')['pvp_was'].transform(lambda x: x.shift(1))
        comp_data[f'lag7_price_{competitor}'] = comp_data.groupby('sku')['pvp_was'].transform(lambda x: x.shift(7))

        # Select only necessary columns for merging
        lag_cols_to_merge = ['time_key', 'sku', f'lag1_price_{competitor}', f'lag7_price_{competitor}']

        # Merge the calculated lags back to the main df
        df = df.merge(comp_data[lag_cols_to_merge], on=['time_key', 'sku'], how='left')

        # Calcula deltas temporais - ensure lag columns exist
        if f'lag1_price_{competitor}' in df.columns:
             df[f'delta_{competitor}_lag1'] = df['pvp_was'] - df[f'lag1_price_{competitor}']
        else:
             df[f'delta_{competitor}_lag1'] = np.nan # Or handle appropriately

        if f'lag7_price_{competitor}' in df.columns:
            df[f'delta_{competitor}_lag7'] = df['pvp_was'] - df[f'lag7_price_{competitor}']
        else:
            df[f'delta_{competitor}_lag7'] = np.nan # Or handle appropriately


    # 4. Variáveis booleanas de posicionamento competitivo
    comparison_cols = [f'pvp_was_{c}' for c in other_competitors if f'pvp_was_{c}' in df.columns]

    if len(comparison_cols) > 0:
        # Need to handle potential NaNs in comparison columns when calculating min/max
        df['is_cheapest'] = (df['pvp_was'] < df[comparison_cols].min(axis=1)).astype(int)
        df['is_most_expensive'] = (df['pvp_was'] > df[comparison_cols].max(axis=1)).astype(int)

        for competitor in other_competitors:
            col = f'pvp_was_{competitor}'
            if col in df.columns:
                # Need to handle potential NaNs in comparison column
                df[f'is_cheaper_than_{competitor}'] = (df['pvp_was'] < df[col]).astype(int)

    # 5. Calcula posição relativa (ranking)
    if len(comparison_cols) > 0:
        price_cols = ['pvp_was'] + comparison_cols
         # Need to handle potential NaNs when ranking
        df['price_rank'] = df[price_cols].rank(axis=1, method='min', na_option='bottom')['pvp_was'] # na_option handles NaNs

    return df

In [8]:
# Para competitorA
df_competitorA = additional_features(df_competitorA, product_prices, 'competitorA')

In [9]:
# Para competitorB
df_competitorB = additional_features(df_competitorB, product_prices, 'competitorB')

## Dealing with NaN values

In [10]:
def check_missing(df, name=""):
    missing = df.isnull().sum()
    missing = missing[missing > 0]
    if len(missing) == 0:
        print(f"{name}: Nenhum NaN encontrado!")
    else:
        print(f"{name}: \n{missing}")

check_missing(df_competitorA, "Competitor A")
check_missing(df_competitorB, "Competitor B")

Competitor A: 
lag_7                        17551
lag_14                       34399
lag_30                       73763
rolling_std_7                 2380
rolling_std_14                2380
rolling_std_30                2380
pvp_was_competitorB        1011367
delta_price_competitorB    1011367
lag1_price_chain              1986
lag7_price_chain             14291
delta_chain_lag1              1986
delta_chain_lag7             14291
lag1_price_competitorB     1012018
lag7_price_competitorB     1016330
delta_competitorB_lag1     1012018
delta_competitorB_lag7     1016330
dtype: int64
Competitor B: 
lag_7                       6952
lag_14                     13236
lag_30                     27465
rolling_std_7                924
rolling_std_14               924
rolling_std_30               924
pvp_was_competitorA        57532
delta_price_competitorA    57532
lag1_price_chain             554
lag7_price_chain            3914
delta_chain_lag1             554
delta_chain_lag7            3914
l

In [11]:
def handle_missing_values_ffill(df_competitorA, df_competitorB, product_prices):
    """
    Preenche NaN nos preços de concorrentes com o último valor conhecido (forward fill)
    e remove flags de imputação.
    """
    # Dicionário para processamento em lote
    dfs = {'competitorA': df_competitorA, 'competitorB': df_competitorB}

    for name, df in dfs.items():
        current_comp = name  # 'competitorA' ou 'competitorB'
        other_comp = 'competitorB' if current_comp == 'competitorA' else 'competitorA'

        # 1. Forward fill para preços de concorrentes (ordena por time_key primeiro)
        df.sort_values(['sku', 'time_key'], inplace=True)

        for competitor in ['chain', other_comp]:
            col = f'pvp_was_{competitor}'
            if col in df.columns:
                # FFill por SKU (último valor conhecido do mesmo produto)
                df[col] = df.groupby('sku')[col].ffill()

                # Se ainda houver NaN (produtos sem histórico), preenche com a média global
                global_avg = product_prices[product_prices['competitor'] == competitor]['pvp_was'].mean()
                df[col] = df[col].fillna(global_avg)

        # 2. Lags próprios (forward fill)
        for lag in [7, 14, 30]:
            df[f'lag_{lag}'] = df.groupby('sku')[f'lag_{lag}'].ffill()

        # 3. Rolling statistics (preenche com a média móvel)
        for window in [7, 14, 30]:
            roll_col = f'rolling_std_{window}'
            df[roll_col] = df.groupby('sku')[roll_col].transform(
                lambda x: x.fillna(x.rolling(window, min_periods=1).std()))

        # 4. Recalcula todos os deltas
        delta_pairs = [
            (f'delta_price_{other_comp}', f'pvp_was_{other_comp}'),
            ('delta_price_chain', 'pvp_was_chain'),
            (f'delta_{other_comp}_lag1', f'lag1_price_{other_comp}'),
            (f'delta_{other_comp}_lag7', f'lag7_price_{other_comp}'),
            ('delta_chain_lag1', 'lag1_price_chain'),
            ('delta_chain_lag7', 'lag7_price_chain')
        ]

        for delta_col, price_col in delta_pairs:
            if delta_col in df.columns and price_col in df.columns:
                df[delta_col] = df['pvp_was'] - df[price_col]

        # 5. Remove colunas de flag de imputação se existirem
        imputed_cols = [col for col in df.columns if '_imputed' in col]
        df.drop(columns=imputed_cols, errors='ignore', inplace=True)

        # 6. Preenchimento final residual com 0 (apenas para features calculadas)
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        df[numeric_cols] = df[numeric_cols].fillna(0)

        # Atualiza o DataFrame
        dfs[name] = df

    return dfs['competitorA'], dfs['competitorB']

In [12]:
df_competitorA_clean, df_competitorB_clean = handle_missing_values_ffill(df_competitorA, df_competitorB, product_prices)

In [15]:
def check_missing(df, name=""):
    missing = df.isnull().sum()
    missing = missing[missing > 0]
    if len(missing) == 0:
        print(f"{name}: Nenhum NaN encontrado!")
    else:
        print(f"{name}: \n{missing}")

check_missing(df_competitorA_clean, "Competitor A")
check_missing(df_competitorA_clean, "Competitor B")

Competitor A: Nenhum NaN encontrado!
Competitor B: Nenhum NaN encontrado!


# 🚀 Model testing

In [13]:
df_competitorA_clean = df_competitorA_clean.drop(columns='competitor')
df_competitorB_clean = df_competitorB_clean.drop(columns='competitor')

In [14]:
df_competitorA_clean = df_competitorA_clean.sort_values(['time_key', 'sku'])
df_competitorB_clean = df_competitorB_clean.sort_values(['time_key', 'sku'])

In [None]:
from sklearn.model_selection import train_test_split

# For competitorA
features_A = [col for col in df_competitorA_clean.columns
            if col not in ['time_key', 'pvp_was', 'sku', 
                           # 'rolling_mean_1', 'rolling_mean_7', 'rolling_mean_14', 'rolling_mean_30',
                           # lag_7', 'lag_14', 'lag_30'
                            ]]
X_A = df_competitorA_clean[features_A]
y_A = df_competitorA_clean['pvp_was']

X_train_A, X_valid_A, y_train_A, y_valid_A = train_test_split(
    X_A, y_A, test_size=0.2, shuffle=False)


# For competitorB
features_B = [col for col in df_competitorB_clean.columns
            if col not in ['time_key', 'pvp_was', 'sku', 
                           # 'rolling_mean_1', 'rolling_mean_7', 'rolling_mean_14', 'rolling_mean_30',
                           # 'lag_7', 'lag_14', 'lag_30'
                           ]]

X_B = df_competitorB_clean[features_B]
y_B = df_competitorB_clean['pvp_was']

X_train_B, X_valid_B, y_train_B, y_valid_B = train_test_split(
    X_B, y_B, test_size=0.2, shuffle=False)

### 💡 LightGBM

In [17]:
# For competitorA
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

# Dataset para LightGBM
train_data = lgb.Dataset(X_train_A, label=y_train_A)
valid_data = lgb.Dataset(X_valid_A, label=y_valid_A)

# Parâmetros básicos
params = {
    'objective': 'regression',
    'metric': 'mae',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'verbose': -1,
    'random_state': 42
}

# Treinamento com early stopping
# Use the new callbacks API for early stopping
modelA = lgb.train(
    params,
    train_data,
    valid_sets=[train_data, valid_data],
    num_boost_round=1000,
    callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=True)], # Use the new callback
    # verbose_eval=100 # Removed this argument
)

# Previsão e avaliação
y_pred_lgb_A = modelA.predict(X_valid_A)
mae = mean_absolute_error(y_valid_A, y_pred_lgb_A)
mape = mean_absolute_percentage_error(y_valid_A, y_pred_lgb_A)

print(f"[LightGBM A] MAE: {mae:.2f} | MAPE: {mape:.2%}")

Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	training's l1: 0.108235	valid_1's l1: 0.321805
[LightGBM A] MAE: 0.32 | MAPE: 0.42%


In [18]:
# For competitorB

# Dataset para LightGBM
train_data = lgb.Dataset(X_train_B, label=y_train_B)
valid_data = lgb.Dataset(X_valid_B, label=y_valid_B)

# Parâmetros básicos
params = {
    'objective': 'regression',
    'metric': 'mae',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'verbose': -1,
    'random_state': 42
}

# Treinamento com early stopping
# Use the new callbacks API for early stopping
modelB = lgb.train(
    params,
    train_data,
    valid_sets=[train_data, valid_data],
    num_boost_round=1000,
    callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=True)], # Use the new callback
    # verbose_eval=100 # Removed this argument
)

# Previsão e avaliação
y_pred_lgb_B = modelB.predict(X_valid_B)
mae = mean_absolute_error(y_valid_B, y_pred_lgb_B)
mape = mean_absolute_percentage_error(y_valid_B, y_pred_lgb_B)

print(f"[LightGBM B] MAE: {mae:.2f} | MAPE: {mape:.2%}")

Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	training's l1: 0.095008	valid_1's l1: 2.73842
[LightGBM B] MAE: 2.74 | MAPE: 0.95%


In [19]:
import pickle
import os
# Get the current working directory
current_dir = os.getcwd()

# Define the full paths for saving the dtypes files
A_path = os.path.join(current_dir, 'Adtypes.pickle')

# Create dtypes.pickle for A
dtypes_A = X_train_A.dtypes.to_dict()
with open(A_path, 'wb') as f:
    pickle.dump(dtypes_A, f)

print(f"Data types for Competitor A saved to: {A_path}")

Data types for Competitor A saved to: /content/Adtypes.pickle


In [None]:
# Get the current working directory
current_dir = os.getcwd()

# Define the full paths for saving the dtypes files
B_path = os.path.join(current_dir, 'Bdtypes.pickle')

# Create dtypes.pickle for A
dtypes_B = X_train_B.dtypes.to_dict()
with open(B_path, 'wb') as f:
    pickle.dump(dtypes_B, f)

print(f"Data types for Competitor B saved to: {B_path}")

In [None]:
def generate_features_for_api(sku, target_date, product_prices, chain_campaigns, product_structures, competitor, original_dtypes):
    """
    Gera features para predição com base em sku e data futura.
    Usa como referência o histórico até a última data conhecida.
    Inclui lógica para alinhar colunas e dtypes com os dados de treino.
    """
    import pandas as pd
    import holidays
    import numpy as np # Import numpy for fillna

    target_date = pd.to_datetime(target_date)
    # Última data com dados disponíveis
    last_obs_date = product_prices['time_key'].max()

    # Filtrar histórico do SKU até a última data conhecida
    history = product_prices[
        (product_prices['sku'] == sku) &
        (product_prices['time_key'] <= last_obs_date) &
        (product_prices['competitor'] == competitor)
    ].copy()

    if history.empty:
        # If no history, create a base row with essential info and NaNs for pvp_was
        row = {
            'sku': sku,
            'time_key': target_date,
            'competitor': competitor,
            'leaflet': None,
            'pvp_was': np.nan # Need pvp_was for time series features, will be NaN for future
        }
        df_target = pd.DataFrame([row])
        # Concatenate an empty history with the target row to ensure functions work
        df_all = pd.concat([history, df_target], ignore_index=True)
    else:
         history = history.sort_values('time_key')
         # Linha fictícia para previsão futura
         row = {
             'sku': sku,
             'time_key': target_date,
             'competitor': competitor,
             'leaflet': None, # Assuming leaflet info might not be available for future
             'pvp_was': np.nan # Target value for the future is unknown
         }
         df_target = pd.DataFrame([row])
         # Concatenate history with line for future date
         df_all = pd.concat([history, df_target], ignore_index=True)


    # Add temporary 'pvp_was' column if missing for feature engineering
    if 'pvp_was' not in df_all.columns:
         df_all['pvp_was'] = np.nan # Add as NaN, will be dropped later

    # Apply feature engineering functions
    # Ensure time_key is datetime
    df_all['time_key'] = pd.to_datetime(df_all['time_key'])
    df_all = df_all.sort_values(['sku', 'time_key']) # Re-sort after concat

    # Call the feature engineering functions sequentially
    df_all = add_temporal_features(df_all)
    df_all = add_product_category(df_all, product_structures)
    df_all = add_campaign_features(df_all, chain_campaigns, competitor)

    # Handle potential NaNs in leaflet BEFORE encoding
    # Fill with a placeholder that will be mapped to 0 or NaN
    df_all['leaflet'] = df_all['leaflet'].fillna('unknown') # Use 'unknown' or similar

    df_all = encode_leaflet(df_all)

    df_all = add_time_series_features(df_all)
    df_all = add_competitor_prices(df_all, product_prices, ['chain', 'competitorA', 'competitorB'])
    df_all = additional_features(df_all, product_prices, current_competitor=competitor)

    # Filter to get only the target date row
    df_pred = df_all[df_all['time_key'] == target_date].copy()

    # Apply missing value handling (simplified for a single row prediction)
    # For a single future row, ffill doesn't make sense without preceding data.
    # We should fill NaNs in the prediction row with a sensible value, e.g., 0 or mean from training data.
    # Given your original ffill logic, filling remaining NaNs with 0 seems reasonable for new data points.
    numeric_cols = df_pred.select_dtypes(include=[np.number]).columns
    df_pred[numeric_cols] = df_pred[numeric_cols].fillna(0)


    # Align columns and dtypes with the training data
    # Drop columns not in original training data
    cols_to_keep = original_dtypes.keys()
    df_pred = df_pred[[col for col in df_pred.columns if col in cols_to_keep]]

    # Reindex to match original training data columns order
    df_pred = df_pred.reindex(columns=cols_to_keep)

    # Convert dtypes to match original training data
    for col, dtype in original_dtypes.items():
        if col in df_pred.columns:
             if pd.api.types.is_categorical_dtype(dtype):
                 # Handle categorical data - need to ensure categories are the same
                 # For prediction, we can use the categories from the training data
                 # Assuming dtypes dictionary was created after OHE or cat conversion
                 # If original_dtypes saves 'category', need the actual categories
                 # A simpler approach is to rely on LightGBM's handling if the dtype is 'category'
                 # Ensure the column is indeed 'category' dtype
                 df_pred[col] = df_pred[col].astype(dtype) # This will raise error if new category is present
             else:
                 df_pred[col] = df_pred[col].astype(dtype)

    # Drop target and identifier columns used during feature creation but not for prediction
    final_drop_cols = ['pvp_was', 'time_key', 'sku', 'competitor']
    df_pred = df_pred.drop(columns=[col for col in final_drop_cols if col in df_pred.columns], errors='ignore')


    return df_pred

# Load the dtypes for Competitor A's training data
try:
    # Get the current working directory to find the pickle file
    current_dir = os.getcwd()
    A_dtypes_path = os.path.join(current_dir, 'Adtypes.pickle')
    with open(A_dtypes_path, 'rb') as f:
        original_dtypes_A = pickle.load(f)
except FileNotFoundError:
    print("Error: 'Adtypes.pickle' not found. Please ensure it exists.")
    original_dtypes_A = None # Handle case where file is missing

# Run the prediction loop using the modified function
if original_dtypes_A is not None:
    for date in future_dates:
        X_input = generate_features_for_api(
            sku=sku_sample,
            target_date=date,
            product_prices=product_prices,
            chain_campaigns=chain_campaigns,
            product_structures=product_structures,
            competitor='competitorA',
            original_dtypes=original_dtypes_A # Pass the original dtypes
        )

        if X_input is not None and not X_input.empty:
            try:
                y_pred = modelA.predict(X_input)[0]
                print(f"[{date.date()}] Preço previsto para SKU {sku_sample}: {y_pred:.2f}")
            except Exception as e:
                print(f"Error during prediction for date {date.date()}: {e}")
                print("X_input columns:", X_input.columns.tolist())
                print("X_input dtypes:", X_input.dtypes.to_dict())
                print("ModelA expected features:", modelA.feature_name())
                print("ModelA expected dtypes:", modelA.feature_dtype())

        else:
            print(f"[{date.date()}] Não foi possível gerar predição para SKU {sku_sample} ou input dataframe é vazio.")

  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


[2024-11-04] Preço previsto para SKU 4250: 4.87


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


[2024-11-11] Preço previsto para SKU 4250: 4.87
[2024-11-27] Preço previsto para SKU 4250: 4.87


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


In [25]:
skus_para_testar = df_competitorA_clean['sku'].value_counts().head(5).index.tolist()

In [None]:
df_preds = pd.DataFrame(results)

#### Trabalhando em price_predictions.csv

In [27]:
new_data = pd.read_csv('price_predictions.csv')
new_data

Unnamed: 0,id,sku,time_key,pvp_is_competitorA,pvp_is_competitorB,pvp_is_competitorA_actual,pvp_is_competitorB_actual,created_at
0,1,1173,20230501,123.0,77.0,99.99,99.99,2025-05-26 17:34:10.781955
1,2,sku_errado,20230501,132.0,108.0,99.99,99.99,2025-05-26 17:35:37.572521
2,3,123,20230501,107.0,76.0,,,2025-05-26 17:53:42.726221
3,4,1175,20250501,146.0,153.0,,,2025-05-26 17:55:01.332724
4,6,2506,20241107,74.0,120.0,34.54,34.54,2025-05-26 20:00:12.085541
...,...,...,...,...,...,...,...,...
999,1000,4667,20241225,136.0,71.0,,,2025-05-26 21:29:43.605987
1000,1001,3553,20241204,66.0,68.0,,,2025-05-26 21:29:46.806407
1001,1002,3870,20241203,144.0,66.0,,,2025-05-26 21:29:50.264213
1002,1003,2239,20241219,127.0,113.0,,,2025-05-26 21:29:53.462784


In [29]:
new_data.sort_values(['created_at'], inplace=True)
new_data

Unnamed: 0,id,sku,time_key,pvp_is_competitorA,pvp_is_competitorB,pvp_is_competitorA_actual,pvp_is_competitorB_actual,created_at
0,1,1173,20230501,123.0,77.0,99.99,99.99,2025-05-26 17:34:10.781955
1,2,sku_errado,20230501,132.0,108.0,99.99,99.99,2025-05-26 17:35:37.572521
2,3,123,20230501,107.0,76.0,,,2025-05-26 17:53:42.726221
3,4,1175,20250501,146.0,153.0,,,2025-05-26 17:55:01.332724
361,5,4555,20241107,81.0,137.0,34.54,34.54,2025-05-26 20:00:10.560478
...,...,...,...,...,...,...,...,...
999,1000,4667,20241225,136.0,71.0,,,2025-05-26 21:29:43.605987
1000,1001,3553,20241204,66.0,68.0,,,2025-05-26 21:29:46.806407
1001,1002,3870,20241203,144.0,66.0,,,2025-05-26 21:29:50.264213
1002,1003,2239,20241219,127.0,113.0,,,2025-05-26 21:29:53.462784


In [31]:
# prompt: Using dataframe new_data: cria uma nova tabela somente com created_at > 	2025-05-26 20:29:56.532894

# Ensure the 'created_at' column is in datetime format
new_data['created_at'] = pd.to_datetime(new_data['created_at'])

# Create a new dataframe containing only rows where 'created_at' is greater than the specified datetime
new_data_without_dummies = new_data[new_data['created_at'] > '2025-05-26 20:30:00']
len(new_data_without_dummies)

500

In [None]:
new_data_without_dummies.sort_values(['created_at'], inplace=True)

# Retira dados com NaN
new_data_without_dummies_and_nans = new_data_without_dummies.dropna()

#Retira a coluna created_at e id
new_data_without_dummies_and_nans = new_data_without_dummies_and_nans.drop(columns=['created_at', 'id'])


In [None]:
def ensure_sku_is_str(*dfs):
    for df in dfs:
        if 'sku' in df.columns:
            df['sku'] = df['sku'].astype(str)
            
# Duplicar df para evitar sobrescrita e garantir independência
df_eval = new_data_without_dummies.copy()
ensure_sku_is_str(product_prices, product_structures, df_eval)

In [None]:
# Compara os resultados de predição com os dados de avaliação

# Criar colunas para novas predições
df_eval['pvp_api_modelA'] = np.nan
df_eval['pvp_api_modelB'] = np.nan

for idx, row in df_eval.iterrows():
    sku = row['sku']
    date = row['time_key']

    try:
        # Previsão para Competitor A
        X_A = generate_features_for_api(sku, date, product_prices, chain_campaigns,
                                        product_structures, 'competitorA', original_dtypes_A)
        if X_A is not None and not X_A.empty:
            df_eval.loc[idx, 'pvp_api_modelA'] = modelA.predict(X_A)[0]

        # Previsão para Competitor B
        X_B = generate_features_for_api(sku, date, product_prices, chain_campaigns,
                                        product_structures, 'competitorB', original_dtypes_A)  # pode usar os mesmos dtypes
        if X_B is not None and not X_B.empty:
            df_eval.loc[idx, 'pvp_api_modelB'] = modelB.predict(X_B)[0]

    except Exception as e:
        print(f"Erro em SKU {sku}, data {date}: {e}")


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3306, data 20241204: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4466, data 20241118: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1554, data 20241121: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1937, data 20241227: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2262, data 20241217: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4555, data 20241129: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3417, data 20241105: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2971, data 20241102: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3505, data 20241112: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3550, data 20241218: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3516, data 20241127: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2266, data 20241205: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4492, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3310, data 20241208: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3514, data 20241113: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3277, data 20241117: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4154, data 20241216: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1218, data 20241216: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2538, data 20241029: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1612, data 20241107: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2727, data 20241215: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4003, data 20241205: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4579, data 20241217: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3453, data 20241208: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2971, data 20241121: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1809, data 20241029: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4492, data 20241122: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1369, data 20241208: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3361, data 20241029: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2913, data 20241029: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4539, data 20241203: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2428, data 20241209: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4314, data 20241229: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1661, data 20241201: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4047, data 20241210: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1393, data 20241213: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3579, data 20241215: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3711, data 20241220: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4312, data 20241202: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2310, data 20241213: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1661, data 20241106: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1529, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4177, data 20241127: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3994, data 20241229: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1888, data 20241119: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2094, data 20241030: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3754, data 20241217: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2553, data 20241112: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3310, data 20241127: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4255, data 20241216: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2227, data 20241130: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2107, data 20241124: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3800, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1810, data 20241124: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4401, data 20241115: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1393, data 20241206: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3588, data 20241126: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3283, data 20241130: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3972, data 20241212: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1937, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4319, data 20241226: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4733, data 20241120: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1175, data 20241115: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2894, data 20241123: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2913, data 20241115: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4319, data 20241221: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3723, data 20241119: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1970, data 20241107: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1658, data 20241102: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4598, data 20241112: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1972, data 20241227: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2550, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4008, data 20241115: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4310, data 20241129: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1805, data 20241206: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2303, data 20241130: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1531, data 20241211: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3803, data 20241204: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4599, data 20241107: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4552, data 20241214: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3380, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3870, data 20241216: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4360, data 20241121: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2478, data 20241230: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4523, data 20241105: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2227, data 20241223: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3714, data 20241119: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2735, data 20241210: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1972, data 20241102: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4467, data 20241107: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1806, data 20241212: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 3361, data 20241112: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1529, data 20241223: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1784, data 20241125: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 1175, data 20241110: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 2107, data 20241208: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4442, data 20241123: Cannot convert non-finite values (NA or inf) to integer


  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):
  if pd.api.types.is_categorical_dtype(dtype):


Erro em SKU 4526, data 20241122: Cannot convert non-finite values (NA or inf) to integer
