In [None]:
import pandas as pd
import numpy as np
import joblib
import re

# ============================================================
# 🔧 Função auxiliar
# ============================================================
def extrair_numero(texto):
    """Return the first run of digits found in ``texto`` as an ``int``.

    The input is converted with ``str`` before searching, so non-string
    values are accepted. ``np.nan`` is returned when ``pd.isna(texto)`` is
    true or when the text contains no digit at all.
    """
    if not pd.isna(texto):
        primeiro = re.search(r'\d+', str(texto))
        if primeiro is not None:
            return int(primeiro.group())
    return np.nan


# ============================================================
# 📊 Estatísticas descritivas
# ============================================================
def estatisticas_descritivas(df):
    """Build descriptive-statistics tables for ``df``, grouped by column dtype.

    Args:
        df: DataFrame to summarise.

    Returns:
        A dict mapping a group label to a DataFrame indexed by column name.
        A group is present only when ``df`` has at least one column of that
        kind:

        * ``'Numéricas'``: Count, Mean, Std, Var, Min, 25%, 50% (Median),
          75% and Max for numeric columns, rounded to 3 decimals.
        * ``'Categóricas'``: Count, Unique, Top and Freq for object and
          category columns. A column with no non-missing value gets
          ``Top = NaN`` and ``Freq = 0``.
        * ``'Datas'``: Count, Min and Max for datetime64 columns.
    """
    col_num = df.select_dtypes(include=['number']).columns
    col_cat = df.select_dtypes(include=['object', 'category']).columns
    col_date = df.select_dtypes(include=['datetime64[ns]', 'datetime64']).columns

    resumo = {}

    if len(col_num) > 0:
        stats_num = pd.DataFrame(index=col_num)
        stats_num['Count'] = df[col_num].count()
        stats_num['Mean'] = df[col_num].mean()
        stats_num['Std'] = df[col_num].std()
        stats_num['Var'] = df[col_num].var()
        stats_num['Min'] = df[col_num].min()
        stats_num['25%'] = df[col_num].quantile(0.25)
        stats_num['50% (Median)'] = df[col_num].median()
        stats_num['75%'] = df[col_num].quantile(0.75)
        stats_num['Max'] = df[col_num].max()
        resumo['Numéricas'] = stats_num.round(3)

    if len(col_cat) > 0:
        stats_cat = pd.DataFrame(index=col_cat)
        stats_cat['Count'] = df[col_cat].count()
        stats_cat['Unique'] = df[col_cat].nunique()

        # Top/Freq are computed column by column: mode() and value_counts()
        # both drop missing values, so a column that is entirely missing
        # yields an empty result and `.iloc[0]` would raise IndexError.
        tops = []
        freqs = []
        for col in col_cat:
            modas = df[col].mode()
            contagens = df[col].value_counts()
            tops.append(modas.iloc[0] if len(modas) else np.nan)
            freqs.append(contagens.iloc[0] if len(contagens) else 0)
        stats_cat['Top'] = pd.Series(tops, index=col_cat, dtype=object)
        stats_cat['Freq'] = pd.Series(freqs, index=col_cat)
        resumo['Categóricas'] = stats_cat

    if len(col_date) > 0:
        stats_date = pd.DataFrame(index=col_date)
        stats_date['Count'] = df[col_date].count()
        stats_date['Min'] = df[col_date].min()
        stats_date['Max'] = df[col_date].max()
        resumo['Datas'] = stats_date

    return resumo


def exibir_resumo(resumo):
    """Print every statistics table in ``resumo`` under a titled banner.

    Args:
        resumo: dict mapping a group label to a DataFrame, as returned by
            ``estatisticas_descritivas``.

    Each table is rendered through IPython's ``display`` with centred cells
    and headers. If an ``ImportError`` is raised while importing IPython or
    building the styled table, the plain DataFrame is printed instead.
    """
    regua = "=" * 50
    print("\n" + regua)
    print("RESUMO DAS ESTATÍSTICAS DESCRITIVAS")
    print(regua)

    for tipo, stats in resumo.items():
        print(f"\n{'='*20} {tipo.upper()} {'='*20}")
        try:
            from IPython.display import display

            centralizado = stats.style.set_properties(**{'text-align': 'center'})
            centralizado = centralizado.set_table_styles([
                {'selector': 'th', 'props': [('text-align', 'center')]},
            ])
            display(centralizado)
        except ImportError:
            print(stats)
        print("-" * 50)


# ============================================================
# 📥 Carregamento seguro dos DataFrames
# ============================================================
def carregar_dataframe(caminho):
    """Load a joblib file and return the DataFrame stored in it.

    The file may contain a DataFrame directly, or a dict holding one. For a
    dict, a DataFrame whose key is a string containing ``'df'``
    (case-insensitive) is preferred; otherwise the first DataFrame value in
    iteration order is returned.

    Args:
        caminho: path handed to ``joblib.load``.

    Returns:
        The ``pandas.DataFrame`` found in the file.

    Raises:
        ValueError: if the loaded object is a dict without any DataFrame
            value, or is neither a DataFrame nor a dict.
    """
    # NOTE(security): joblib.load is pickle-based and can execute arbitrary
    # code while loading; only call this on files from a trusted source.
    obj = joblib.load(caminho)

    if isinstance(obj, pd.DataFrame):
        return obj

    if isinstance(obj, dict):
        # Only DataFrame values are candidates, so a key such as 'df_meta'
        # that holds something else can no longer be returned by mistake.
        candidatos = [
            (key, value)
            for key, value in obj.items()
            if isinstance(value, pd.DataFrame)
        ]
        for key, value in candidatos:
            # Keys are not guaranteed to be strings; skip the name test
            # for the ones that are not instead of failing on .lower().
            if isinstance(key, str) and 'df' in key.lower():
                return value
        if candidatos:
            return candidatos[0][1]
        raise ValueError(f"Nenhum DataFrame encontrado no arquivo {caminho}. Chaves: {list(obj.keys())}")

    raise ValueError(f"Formato desconhecido no arquivo {caminho}. Tipo: {type(obj)}")


# ============================================================
# 🔗 Arquivos mapeados
# ============================================================
# Logical DataFrame name -> joblib file it is loaded from, in loading order.
arquivos = dict((
    ('df_unidades', 'df_unidades.z'),
    ('df_vendas', 'df_vendas.z'),
    ('df_categorias', 'df_categorias.z'),
    ('df_produtos', 'df_produtos.z'),
))

# ============================================================
# 🚚 Load every mapped file
# ============================================================
# Name -> loaded object; an entry is added only when carregar_dataframe
# returns without raising for that file.
dataframes = {}

for nome, caminho in arquivos.items():
    try:
        df = carregar_dataframe(caminho)
        # Stored before the success message is built, so the entry is kept
        # even if reading df.shape or printing fails afterwards.
        dataframes[nome] = df
        print(f"✅ {nome} carregado com sucesso. Shape: {df.shape}")
    except Exception as e:
        # Best effort: report the failure and move on to the next file.
        print(f"❌ Erro ao carregar {nome}: {e}")


# ============================================================
# 🚀 Generate the statistics
# ============================================================
# For each loaded entry: print a banner with its upper-cased name, compute
# the per-dtype summary tables and display them.
for nome, df in dataframes.items():
    print(f"\n{'#'*60}")
    print(f"ESTATÍSTICAS PARA: {nome.upper()}")
    print(f"{'#'*60}")
    resumo = estatisticas_descritivas(df)
    exibir_resumo(resumo)


✅ df_unidades carregado com sucesso. Shape: (3, 3)
✅ df_vendas carregado com sucesso. Shape: (10418, 14)
✅ df_categorias carregado com sucesso. Shape: (50, 2)
✅ df_produtos carregado com sucesso. Shape: (9112, 9)

############################################################
ESTATÍSTICAS PARA: DF_UNIDADES
############################################################

RESUMO DAS ESTATÍSTICAS DESCRITIVAS



Unnamed: 0,Count,Mean,Std,Var,Min,25%,50% (Median),75%,Max
id,3,10002.0,1.0,1.0,10001,10001.5,10002.0,10002.5,10003


--------------------------------------------------



Unnamed: 0,Count,Unique,Top,Freq
nome_fantasia,3,3,L01 - FORTALEZA,1
status,3,1,sim,3


--------------------------------------------------

############################################################
ESTATÍSTICAS PARA: DF_VENDAS
############################################################

RESUMO DAS ESTATÍSTICAS DESCRITIVAS



Unnamed: 0,Count,Mean,Std,Var,Min,25%,50% (Median),75%,Max
codigo_produto,10418,75510.994,90341.395,8161567642.636,4.0,272.0,7657.0,187295.0,196151.0
quantidade,10418,5.46,19.755,390.257,-59.0,1.0,2.0,5.0,600.0
total,10418,210.337,570.409,325365.963,-2415.0,30.0,80.0,200.0,22100.0
custo_medio,10342,95.042,362.568,131455.534,1.0,13.91,25.0,63.17,14900.0
valor_desconto,10418,67.794,226.247,51187.856,0.0,2.632,15.675,56.4,5262.0
codigo_empresa,10418,0.584,0.634,0.402,0.0,0.0,1.0,1.0,2.0
dia_semana,10418,2.106,1.494,2.231,0.0,1.0,2.0,3.0,6.0
mes,10418,1.896,1.003,1.006,1.0,1.0,2.0,3.0,12.0
trimestre,10418,1.011,0.177,0.031,1.0,1.0,1.0,1.0,4.0
media_movel_3meses,10418,5.362,8.39,70.392,-6.0,2.0,3.516,6.714,152.634


--------------------------------------------------



Unnamed: 0,Count,Min,Max
data_emissao,10418,2022-12-06 00:00:00,2025-02-28 00:00:00


--------------------------------------------------

############################################################
ESTATÍSTICAS PARA: DF_CATEGORIAS
############################################################

RESUMO DAS ESTATÍSTICAS DESCRITIVAS



Unnamed: 0,Count,Mean,Std,Var,Min,25%,50% (Median),75%,Max
id,50,260.7,173.558,30122.541,2,43.5,369.5,393.0,419


--------------------------------------------------



Unnamed: 0,Count,Unique,Top,Freq
descricao,50,50,ACESSORIO,1


--------------------------------------------------

############################################################
ESTATÍSTICAS PARA: DF_PRODUTOS
############################################################

RESUMO DAS ESTATÍSTICAS DESCRITIVAS



Unnamed: 0,Count,Mean,Std,Var,Min,25%,50% (Median),75%,Max
codigo_categoria,9109,78.439,128.77,16581.588,2.0,3.0,7.0,64.0,342.0
preco_venda,9112,219.25,442.911,196170.088,0.15,22.307,70.0,218.345,4485.82
preco_padronizado,9112,-0.07,0.318,0.101,-0.227,-0.211,-0.177,-0.071,2.99
preco_normalizado,9112,0.004,0.009,0.0,0.0,0.0,0.001,0.004,0.087


--------------------------------------------------



Unnamed: 0,Count,Unique,Top,Freq
id,9111,9111,000004,1
descricao,9112,9112,acessorio ref 10029,1
unidade,9112,5,un,9102
marca,9108,17,ediouro,1983
categoria,9109,19,livro,1983


--------------------------------------------------
