# Análise Quantitativa - Minimercados Autônomos
Este notebook implementa a análise quantitativa simulada do estudo, seguindo as etapas do CRISP-DM. Inclui geração de dados fictícios, análise exploratória, clusterização, modelagem preditiva e prescritiva, e exportação de resultados.

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import pearsonr, spearmanr
import os

np.random.seed(42)


## 1. Geração do Dataset Simulado

In [None]:

# Simulation dimensions: 50 SKUs, each observed on 6 consecutive month-start dates.
n_skus = 50
months = pd.date_range(start='2025-01-01', periods=6, freq='MS')
categories = ['Bebidas','Frios','Mercearia','Higiene','Outros']
turnover_classes = ['alto','medio','baixo']

# Sampling probabilities, aligned position-by-position with the two lists above.
category_weights = [0.2, 0.15, 0.4, 0.15, 0.1]
turnover_weights = [0.2, 0.5, 0.3]

# Bounds of the uniform draw for a SKU's baseline monthly demand, per turnover class.
demand_bounds = {
    'alto': (150, 300),
    'medio': (40, 150),
    'baixo': (5, 40),
}

rows = []
for sku_id in range(1, n_skus + 1):
    # NOTE: the sequence of np.random calls below is kept in a fixed order so a
    # seeded run reproduces the same dataset.
    sku = f"SKU{sku_id:03d}"
    category = np.random.choice(categories, p=category_weights)
    turnover = np.random.choice(turnover_classes, p=turnover_weights)
    base_price = round(np.random.uniform(2.5, 30.0), 2)
    # Unit cost is 50%-80% of the base price and stays fixed for the SKU.
    base_cost = round(base_price * np.random.uniform(0.5, 0.8), 2)
    demand_low, demand_high = demand_bounds[turnover]
    mu = np.random.uniform(demand_low, demand_high)

    for month_start in months:
        # Sinusoidal seasonality (+/-5%) plus Gaussian noise.
        seasonal = 0.05 * np.sin((month_start.month/12)*2*np.pi)
        month_factor = 1 + seasonal + np.random.normal(0, 0.03)

        # A promotion happens with probability 0.12 and multiplies demand by 1.4.
        promo = np.random.binomial(1, 0.12)
        if promo == 1:
            promo_effect = 1.4
        else:
            promo_effect = 1.0

        # Monthly price jitters around the base price; demand shrinks slightly
        # when the price is above base, with the factor floored at 0.6.
        price = round(base_price * (1 + np.random.normal(0, 0.03)), 2)
        price_factor = max(0.6, 1 - 0.02*(price-base_price)/base_price)

        expected_sales = mu * month_factor * promo_effect * price_factor
        sales = max(0, int(np.random.poisson(expected_sales)))
        # Stock is 0.8x-1.8x the baseline demand, never below 5 units.
        stock = int(max(5, np.round(mu * np.random.uniform(0.8, 1.8))))
        # Losses are 1%-6% of the units sold, rounded to whole units.
        losses = int(np.round(sales * np.random.uniform(0.01, 0.06)))

        rows.append(dict(
            sku=sku,
            category=category,
            turnover_class=turnover,
            date=month_start,
            price=price,
            cost=base_cost,
            sales_qty=sales,
            stock=stock,
            promo=promo,
            losses=losses,
        ))

df = pd.DataFrame(rows)
df.head()


## 2. Agregação por SKU

In [None]:

# Collapse the monthly rows into one summary row per SKU.
# Each entry maps output column -> (source column, aggregation function).
sku_summary_spec = {
    'category': ('category', 'first'),
    'turnover_class': ('turnover_class', 'first'),
    'avg_price': ('price', 'mean'),
    'avg_cost': ('cost', 'mean'),
    'total_sales_6m': ('sales_qty', 'sum'),
    'avg_monthly_sales': ('sales_qty', 'mean'),
    'std_monthly_sales': ('sales_qty', 'std'),
    'avg_stock': ('stock', 'mean'),
    'total_losses': ('losses', 'sum'),
    'promo_count': ('promo', 'sum'),
}
agg = df.groupby('sku').agg(**sku_summary_spec).reset_index()

# Coefficient of variation of monthly sales. A zero mean is masked to NaN first
# so the division produces NaN for SKUs that never sold.
nonzero_mean_sales = agg['avg_monthly_sales'].replace(0, np.nan)
agg['cv_sales'] = agg['std_monthly_sales'] / nonzero_mean_sales

# Total units sold relative to six times the average stock level.
agg['turnover_rate_est'] = agg['total_sales_6m'] / (agg['avg_stock'] * 6)

# Any NaN left anywhere in the summary is replaced by 0.
agg.fillna(0, inplace=True)
agg.head()


## 3. Análise Exploratória

In [None]:

# Total units sold across all SKUs for each month of the simulation.
sales_by_month = df.groupby('date', as_index=False)['sales_qty'].sum()

# Line chart with one marker per month.
fig, ax = plt.subplots()
ax.plot(sales_by_month['date'], sales_by_month['sales_qty'], marker='o')
ax.set_title('Vendas totais por mês (simulado)')
ax.set_xlabel('Mês')
ax.set_ylabel('Quantidade vendida')
ax.grid(True)
plt.show()


## 4. Clusterização de Produtos (KMeans)

In [None]:

# Features used to group SKUs: sales level, sales variability and stock level.
cluster_cols = ['avg_monthly_sales', 'cv_sales', 'avg_stock']
features = agg[cluster_cols].copy()

# Standardise each feature before running KMeans with 3 clusters.
scaler = StandardScaler()
X = scaler.fit_transform(features)
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
agg['cluster'] = kmeans.fit_predict(X)

# KMeans cluster ids are arbitrary, so rank the clusters by their mean monthly
# sales (highest first) and name them in that order.
cluster_desc = (
    agg.groupby('cluster')
    .agg(mean_avg_sales=('avg_monthly_sales', 'mean'))
    .reset_index()
    .sort_values('mean_avg_sales', ascending=False)
)
order = cluster_desc['cluster'].values
label_map = dict(zip(order, ['alto', 'medio', 'baixo']))
agg['cluster_label'] = agg['cluster'].map(label_map)

agg[['sku','avg_monthly_sales','avg_stock','cluster_label']].head(10)


## 5. Modelagem Preditiva: Médias Móveis e Suavização Exponencial

In [None]:

def simple_moving_average(series, window):
    """Return the trailing moving average of ``series`` over ``window`` points.

    Because ``min_periods=1`` is used, the first ``window - 1`` positions are
    averaged over the observations available so far instead of being NaN.
    """
    trailing = series.rolling(window=window, min_periods=1)
    return trailing.mean()

# Example series: the monthly history of a single SKU in chronological order.
sku_example = df[df['sku']=='SKU001'].sort_values('date').copy()
sku_example['sma_3'] = simple_moving_average(sku_example['sales_qty'], 3)

# Simple exponential smoothing: s_0 = x_0 and s_t = alpha*x_t + (1-alpha)*s_{t-1}.
# ``adjust=False`` makes pandas evaluate exactly this recursive form, so no
# hand-written loop (and no leftover loop temporaries) is needed.
alpha = 0.3
sku_example['exp_smooth_alpha03'] = (
    sku_example['sales_qty'].ewm(alpha=alpha, adjust=False).mean()
)

sku_example[['date','sales_qty','sma_3','exp_smooth_alpha03']]


## 6. Regressão Linear Múltipla (Preço e Promoção → Vendas)

In [None]:

# Least-squares fit of monthly units sold on price and the promo flag,
# pooling the rows of every SKU and month.
X_reg = df[['price','promo']].copy()
y_reg = df['sales_qty']
lr = LinearRegression()
lr.fit(X_reg, y_reg)

# Coefficients come back in the column order of X_reg: price first, then promo.
coef_price, coef_promo = lr.coef_
print('Coef preço:', coef_price)
print('Coef promo:', coef_promo)
# R² is measured on the same rows the model was fitted on.
print('R2:', lr.score(X_reg, y_reg))


## 7. Modelagem Prescritiva: Árvore de Decisão

In [None]:

def action_label(row):
    """Map a SKU summary row to a stock action.

    ``row`` must provide ``'avg_monthly_sales'`` and ``'avg_stock'``.

    Returns ``'reorder'`` when average monthly sales exceed 90% of the average
    stock, ``'decrease_stock'`` when they fall below 35% of it, and
    ``'maintain'`` otherwise (both thresholds are exclusive).
    """
    monthly_sales = row['avg_monthly_sales']
    stock_level = row['avg_stock']

    if monthly_sales > stock_level*0.9:
        return 'reorder'
    if monthly_sales < stock_level*0.35:
        return 'decrease_stock'
    return 'maintain'

# Rule-based target: one stock action per SKU, derived from its summary row.
agg['action'] = agg.apply(action_label, axis=1)

# Single definition of the predictors, shared by the fit and the plot below.
tree_feature_names = ['avg_monthly_sales','cv_sales','avg_stock','promo_count']
clf_features = agg[tree_feature_names].values
clf_labels = agg['action'].values

clf = DecisionTreeClassifier(random_state=42, max_depth=4)
clf.fit(clf_features, clf_labels)
# Predictions are made on the same SKUs used for fitting, so the metrics
# printed below are in-sample.
preds = clf.predict(clf_features)

print('Acurácia:', accuracy_score(clf_labels, preds))
print(classification_report(clf_labels, preds))

# Draw the fitted tree with nodes coloured by class.
plt.figure(figsize=(10,6))
plot_tree(clf, feature_names=tree_feature_names, class_names=clf.classes_, filled=True)
plt.show()


## 8. Análise de Correlação

In [None]:

# Column pairs of the per-SKU summary whose association is reported.
pairs = [
    ('avg_monthly_sales', 'avg_stock'),
    ('avg_monthly_sales', 'cv_sales'),
    ('total_sales_6m', 'promo_count'),
    ('avg_monthly_sales', 'avg_price'),
]

for a, b in pairs:
    left, right = agg[a], agg[b]
    # Both scipy results are tuple-like; element 0 is the correlation coefficient.
    p_pearson = pearsonr(left, right)[0]
    p_spearman = spearmanr(left, right)[0]
    print(f'{a} vs {b} → Pearson: {p_pearson:.3f}, Spearman: {p_spearman:.3f}')


## 9. Exportação dos Dados

In [None]:

# Write the monthly dataset and the per-SKU summary as CSV files under
# ./results, creating the folder if it does not exist yet.
os.makedirs('results', exist_ok=True)
exports = (
    (df, 'results/minimarket_simulated_monthly.csv'),
    (agg, 'results/minimarket_simulated_sku_agg.csv'),
)
for frame, csv_path in exports:
    frame.to_csv(csv_path, index=False)
print("Arquivos exportados para pasta 'results'")
