In [None]:
# ============================================================
# MEGA-SENA - MACHINE LEARNING BASEADO EM HISTÓRICO
# ============================================================

In [None]:
# --------------------
# Imports
# --------------------

In [None]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import hamming_loss

In [None]:
# --------------------
# Upload do dataset
# --------------------

In [None]:
# Ask the user for the dataset through Colab's upload helper.
# `uploaded` is read later as a mapping whose first key is used as the CSV path.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime — confirm before running this notebook elsewhere.
from google.colab import files
uploaded = files.upload()

In [None]:
# --------------------
# Leitura do dataset
# --------------------

In [None]:
# `uploaded` maps each uploaded file name to its content. An empty mapping
# means nothing was uploaded, so fail with a clear message instead of an
# opaque IndexError.
if not uploaded:
    raise ValueError("Nenhum arquivo foi enviado; execute novamente a célula de upload.")

# Only the first uploaded file is used as the dataset.
file_name = next(iter(uploaded))
data = pd.read_csv(file_name)

In [None]:
# Colunas com os números sorteados

In [None]:
# Names of the six dataset columns that hold the drawn numbers: Ball1 .. Ball6.
balls = [f"Ball{posicao}" for posicao in range(1, 7)]

In [None]:
# --------------------
# Transformação dos dados
# Cada sorteio -> vetor binário de 60 posições
# --------------------

In [None]:
# Encode every draw as a 60-position multi-hot vector (position k-1 is 1 when
# number k was drawn) and build supervised pairs "draw t -> draw t+1".
#
# Using the same vector as both feature and target would let the model learn
# the identity mapping, which makes every downstream metric meaningless, so
# the targets are shifted one draw ahead of the features.
# NOTE(review): this assumes the rows of `data` are in chronological order
# (oldest first) — confirm against the dataset.
N_NUMEROS = 60  # size of each encoded vector: numbers 1..60

# Integer matrix of shape (n_draws, len(balls)) with the drawn numbers.
sorteios = data[balls].to_numpy(dtype=int)

if len(sorteios) < 2:
    raise ValueError("São necessários pelo menos dois sorteios para montar os pares de treino.")
# Reject out-of-range values explicitly: a 0 would otherwise wrap around to
# index -1 and silently mark number 60.
if sorteios.min() < 1 or sorteios.max() > N_NUMEROS:
    raise ValueError(f"Os números sorteados devem estar entre 1 e {N_NUMEROS}.")

# Vectorised multi-hot encoding: row i gets a 1 at column (number - 1) for
# each of the numbers drawn in draw i.
codificados = np.zeros((len(sorteios), N_NUMEROS))
codificados[np.arange(len(sorteios))[:, None], sorteios - 1] = 1

# Features are every draw except the last; targets are the draw that follows.
# y[-1] is therefore the most recent draw in the dataset.
X = codificados[:-1]
y = codificados[1:]

In [None]:
# --------------------
# Separação treino / teste
# --------------------

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
particoes = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = particoes

In [None]:
# --------------------
# Treinamento do modelo
# --------------------

In [None]:
# Hyperparameters of the random forest, gathered in one place for easy tuning.
parametros_floresta = dict(
    n_estimators=300,
    max_depth=25,
    random_state=42,
    n_jobs=-1,
)
model = RandomForestClassifier(**parametros_floresta)

# Fit on the training split; y_train has one column per number, so the forest
# is trained as a multi-output classifier.
model.fit(X_train, y_train)

In [None]:
# --------------------
# Avaliação do modelo
# --------------------

In [None]:
# Predict the held-out rows and score them with the Hamming loss, i.e. the
# fraction of individual 0/1 labels that were predicted incorrectly.
y_pred = model.predict(X_test)
loss = hamming_loss(y_true=y_test, y_pred=y_pred)

print(f"\nHamming Loss (quanto menor, melhor): {loss:.4f}")

In [None]:
# --------------------
# Probabilidade de cada número (1 a 60)
# --------------------

In [None]:
# Model input: the encoded vector of the last draw in the dataset. The final
# row of `y` is always that draw, whichever rows were kept as features in `X`.
ultima_entrada = y[-1].reshape(1, -1)

# For a multi-output forest, predict_proba returns one (n_samples, n_classes)
# array per output and model.classes_ holds the matching class labels.
probas = model.predict_proba(ultima_entrada)

# Pick the probability of class 1 ("number is drawn") by label, not by fixed
# column index: an output that saw only one class during training has a single
# column, and indexing [1] would raise IndexError. If class 1 was never seen
# the mask is empty and the sum yields 0.0.
probabilidades = np.array([
    p[0][classes == 1].sum()
    for p, classes in zip(probas, model.classes_)
])

# One row per number (1-based), ordered from most to least probable.
ranking = pd.DataFrame({
    "Numero": np.arange(1, len(probabilidades) + 1),
    "Probabilidade": probabilidades
}).sort_values(by="Probabilidade", ascending=False)

print("\nTop 15 números mais prováveis segundo o modelo:")
print(ranking.head(15))

In [None]:
# --------------------
# Melhor jogo sugerido (Top 6)
# --------------------

In [None]:
# `ranking` is sorted by descending probability, so its first six rows are the
# six highest-ranked numbers; they are shown in ascending numeric order.
melhor_jogo = ranking.head(6)["Numero"].sort_values().tolist()

print("\n🎯 Melhor jogo sugerido:")
print(melhor_jogo)

In [None]:
# --------------------
# Geração de múltiplos jogos otimizados
# --------------------

In [None]:
# Candidate pool: the 20 highest-ranked numbers.
top_20 = ranking.head(20)["Numero"].tolist()

# A dedicated, seeded generator makes the suggested games reproducible on
# every run (same seed as the model's random_state) without altering the
# global `random` state.
gerador = random.Random(42)

print("\n🎲 Jogos sugeridos:")
for i in range(10):
    # Each game is six distinct numbers from the pool, shown in ascending order.
    jogo = sorted(gerador.sample(top_20, 6))
    print(f"Jogo {i+1}: {jogo}")