In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, GRU, Dense

df = pd.read_excel("./posicoes_csv3.xlsx")  #
df['is_winner'] = (df['colocacao'] == 1).astype(int)

features = ['wins', 'highs', 'safe', 'low', 'bttm', 'elim']


X_seqs = []
y = []
lengths = []

max_len = df.groupby(['temporada_franquia','id_queen']).size().max()

groups = df.groupby(['temporada_franquia','id_queen'])

for (season, qid), g in groups:
    g = g.sort_values('episodio')
    feats = g[features].values       # shape (n_episodes, n_features)
    label = g['is_winner'].iloc[0]  # rótulo final, não entra no input

    for t in range(1, len(feats)+1):
        prefix = feats[:t]            # acumulado até o episódio t (sem info futura)

        # padding
        padded = np.zeros((max_len, len(features)))
        padded[:t, :] = prefix

        X_seqs.append(padded)
        lengths.append(t)
        y.append(label)


X_seqs = np.array(X_seqs)  # (n_samples, max_len, n_features)
y = np.array(y)
lengths = np.array(lengths)


# Criar modelo RNN/GRU

model = Sequential()
model.add(Masking(mask_value=0., input_shape=(max_len, len(features))))
model.add(GRU(32))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


#  Treinar modelo

model.fit(X_seqs, y, epochs=20, batch_size=8)

# 6️Gerar previsões episódio a episódio

results = []

for (season, qid), g in groups:
    g = g.sort_values('episodio')
    nome = g['nome'].iloc[0]

    row = {'queen': nome, 'temporada': season}
    feats = g[features].values

    for t in range(1, len(feats)+1):
        prefix = feats[:t]
        padded = np.zeros((max_len, len(features)))
        padded[:t, :] = prefix

        padded_batch = np.expand_dims(padded, axis=0)  # shape (1, max_len, n_features)
        prob = model.predict(padded_batch, verbose=0).ravel()[0]

        row[f'episodio_{t}'] = f"{prob*100:.1f}%"

    results.append(row)

df_results = pd.DataFrame(results)

# Salvar resultados

df_results.to_csv("probabilidades_sem_vazamento.csv", index=False)
df_results.to_excel("probabilidades_sem_vazamento.xlsx", index=False)


  super().__init__(**kwargs)


Epoch 1/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.7699 - loss: 0.4944
Epoch 2/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.8841 - loss: 0.2733
Epoch 3/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9043 - loss: 0.2356
Epoch 4/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8950 - loss: 0.2493
Epoch 5/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9064 - loss: 0.2305
Epoch 6/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9216 - loss: 0.1982
Epoch 7/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9061 - loss: 0.2125
Epoch 8/20
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9010 - loss: 0.2447
Epoch 9/20
[1m268/268[0m [32m━━━━━

In [5]:
queen_names = ['Lady Camden', 'Willow Pill']
for name in queen_names:
    g = df[df['nome']==name].sort_values('episodio')
    print(f"\n=== {name} ===")
    print(g[features])



=== Lady Camden ===
      wins  highs  safe  low  bttm  elim
1264     0      1     0    0     0     0
1265     0      1     1    0     0     0
1266     0      1     2    0     0     0
1267     0      2     2    0     0     0
1268     0      3     2    0     0     0
1269     1      3     2    0     0     0
1270     1      3     2    1     0     0
1271     1      3     3    1     0     0
1272     1      3     3    1     1     0
1273     2      3     3    1     1     0
1274     2      4     3    1     1     0
1275     3      4     3    1     1     0
1276     3      4     3    1     1     0

=== Willow Pill ===
      wins  highs  safe  low  bttm  elim
1251     0      1     0    0     0     0
1252     1      1     0    0     0     0
1253     1      2     0    0     0     0
1254     1      2     1    0     0     0
1255     1      2     2    0     0     0
1256     1      3     2    0     0     0
1257     1      3     3    0     0     0
1258     1      4     3    0     0     0
1259     1     

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Dividir em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_seqs, y, test_size=0.2, random_state=42)

# Criar modelo
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(max_len, len(features))))
model.add(GRU(32))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Treinar modelo
model.fit(X_train, y_train, epochs=20, batch_size=8, validation_data=(X_test, y_test))

# Predição e cálculo do ROC AUC
y_pred = model.predict(X_test).ravel()
auc = roc_auc_score(y_test, y_pred)

print(f"ROC AUC: {auc:.4f}")


  super().__init__(**kwargs)


Epoch 1/20
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.8675 - loss: 0.4441 - val_accuracy: 0.8785 - val_loss: 0.2949
Epoch 2/20
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8881 - loss: 0.2996 - val_accuracy: 0.8832 - val_loss: 0.2805
Epoch 3/20
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8755 - loss: 0.2587 - val_accuracy: 0.8692 - val_loss: 0.2716
Epoch 4/20
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8966 - loss: 0.2498 - val_accuracy: 0.8808 - val_loss: 0.2636
Epoch 5/20
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9082 - loss: 0.2216 - val_accuracy: 0.8692 - val_loss: 0.2609
Epoch 6/20
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9063 - loss: 0.2349 - val_accuracy: 0.8832 - val_loss: 0.2581
Epoch 7/20
[1m214/214[0m 