In [7]:
import numpy as np
import pandas as pd

# Definir número de turnos
num_turnos = 10

# Mockar dados para cada coluna
turnos = np.arange(1, num_turnos + 1)
cartas_na_mao = np.maximum(7 - turnos + 1, 0) + np.random.randint(0, 2, size=num_turnos)  # simula draws e cartas jogadas

# Garantir que terrenos em campo e mana disponível nunca diminuam
terrenos_baixados = np.random.randint(0, 2, size=num_turnos)
terrenos_em_campo = np.cumsum(terrenos_baixados)

mana_disponivel = terrenos_em_campo.copy()

# Gerar valores para spells jogados e mana gasta
spells_jogados = np.random.randint(0, 3, size=num_turnos)
mana_gasta = np.minimum(mana_disponivel, spells_jogados + np.random.randint(0, 2, size=num_turnos))  # mana gasta não pode exceder a mana disponível

# Criar o DataFrame
df_simulacao = pd.DataFrame({
    'turn': turnos,
    'hand_cards': cartas_na_mao,
    'mana': mana_disponivel,
    'lands': terrenos_em_campo,
    'spells_played': spells_jogados,
    'spent_mana': mana_gasta,
    'lands_entered': terrenos_baixados
})

df_simulacao

Unnamed: 0,turn,hand_cards,mana,lands,spells_played,spent_mana,lands_entered
0,1,8,1,1,2,1,1
1,2,6,1,1,0,1,0
2,3,5,2,2,1,1,1
3,4,4,3,3,1,1,1
4,5,3,4,4,2,3,1
5,6,2,5,5,1,2,1
6,7,2,6,6,2,3,1
7,8,1,7,7,2,3,1
8,9,1,7,7,1,2,0
9,10,1,8,8,0,0,1


In [11]:
# Calcular a soma cumulativa de mana disponível e mana gasta
df_simulacao['cumulative_mana'] = df_simulacao['mana'].cumsum()
df_simulacao['cumulative_spent_mana'] = df_simulacao['spent_mana'].cumsum()

# Calcular a eficiência da curva de mana cumulativa
df_simulacao["mana_curve_efficiency"] = df_simulacao['cumulative_spent_mana'] / df_simulacao['cumulative_mana']
df_simulacao['diff_mana'] = df_simulacao['mana'].diff().fillna(0)
df_simulacao['diff_hand_cards'] = df_simulacao['hand_cards'].diff().fillna(0)
df_simulacao['spent_mana_per_land'] = df_simulacao['spent_mana'] / df_simulacao['lands']
df_simulacao['spells_played_per_mana'] = df_simulacao['spells_played'] / df_simulacao['mana']
df_simulacao['mana_usage_rate'] = df_simulacao['spent_mana'] / df_simulacao['mana']

# Feature interaction example
df_simulacao['mana_lands_interaction'] = df_simulacao['mana'] * df_simulacao['lands']
df_simulacao

Unnamed: 0,turn,hand_cards,mana,lands,spells_played,spent_mana,lands_entered,mana_curve_efficiency,cumulative_mana,cumulative_spent_mana,diff_mana,diff_hand_cards,spent_mana_per_land,spells_played_per_mana,mana_usage_rate,mana_lands_interaction
0,1,8,1,1,2,1,1,1.0,1,1,0.0,0.0,1.0,2.0,1.0,1
1,2,6,1,1,0,1,0,1.0,2,2,0.0,-2.0,1.0,0.0,1.0,1
2,3,5,2,2,1,1,1,0.75,4,3,1.0,-1.0,0.5,0.5,0.5,4
3,4,4,3,3,1,1,1,0.571429,7,4,1.0,-1.0,0.333333,0.333333,0.333333,9
4,5,3,4,4,2,3,1,0.636364,11,7,1.0,-1.0,0.75,0.5,0.75,16
5,6,2,5,5,1,2,1,0.5625,16,9,1.0,-1.0,0.4,0.2,0.4,25
6,7,2,6,6,2,3,1,0.545455,22,12,1.0,0.0,0.5,0.333333,0.5,36
7,8,1,7,7,2,3,1,0.517241,29,15,1.0,-1.0,0.428571,0.285714,0.428571,49
8,9,1,7,7,1,2,0,0.472222,36,17,0.0,0.0,0.285714,0.142857,0.285714,49
9,10,1,8,8,0,0,1,0.386364,44,17,1.0,0.0,0.0,0.0,0.0,64


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Separar as features (X) e o target (y)
X = df_simulacao.drop(columns=['mana_curve_efficiency'])
y = df_simulacao['mana_curve_efficiency']

# Dividir os dados em conjuntos de treino e teste (80% treino, 20% teste)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Configuração do modelo RandomForest com Cross-Validation
model = RandomForestRegressor(random_state=42)

# Definição de um grid de hiperparâmetros para ajuste
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Configuração do GridSearchCV com Cross-Validation (5 folds)
grid_search = GridSearchCV(
    estimator=model, 
    param_grid=param_grid, 
    cv=5, n_jobs=-1, 
    verbose=2, 
    scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Melhor modelo encontrado
best_model = grid_search.best_estimator_

# Avaliação no conjunto de teste
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Mean Squared Error on Test Set: {mse}")
print(f"R-squared on Test Set: {r2}")

# Importância das features
feature_importances = pd.Series(best_model.feature_importances_, index=X.columns)
print("Feature Importances:")
print(feature_importances.sort_values(ascending=False))

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Mean Squared Error on Test Set: 0.019568853601322662
R-squared on Test Set: 0.7189890939909787
Feature Importances:
spells_played_per_mana    0.141800
hand_cards                0.110293
turn                      0.100997
mana                      0.099701
cumulative_spent_mana     0.088457
spent_mana_per_land       0.080418
mana_lands_interaction    0.078277
mana_usage_rate           0.072506
lands                     0.065976
cumulative_mana           0.063302
spells_played             0.037405
diff_mana                 0.027507
spent_mana                0.018168
diff_hand_cards           0.015193
lands_entered             0.000000
dtype: float64


In [None]:
# A cor do baralho pode ser uma feature importante
# Que features que não sejam tão correlacionadas que eu consigo para medir a eficiência de mana?