# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import joblib
import os

# Read the tab-separated dataset and discard every row that contains at
# least one missing value (in any column, not only the ones used below).
dataset = pd.read_csv('datas/dataset.csv', sep='\t').dropna(how='any')

# Feature matrix: columns at positions 5 through 11 (original note: stats
# and generation). Target vector: column at position 16 (original note:
# percentage of victory). Positions are tied to the CSV column order.
X = dataset.iloc[:, 5:12].to_numpy()
y = dataset.iloc[:, 16].to_numpy()

# Hold out 20% of the rows for validation; the fixed seed keeps the
# partition identical from one run to the next.
(
    X_APPRENTISSAGE,
    X_VALIDATION,
    Y_APPRENTISSAGE,
    Y_VALIDATION,
) = train_test_split(X, y, test_size=0.2, random_state=0)

# ----- LINEAR REGRESSION -----
# Fit on the training split (fit() returns the estimator itself), then
# score the validation predictions with R^2. The report label "Precision"
# is the script's own wording for that R^2 value.
lin_reg = LinearRegression().fit(X_APPRENTISSAGE, Y_APPRENTISSAGE)
pred_lin = lin_reg.predict(X_VALIDATION)
precision_lin = r2_score(Y_VALIDATION, pred_lin)
print(
    '>> ----------- REGRESSION LINEAIRE -----------',
    '>> Precision = ' + str(precision_lin),
    '------------------------------------------',
    sep='\n',
)

# ----- DECISION TREE REGRESSOR -----
# The tree is seeded so that repeated runs produce the same fitted model and
# the same reported score. scikit-learn randomly permutes candidate features
# at each split, so an unseeded tree can differ between runs on identical
# data. The seed value mirrors the one passed to train_test_split.
tree_reg = DecisionTreeRegressor(random_state=0)
tree_reg.fit(X_APPRENTISSAGE, Y_APPRENTISSAGE)
pred_tree = tree_reg.predict(X_VALIDATION)
# The value printed as "Precision" is the R^2 score on the validation split.
precision_tree = r2_score(Y_VALIDATION, pred_tree)
print('>> ----------- ARBRES DE DECISIONS -----------')
print('>> Precision =', precision_tree)
print('------------------------------------------')

# ----- RANDOM FOREST REGRESSOR -----
# The forest is seeded so that bootstrap sampling and per-split feature
# selection are reproducible. Without a seed, both the reported score and the
# estimator persisted later in this script change on every run. The seed
# value mirrors the one passed to train_test_split.
forest_reg = RandomForestRegressor(random_state=0)
forest_reg.fit(X_APPRENTISSAGE, Y_APPRENTISSAGE)
pred_forest = forest_reg.predict(X_VALIDATION)
# The value printed as "Precision" is the R^2 score on the validation split.
precision_forest = r2_score(Y_VALIDATION, pred_forest)
print('>> ----------- FORETS ALEATOIRES -----------')
print('>> Precision =', precision_forest)
print('------------------------------------------')

# Persist the fitted Random Forest. It is saved unconditionally: the three
# scores computed earlier are not compared at this point.
model_path = 'modele/modele_pokemon.mod'
os.makedirs('modele', exist_ok=True)  # no error if the folder already exists
joblib.dump(forest_reg, model_path)
print('Random Forest model saved to ' + model_path)
