In [1]:
import pandas as pd
from pycaret.classification import setup, compare_models, predict_model, load_model, finalize_model, save_model, pull
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import joblib

# Load the CSV file into a DataFrame
df = pd.read_csv('players_df_sin_reco.csv')

# Assuming the last columns are the target labels and the rest are features
X = df.iloc[:, 6:20]  # Select columns 7 to 20 (0-based index: 6 to 19)
y = df.iloc[:, -11:]  

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# List of target column names
target_columns = y.columns

# Create an empty DataFrame to store results
results_df = pd.DataFrame(columns=['target', 'accuracy'])

for target in target_columns:
    X_target_train = X_train.copy()
    X_target_test = X_test.copy()
    y_target_train = y_train[target]
    y_target_test = y_test[target]
    df_target = pd.concat([X_target_train, y_target_train], axis=1)

    # Setup PyCaret environment
    clf = setup(data=df_target, target=target)

    # Compare different models
    best_model = compare_models()

    # Finalize the best model
    final_model = finalize_model(best_model)

    # Predict on the test set
    predictions = predict_model(final_model, data=X_target_test)

    # Assuming 'prediction_label' is the predicted label column (adjust if necessary)
    y_pred = predictions['prediction_label']

    # Calculate accuracy
    accuracy = accuracy_score(y_target_test, y_pred)

    # Append results to DataFrame
    new_row = pd.DataFrame({'target': target, 'accuracy': accuracy}, index=[0])
    results_df = pd.concat([results_df, new_row], ignore_index=True)

    # Save the model
    save_model(final_model, f'model_{target}')

    # Load the model for future predictions
    loaded_model = load_model(f'model_{target}')

# Save the results to a PKL file using joblib
joblib.dump(results_df, 'finalmodel.pkl')

Unnamed: 0,Description,Value
0,Session id,638
1,Target,Class_Traditional Keeper
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9857,0.8917,0.9,0.85,0.8667,,0.8645,0.022
ridge,Ridge Classifier,0.9857,0.8833,0.9,0.85,0.8667,,0.8645,0.014
lda,Linear Discriminant Analysis,0.9857,0.8833,0.9,0.85,0.8667,,0.8645,0.01
dt,Decision Tree Classifier,0.969,0.8417,0.8,0.75,0.7667,,0.7645,0.01
rf,Random Forest Classifier,0.969,0.9,0.7,0.7,0.7,,0.7,0.068
ada,Ada Boost Classifier,0.969,0.8717,0.8,0.75,0.7667,,0.7645,0.041
gbc,Gradient Boosting Classifier,0.969,0.8917,0.8,0.75,0.7667,,0.7645,0.029
et,Extra Trees Classifier,0.969,0.9,0.8,0.75,0.7667,,0.7645,0.052
xgboost,Extreme Gradient Boosting,0.969,0.88,0.8,0.75,0.7667,,0.7645,0.17
lightgbm,Light Gradient Boosting Machine,0.969,0.88,0.7,0.7,0.7,,0.7,0.036


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2425
1,Target,Class_Sweeper Keeper
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9381,0.975,0.85,0.85,0.8333,0.8143,0.8265,0.06
ada,Ada Boost Classifier,0.9214,0.955,0.9,0.85,0.85,0.8014,0.8197,0.032
et,Extra Trees Classifier,0.9214,1.0,0.85,0.9333,0.85,0.8081,0.8371,0.051
lr,Logistic Regression,0.919,1.0,0.85,0.9,0.8333,0.7874,0.8175,0.019
dt,Decision Tree Classifier,0.9071,0.88,0.8,0.9,0.8167,0.7603,0.7843,0.009
knn,K Neighbors Classifier,0.9024,0.98,0.8,0.8333,0.7833,0.7293,0.7525,0.015
gbc,Gradient Boosting Classifier,0.9024,0.945,0.85,0.8833,0.8267,0.7648,0.7967,0.036
ridge,Ridge Classifier,0.8881,0.9875,0.8,0.85,0.7667,0.7034,0.7453,0.009
lda,Linear Discriminant Analysis,0.8881,0.9875,0.8,0.85,0.7667,0.7034,0.7453,0.011
nb,Naive Bayes,0.8738,0.96,0.6,0.8,0.6667,0.6103,0.6343,0.01


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,6515
1,Target,Class_Ball Playing Defender
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9714,0.99,1.0,0.9714,0.9833,0.9,0.9,0.055
lr,Logistic Regression,0.969,0.975,1.0,0.9633,0.9798,0.916,0.9278,0.012
ridge,Ridge Classifier,0.969,0.965,1.0,0.9633,0.9798,0.916,0.9278,0.011
lda,Linear Discriminant Analysis,0.969,0.965,1.0,0.9633,0.9798,0.916,0.9278,0.01
ada,Ada Boost Classifier,0.9548,0.99,1.0,0.9514,0.9722,0.8571,0.8632,0.038
svm,SVM - Linear Kernel,0.9524,0.975,0.95,0.9833,0.9576,0.8988,0.9145,0.009
nb,Naive Bayes,0.9405,0.94,0.955,0.9667,0.9584,0.8431,0.8449,0.01
rf,Random Forest Classifier,0.9405,0.98,0.98,0.9467,0.9616,0.8336,0.8374,0.071
lightgbm,Light Gradient Boosting Machine,0.9238,0.98,0.96,0.9467,0.9505,0.7908,0.8007,0.036
knn,K Neighbors Classifier,0.9214,0.9825,0.975,0.9264,0.9472,0.7821,0.7882,0.015


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,5370
1,Target,Class_No Nonsense Defender
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9548,0.9775,0.96,0.98,0.9639,0.9033,0.918,0.021
ridge,Ridge Classifier,0.9524,0.9775,1.0,0.9433,0.9687,0.8731,0.891,0.009
lda,Linear Discriminant Analysis,0.9524,0.9775,1.0,0.9433,0.9687,0.8731,0.891,0.009
knn,K Neighbors Classifier,0.8929,0.9538,0.955,0.9017,0.9239,0.7288,0.7349,0.015
et,Extra Trees Classifier,0.8881,0.9233,0.955,0.895,0.9194,0.7356,0.7608,0.05
lightgbm,Light Gradient Boosting Machine,0.8881,0.9375,0.98,0.8867,0.9244,0.7106,0.7358,0.031
svm,SVM - Linear Kernel,0.8738,0.98,0.98,0.8667,0.9156,0.671,0.6928,0.01
rf,Random Forest Classifier,0.8714,0.8942,0.955,0.8817,0.9106,0.6784,0.6976,0.061
nb,Naive Bayes,0.8643,0.9308,0.91,0.8933,0.8961,0.6901,0.7125,0.01
xgboost,Extreme Gradient Boosting,0.8381,0.89,0.905,0.87,0.8833,0.6177,0.6277,0.016


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,3380
1,Target,Class_Full Back
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
xgboost,Extreme Gradient Boosting,0.9357,0.9875,0.98,0.94,0.9556,0.841,0.8628,0.015
knn,K Neighbors Classifier,0.9048,0.9725,0.955,0.915,0.9328,0.7693,0.7815,0.015
lda,Linear Discriminant Analysis,0.9048,0.9425,0.955,0.915,0.9328,0.7693,0.7815,0.009
lr,Logistic Regression,0.9024,0.955,0.935,0.935,0.9306,0.766,0.7878,0.02
ada,Ada Boost Classifier,0.9024,0.9562,0.955,0.9233,0.9322,0.7077,0.7335,0.034
et,Extra Trees Classifier,0.8857,0.9675,0.955,0.9017,0.9217,0.7089,0.7245,0.048
lightgbm,Light Gradient Boosting Machine,0.8857,0.9175,0.935,0.915,0.9217,0.6889,0.7045,0.031
svm,SVM - Linear Kernel,0.8714,0.905,0.915,0.92,0.9052,0.6985,0.7417,0.01
ridge,Ridge Classifier,0.8714,0.9425,0.955,0.8817,0.9128,0.6693,0.6815,0.009
rf,Random Forest Classifier,0.869,0.9425,0.955,0.885,0.9126,0.6089,0.6245,0.06


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,7700
1,Target,Class_All Action Midfielder
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9333,0.9625,1.0,0.9267,0.9578,0.8143,0.8265,0.031
ridge,Ridge Classifier,0.9214,1.0,0.98,0.9267,0.9467,0.7963,0.8093,0.01
lda,Linear Discriminant Analysis,0.9214,1.0,0.98,0.9267,0.9467,0.7963,0.8093,0.01
svm,SVM - Linear Kernel,0.919,1.0,0.93,0.96,0.9333,0.8239,0.8495,0.01
nb,Naive Bayes,0.9048,0.9208,0.98,0.9133,0.9378,0.7391,0.7461,0.012
et,Extra Trees Classifier,0.9024,1.0,0.98,0.9,0.9333,0.7553,0.7893,0.051
lr,Logistic Regression,0.8857,0.9875,0.93,0.9267,0.9181,0.7172,0.7409,0.019
knn,K Neighbors Classifier,0.8857,0.95,1.0,0.8667,0.9244,0.6981,0.726,0.02
rf,Random Forest Classifier,0.8857,0.925,0.955,0.9067,0.9213,0.7077,0.7335,0.068
xgboost,Extreme Gradient Boosting,0.8667,0.975,0.975,0.8733,0.9124,0.6381,0.6604,0.016


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,8893
1,Target,Class_Midfield Playmaker
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9405,0.99,0.98,0.949,0.9632,0.73,0.73,0.02
ridge,Ridge Classifier,0.9405,0.9567,0.96,0.9657,0.9612,0.7871,0.7932,0.01
rf,Random Forest Classifier,0.9405,0.99,0.98,0.949,0.9632,0.73,0.73,0.061
lda,Linear Discriminant Analysis,0.9405,0.9567,0.96,0.9657,0.9612,0.7871,0.7932,0.009
et,Extra Trees Classifier,0.9405,0.98,0.98,0.949,0.9632,0.73,0.73,0.048
knn,K Neighbors Classifier,0.9238,0.985,0.96,0.949,0.9521,0.6871,0.6932,0.015
ada,Ada Boost Classifier,0.9238,0.9733,0.98,0.9324,0.9541,0.63,0.63,0.033
lightgbm,Light Gradient Boosting Machine,0.9238,0.99,0.96,0.9467,0.9507,0.6996,0.703,0.032
xgboost,Extreme Gradient Boosting,0.9071,0.95,0.96,0.9324,0.943,0.5871,0.5932,0.018
dt,Decision Tree Classifier,0.8905,0.835,0.92,0.949,0.9271,0.6205,0.638,0.009


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,1990
1,Target,Class_Traditional Winger
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.969,1.0,0.9167,1.0,0.9467,0.9267,0.9363,0.018
ridge,Ridge Classifier,0.8905,0.9542,0.8667,0.8583,0.849,0.7625,0.7756,0.01
lda,Linear Discriminant Analysis,0.8905,0.9542,0.8667,0.8583,0.849,0.7625,0.7756,0.009
rf,Random Forest Classifier,0.869,0.9167,0.75,0.875,0.7857,0.6934,0.7147,0.064
et,Extra Trees Classifier,0.8548,0.9042,0.7167,0.875,0.7657,0.663,0.6878,0.049
svm,SVM - Linear Kernel,0.8381,0.9167,0.8,0.7333,0.7314,0.6316,0.6679,0.009
xgboost,Extreme Gradient Boosting,0.8286,0.9125,0.7333,0.7417,0.7124,0.603,0.6262,0.016
ada,Ada Boost Classifier,0.8238,0.9375,0.75,0.7083,0.7005,0.5886,0.6179,0.035
lightgbm,Light Gradient Boosting Machine,0.8143,0.8875,0.7833,0.7083,0.7395,0.5928,0.5974,0.031
nb,Naive Bayes,0.8048,0.875,0.9,0.7083,0.7848,0.624,0.6371,0.01


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,1516
1,Target,Class_Inverted Winger
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.95,0.9889,0.9667,0.95,0.9514,0.9,0.9121,0.017
et,Extra Trees Classifier,0.9381,0.9889,0.9333,0.95,0.9314,0.8749,0.8895,0.051
ridge,Ridge Classifier,0.919,0.9694,0.9667,0.9,0.9229,0.8387,0.8578,0.009
lda,Linear Discriminant Analysis,0.919,0.9694,0.9667,0.9,0.9229,0.8387,0.8578,0.01
knn,K Neighbors Classifier,0.9024,0.9722,0.9667,0.875,0.9086,0.8053,0.8286,0.014
rf,Random Forest Classifier,0.8881,0.9889,0.8667,0.925,0.8771,0.7749,0.8016,0.062
gbc,Gradient Boosting Classifier,0.8714,0.9389,0.8333,0.9083,0.849,0.7417,0.7611,0.046
qda,Quadratic Discriminant Analysis,0.8595,0.8611,0.8333,0.8017,0.8074,0.7212,0.7379,0.009
svm,SVM - Linear Kernel,0.8571,0.9722,0.8333,0.9,0.8329,0.716,0.7526,0.009
xgboost,Extreme Gradient Boosting,0.8381,0.9472,0.8,0.8833,0.8248,0.675,0.6993,0.016


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2318
1,Target,Class_Goal Poacher
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9381,1.0,0.95,0.9267,0.9217,0.87,0.8887,0.021
et,Extra Trees Classifier,0.8548,0.9208,0.7333,0.8833,0.77,0.6689,0.7022,0.05
ridge,Ridge Classifier,0.8524,0.9667,0.7667,0.7667,0.7333,0.6505,0.6824,0.01
lda,Linear Discriminant Analysis,0.8524,0.9667,0.7667,0.7667,0.7333,0.6505,0.6824,0.009
knn,K Neighbors Classifier,0.8238,0.9021,0.65,0.8167,0.6933,0.5833,0.6107,0.014
rf,Random Forest Classifier,0.8238,0.9167,0.6333,0.7667,0.6733,0.5718,0.5946,0.061
gbc,Gradient Boosting Classifier,0.819,0.89,0.7,0.7833,0.71,0.5814,0.6075,0.038
nb,Naive Bayes,0.8095,0.8875,0.8,0.7667,0.7533,0.6097,0.6345,0.009
svm,SVM - Linear Kernel,0.8095,0.8833,0.5833,0.7667,0.6467,0.5343,0.5557,0.012
ada,Ada Boost Classifier,0.8095,0.9212,0.6833,0.7667,0.7067,0.5679,0.585,0.034


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,4064
1,Target,Class_Target Man
2,Target type,Binary
3,Original data shape,"(91, 15)"
4,Transformed data shape,"(91, 15)"
5,Transformed train set shape,"(63, 15)"
6,Transformed test set shape,"(28, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9167,0.95,0.975,0.9333,0.9457,0.7667,0.7707,0.017
lightgbm,Light Gradient Boosting Machine,0.9095,0.9389,0.95,0.915,0.919,0.8091,0.8348,0.045
knn,K Neighbors Classifier,0.8857,0.9125,0.975,0.8767,0.9185,0.7184,0.732,0.015
et,Extra Trees Classifier,0.8857,0.9875,0.95,0.8967,0.9153,0.7208,0.734,0.053
svm,SVM - Linear Kernel,0.8738,0.975,0.9,0.905,0.8889,0.7469,0.7766,0.009
rf,Random Forest Classifier,0.8714,0.9479,0.925,0.8967,0.901,0.6928,0.709,0.06
xgboost,Extreme Gradient Boosting,0.8714,0.9667,0.925,0.8967,0.8963,0.695,0.7137,0.017
ridge,Ridge Classifier,0.8571,0.9708,0.9,0.8917,0.8788,0.6765,0.6962,0.009
lda,Linear Discriminant Analysis,0.8571,0.9708,0.9,0.8917,0.8788,0.6765,0.6962,0.017
nb,Naive Bayes,0.8333,0.8889,0.875,0.8567,0.8484,0.661,0.6918,0.01


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


['finalmodel.pkl']