In [2]:
import pandas as pd
from pycaret.classification import setup, compare_models, predict_model, load_model, finalize_model, save_model, pull
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import joblib

# Load the CSV file into a DataFrame
df = pd.read_csv('data2575_withkeepers.csv')

# Assuming the last columns are the target labels and the rest are features
X = df.iloc[:, 1:16]
y = df.iloc[:, -11:]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# List of target column names
target_columns = y.columns

# Create an empty DataFrame to store results
results_df = pd.DataFrame(columns=['target', 'accuracy'])

# Variable to store total accuracy
total_accuracy = 0

for target in target_columns:
    X_target_train = X_train.copy()
    X_target_test = X_test.copy()
    y_target_train = y_train[target]
    y_target_test = y_test[target]
    df_target = pd.concat([X_target_train, y_target_train], axis=1)

    # Setup PyCaret environment
    clf = setup(data=df_target, target=target)

    # Compare different models
    best_model = compare_models()

    # Finalize the best model
    final_model = finalize_model(best_model)

    # Predict on the test set
    predictions = predict_model(final_model, data=X_target_test)

    # Assuming 'prediction_label' is the predicted label column (adjust if necessary)
    y_pred = predictions['prediction_label']

    # Calculate accuracy
    accuracy = accuracy_score(y_target_test, y_pred)
    total_accuracy += accuracy

    # Append results to DataFrame
    new_row = pd.DataFrame({'target': target, 'accuracy': accuracy}, index=[0])
    results_df = pd.concat([results_df, new_row], ignore_index=True)

    # Save the model
    save_model(final_model, f'model_{target}')

    # Load the model for future predictions
    loaded_model = load_model(f'model_{target}')

# Calculate average accuracy
average_accuracy = total_accuracy / len(target_columns)

# Print the overall accuracy
print("Overall Accuracy:", average_accuracy)

# Save the results to a PKL file using joblib
joblib.dump(results_df, 'model_results2575_withkeepersall.pkl')

Unnamed: 0,Description,Value
0,Session id,2881
1,Target,y1_tradkeeper.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,0.9714,0.5833,0.6,0.5333,0.55,,0.5471,0.015
ada,Ada Boost Classifier,0.9714,0.5833,0.6,0.5333,0.55,,0.5471,0.011
gbc,Gradient Boosting Classifier,0.9714,0.5833,0.6,0.5333,0.55,,0.5471,0.031
knn,K Neighbors Classifier,0.9548,0.59,0.6,0.4833,0.5167,,0.5104,0.023
nb,Naive Bayes,0.9548,0.5917,0.6,0.4833,0.5167,,0.5104,0.013
lightgbm,Light Gradient Boosting Machine,0.9548,0.5833,0.5,0.4333,0.45,,0.4471,0.023
lr,Logistic Regression,0.9381,0.56,0.6,0.4333,0.4833,,0.4736,1.484
svm,SVM - Linear Kernel,0.9381,0.58,0.6,0.4833,0.5167,,0.5104,0.028
rf,Random Forest Classifier,0.9381,0.5667,0.4,0.3333,0.35,,0.3471,0.067
et,Extra Trees Classifier,0.9381,0.58,0.5,0.4333,0.45,,0.4271,0.059


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2959
1,Target,y2_sweeperkeeper.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9833,0.95,0.9,0.9,0.9,0.9,0.9,0.014
svm,SVM - Linear Kernel,0.9667,0.9,0.9,0.85,0.8667,0.8571,0.8632,0.01
ridge,Ridge Classifier,0.9667,0.98,0.9,0.85,0.8667,0.8571,0.8632,0.013
rf,Random Forest Classifier,0.9667,1.0,0.9,0.85,0.8667,0.8571,0.8632,0.064
ada,Ada Boost Classifier,0.9667,0.97,0.9,0.85,0.8667,0.8571,0.8632,0.051
lda,Linear Discriminant Analysis,0.9667,0.98,0.9,0.85,0.8667,0.8571,0.8632,0.01
et,Extra Trees Classifier,0.9667,1.0,0.9,0.85,0.8667,0.8571,0.8632,0.054
lightgbm,Light Gradient Boosting Machine,0.9667,1.0,0.9,0.85,0.8667,0.8571,0.8632,0.022
lr,Logistic Regression,0.95,0.98,0.9,0.8,0.8333,0.8143,0.8265,0.011
dt,Decision Tree Classifier,0.9357,0.8817,0.8,0.7,0.7333,0.716,0.7278,0.01


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,6403
1,Target,y3_ballplayingdefender.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9857,1.0,0.98,1.0,0.9889,0.9696,0.973,0.017
dt,Decision Tree Classifier,0.9667,0.9625,0.975,0.98,0.9746,0.9238,0.934,0.01
gbc,Gradient Boosting Classifier,0.9667,0.9625,0.975,0.98,0.9746,0.9238,0.934,0.033
knn,K Neighbors Classifier,0.95,1.0,1.0,0.9433,0.9687,0.8143,0.8265,0.015
ada,Ada Boost Classifier,0.95,0.9812,0.975,0.9633,0.9655,0.8238,0.834,0.035
lightgbm,Light Gradient Boosting Machine,0.95,0.9875,0.975,0.9633,0.9655,0.8238,0.834,0.021
ridge,Ridge Classifier,0.9381,0.99,1.0,0.9348,0.9631,0.7571,0.7632,0.01
lda,Linear Discriminant Analysis,0.9381,0.99,1.0,0.9348,0.9631,0.7571,0.7632,0.01
et,Extra Trees Classifier,0.9381,1.0,1.0,0.9348,0.9631,0.7571,0.7632,0.056
rf,Random Forest Classifier,0.9214,1.0,0.975,0.9348,0.9488,0.7238,0.734,0.063


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2809
1,Target,y4_nononsensedefender.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8833,0.975,0.9,0.8917,0.8624,0.7733,0.8069,0.018
nb,Naive Bayes,0.8833,0.9653,0.85,0.925,0.849,0.7638,0.7994,0.01
ridge,Ridge Classifier,0.8833,0.9639,0.7833,0.95,0.83,0.7583,0.7852,0.01
lda,Linear Discriminant Analysis,0.8833,0.9639,0.7833,0.95,0.83,0.7583,0.7852,0.01
knn,K Neighbors Classifier,0.8667,0.9389,0.7667,0.8667,0.7967,0.7333,0.7488,0.015
qda,Quadratic Discriminant Analysis,0.8667,0.8625,0.75,0.8667,0.7933,0.7238,0.738,0.01
et,Extra Trees Classifier,0.8667,0.9778,0.8167,0.925,0.819,0.7305,0.7734,0.053
lightgbm,Light Gradient Boosting Machine,0.85,0.9667,0.8,0.825,0.7824,0.7067,0.7361,0.02
rf,Random Forest Classifier,0.8333,0.9764,0.75,0.9167,0.78,0.6638,0.7067,0.063
ada,Ada Boost Classifier,0.8333,0.9444,0.75,0.9167,0.78,0.6638,0.7067,0.036


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2469
1,Target,y5_fullback.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.95,0.9875,1.0,0.95,0.9709,0.8,0.8,0.02
knn,K Neighbors Classifier,0.9333,0.915,1.0,0.9333,0.9618,0.7,0.7,0.015
ada,Ada Boost Classifier,0.9333,0.975,0.98,0.95,0.9598,0.7571,0.7632,0.035
lda,Linear Discriminant Analysis,0.9333,0.9475,1.0,0.9333,0.9618,0.7,0.7,0.01
et,Extra Trees Classifier,0.9333,0.9688,1.0,0.9333,0.9618,0.7,0.7,0.054
ridge,Ridge Classifier,0.9167,0.9475,1.0,0.9133,0.9507,0.6571,0.6632,0.01
nb,Naive Bayes,0.9071,0.8975,0.96,0.9217,0.9356,0.7658,0.7724,0.01
rf,Random Forest Classifier,0.9,0.9425,0.955,0.9233,0.9376,0.6514,0.6484,0.064
lightgbm,Light Gradient Boosting Machine,0.9,0.9625,0.98,0.9133,0.9396,0.6143,0.6265,0.02
svm,SVM - Linear Kernel,0.869,0.92,0.96,0.8967,0.9166,0.5493,0.5725,0.01


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,6696
1,Target,y6_allactionmidfielder.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9857,1.0,0.975,1.0,0.9857,0.972,0.975,0.053
lr,Logistic Regression,0.9357,0.9917,0.95,0.95,0.9429,0.872,0.8871,0.019
nb,Naive Bayes,0.9357,0.9639,0.95,0.955,0.946,0.8625,0.8797,0.01
ridge,Ridge Classifier,0.9333,0.9875,0.9167,0.975,0.9371,0.8667,0.8828,0.01
lda,Linear Discriminant Analysis,0.9333,0.9875,0.9167,0.975,0.9371,0.8667,0.8828,0.01
knn,K Neighbors Classifier,0.9214,0.9743,0.925,0.95,0.9321,0.8417,0.8538,0.015
rf,Random Forest Classifier,0.919,0.9917,0.95,0.925,0.9286,0.8387,0.8578,0.067
gbc,Gradient Boosting Classifier,0.8881,0.9292,0.8917,0.935,0.8967,0.7817,0.8071,0.036
lightgbm,Light Gradient Boosting Machine,0.8881,0.9694,0.8833,0.935,0.8957,0.775,0.7985,0.02
ada,Ada Boost Classifier,0.8714,0.9667,0.925,0.885,0.8964,0.7333,0.7528,0.035


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2588
1,Target,y7_midfieldplaymaker.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9667,1.0,0.96,1.0,0.9778,0.9143,0.9265,0.018
knn,K Neighbors Classifier,0.9357,0.9688,1.0,0.9333,0.9618,0.7588,0.7645,0.017
gbc,Gradient Boosting Classifier,0.9357,0.9875,0.98,0.9433,0.9576,0.8303,0.8543,0.034
ada,Ada Boost Classifier,0.9333,0.9875,0.96,0.96,0.9528,0.8476,0.8712,0.035
dt,Decision Tree Classifier,0.919,0.905,0.96,0.9433,0.9465,0.7874,0.8175,0.01
rf,Random Forest Classifier,0.919,0.97,1.0,0.9133,0.9507,0.716,0.7278,0.064
et,Extra Trees Classifier,0.919,0.9775,0.98,0.93,0.9487,0.7731,0.791,0.053
ridge,Ridge Classifier,0.9024,1.0,0.96,0.9267,0.9374,0.6874,0.7175,0.01
lda,Linear Discriminant Analysis,0.9024,1.0,0.96,0.9267,0.9374,0.6874,0.7175,0.01
lightgbm,Light Gradient Boosting Machine,0.9024,0.97,0.94,0.9433,0.9376,0.7103,0.7343,0.021


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,203
1,Target,y8_traditionalwinger.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9048,0.9575,0.8,0.9167,0.83,0.7681,0.7904,0.024
et,Extra Trees Classifier,0.8881,0.98,0.75,0.9167,0.7967,0.7252,0.7537,0.055
ridge,Ridge Classifier,0.8571,0.9475,0.7,0.8333,0.7267,0.6384,0.6686,0.01
lda,Linear Discriminant Analysis,0.8571,0.9475,0.7,0.8333,0.7267,0.6384,0.6686,0.01
ada,Ada Boost Classifier,0.8381,0.94,0.75,0.7167,0.7167,0.6205,0.6347,0.035
rf,Random Forest Classifier,0.8357,0.96,0.6,0.8333,0.66,0.5779,0.6222,0.063
lightgbm,Light Gradient Boosting Machine,0.8071,0.8975,0.7,0.7333,0.6767,0.5462,0.5803,0.02
gbc,Gradient Boosting Classifier,0.8048,0.93,0.65,0.7,0.6533,0.536,0.5522,0.041
knn,K Neighbors Classifier,0.7905,0.9488,0.45,0.55,0.4833,0.4158,0.4257,0.017
dt,Decision Tree Classifier,0.7881,0.7525,0.65,0.7,0.65,0.5057,0.5249,0.01


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2794
1,Target,y9_invertedwinger.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8857,0.95,0.85,0.8917,0.8557,0.7612,0.7809,0.018
et,Extra Trees Classifier,0.8857,0.9444,0.8333,0.8417,0.8257,0.741,0.7535,0.053
ada,Ada Boost Classifier,0.869,0.9306,0.8333,0.85,0.8314,0.7196,0.7352,0.035
dt,Decision Tree Classifier,0.8667,0.8583,0.8,0.8417,0.7957,0.7048,0.7252,0.01
rf,Random Forest Classifier,0.8524,0.9069,0.7167,0.8667,0.7733,0.6648,0.6794,0.066
gbc,Gradient Boosting Classifier,0.8524,0.925,0.6833,0.85,0.74,0.6612,0.6842,0.04
ridge,Ridge Classifier,0.85,0.9056,0.8167,0.85,0.8233,0.6917,0.7038,0.01
lda,Linear Discriminant Analysis,0.85,0.9056,0.8167,0.85,0.8233,0.6917,0.7038,0.01
qda,Quadratic Discriminant Analysis,0.8381,0.8444,0.7333,0.8833,0.78,0.6571,0.6787,0.01
nb,Naive Bayes,0.7548,0.8708,0.75,0.7267,0.7169,0.4821,0.5118,0.01


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2834
1,Target,y10_goalpoacher.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8714,0.97,0.65,0.55,0.58,0.53,0.5454,0.021
lightgbm,Light Gradient Boosting Machine,0.8548,0.8975,0.6,0.5,0.53,0.5,0.5154,0.02
ridge,Ridge Classifier,0.8357,0.9275,0.55,0.4167,0.4533,0.3839,0.4117,0.01
lda,Linear Discriminant Analysis,0.8357,0.9275,0.55,0.4167,0.4533,0.3839,0.4117,0.01
knn,K Neighbors Classifier,0.8333,0.7912,0.4,0.35,0.3667,0.3571,0.3632,0.014
nb,Naive Bayes,0.8333,0.8125,0.75,0.675,0.6833,0.5764,0.5897,0.01
ada,Ada Boost Classifier,0.8214,0.88,0.45,0.3667,0.3967,0.3538,0.364,0.035
et,Extra Trees Classifier,0.819,0.8238,0.35,0.45,0.3833,0.321,0.3328,0.053
dt,Decision Tree Classifier,0.8048,0.735,0.6,0.525,0.54,0.4482,0.4616,0.01
rf,Random Forest Classifier,0.8024,0.8,0.4,0.4,0.3833,0.3124,0.3212,0.065


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,395
1,Target,y11_targetman.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9333,0.9667,0.975,0.925,0.9429,0.8667,0.8828,0.019
ridge,Ridge Classifier,0.8881,0.9778,0.95,0.8917,0.9086,0.7697,0.8007,0.01
lda,Linear Discriminant Analysis,0.8881,0.9778,0.95,0.8917,0.9086,0.7697,0.8007,0.01
et,Extra Trees Classifier,0.8881,0.9611,0.9083,0.9167,0.8871,0.7697,0.804,0.051
lightgbm,Light Gradient Boosting Machine,0.8881,0.9583,0.95,0.8917,0.9086,0.7697,0.8007,0.02
rf,Random Forest Classifier,0.8857,0.9472,0.9167,0.905,0.8975,0.7696,0.7973,0.064
dt,Decision Tree Classifier,0.8714,0.8792,0.8583,0.9417,0.8781,0.743,0.78,0.011
svm,SVM - Linear Kernel,0.8548,0.9556,0.9083,0.8767,0.8621,0.703,0.7487,0.01
gbc,Gradient Boosting Classifier,0.8548,0.9319,0.85,0.9167,0.8529,0.703,0.7454,0.041
ada,Ada Boost Classifier,0.8048,0.9361,0.7917,0.875,0.8014,0.6097,0.6499,0.038


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded
Overall Accuracy: 0.9683794466403164


['model_results2575_withkeepersall.pkl']