In [10]:
import pandas as pd
from pycaret.classification import setup, compare_models, predict_model, load_model, finalize_model, save_model, pull
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import joblib

# Load the CSV file into a DataFrame
df = pd.read_csv('data100_withkeepers.csv')

# Assuming the last columns are the target labels and the rest are features
X = df.iloc[:, 1:16]  # Features
y = df.iloc[:, -11:]  # Target variables

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# List of target column names
target_columns = y.columns

# Create an empty DataFrame to store results
results_df = pd.DataFrame(columns=['target', 'accuracy'])

for target in target_columns:
    # Create a new DataFrame with the current target and features
    X_target_train = X_train.copy()
    X_target_test = X_test.copy()
    y_target_train = y_train[target]
    y_target_test = y_test[target]
    df_target = pd.concat([X_target_train, y_target_train], axis=1)

    # Setup PyCaret environment
    clf = setup(data=df_target, target=target)

    # Compare different models
    best_model = compare_models()

    # Finalize the best model
    final_model = finalize_model(best_model)

    # Predict on the test set
    predictions = predict_model(final_model, data=X_target_test)

    # Assuming 'prediction_label' is the predicted label column (adjust if necessary)
    y_pred = predictions['prediction_label']

    # Calculate accuracy
    accuracy = accuracy_score(y_target_test, y_pred)

    # Append results to DataFrame
    new_row = pd.DataFrame({'target': target, 'accuracy': accuracy}, index=[0])
    results_df = pd.concat([results_df, new_row], ignore_index=True)

    # Save the model
    save_model(final_model, f'model_{target}')

    # Load the model for future predictions
    loaded_model = load_model(f'model_{target}')

# Save the results to a PKL file using joblib
joblib.dump(results_df, 'model_results100_withkeepers.pkl')

Unnamed: 0,Description,Value
0,Session id,6061
1,Target,y1_tradkeeper.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,1.0,0.6,0.6,0.6,0.6,,0.6,0.009
ada,Ada Boost Classifier,1.0,0.6,0.6,0.6,0.6,,0.6,0.01
gbc,Gradient Boosting Classifier,1.0,0.6,0.6,0.6,0.6,,0.6,0.032
lightgbm,Light Gradient Boosting Machine,0.9857,0.6,0.6,0.55,0.5667,,0.5645,0.037
rf,Random Forest Classifier,0.9667,0.6,0.4,0.4,0.4,,0.4,0.056
et,Extra Trees Classifier,0.9667,0.6,0.5,0.5,0.5,,0.5,0.065
ridge,Ridge Classifier,0.95,0.58,0.4,0.4,0.4,,0.4,0.01
lda,Linear Discriminant Analysis,0.95,0.58,0.4,0.4,0.4,,0.4,0.012
lr,Logistic Regression,0.9333,0.56,0.4,0.35,0.3667,,0.3632,0.012
knn,K Neighbors Classifier,0.9333,0.6,0.3,0.3,0.3,,0.3,0.014


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,3330
1,Target,y2_sweeperkeeper.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9667,0.98,0.8,0.8,0.8,0.8,0.8,0.016
dt,Decision Tree Classifier,0.95,0.89,0.8,0.8,0.8,0.78,0.78,0.009
gbc,Gradient Boosting Classifier,0.95,0.94,0.8,0.8,0.8,0.78,0.78,0.032
lightgbm,Light Gradient Boosting Machine,0.95,0.94,0.8,0.75,0.7667,0.7571,0.7632,0.031
ridge,Ridge Classifier,0.9333,0.92,0.8,0.75,0.7667,0.7371,0.7432,0.009
ada,Ada Boost Classifier,0.9333,0.94,0.75,0.8,0.7667,0.7371,0.7432,0.031
lda,Linear Discriminant Analysis,0.9333,0.92,0.8,0.75,0.7667,0.7371,0.7432,0.009
et,Extra Trees Classifier,0.9333,0.93,0.75,0.8,0.7667,0.7371,0.7432,0.046
knn,K Neighbors Classifier,0.919,0.94,0.6,0.7,0.6333,0.616,0.6278,0.014
svm,SVM - Linear Kernel,0.9167,0.9,0.8,0.7,0.7333,0.6943,0.7065,0.01


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,4695
1,Target,y3_ballplayingdefender.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.969,0.9637,0.98,0.98,0.9778,0.9267,0.9363,0.014
et,Extra Trees Classifier,0.9524,0.9875,1.0,0.9467,0.9707,0.816,0.8278,0.047
lr,Logistic Regression,0.95,0.98,0.98,0.96,0.9667,0.8714,0.8897,0.016
lightgbm,Light Gradient Boosting Machine,0.9333,0.9625,0.975,0.9433,0.9544,0.781,0.7972,0.028
ridge,Ridge Classifier,0.919,0.98,0.96,0.94,0.9444,0.7981,0.826,0.009
lda,Linear Discriminant Analysis,0.919,0.98,0.96,0.94,0.9444,0.7981,0.826,0.009
nb,Naive Bayes,0.9167,0.9062,0.95,0.935,0.9385,0.806,0.8222,0.009
rf,Random Forest Classifier,0.9048,0.965,0.93,0.935,0.9296,0.7788,0.789,0.056
svm,SVM - Linear Kernel,0.8881,0.9575,0.915,0.94,0.9163,0.7414,0.7785,0.01
ada,Ada Boost Classifier,0.8857,0.84,0.93,0.9233,0.9154,0.6931,0.721,0.032


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,6808
1,Target,y4_nononsensedefender.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9357,0.95,0.9333,0.9417,0.9324,0.8696,0.8771,0.031
lr,Logistic Regression,0.919,0.9778,0.9333,0.9167,0.9181,0.8387,0.8498,0.021
rf,Random Forest Classifier,0.9167,0.9667,0.8667,0.9667,0.9067,0.8333,0.8455,0.057
gbc,Gradient Boosting Classifier,0.9024,0.9111,0.85,0.95,0.8838,0.7934,0.807,0.034
et,Extra Trees Classifier,0.9,0.9556,0.8667,0.95,0.8914,0.8,0.8243,0.05
dt,Decision Tree Classifier,0.869,0.8583,0.8167,0.9167,0.8505,0.7267,0.7403,0.014
lightgbm,Light Gradient Boosting Machine,0.8333,0.9556,0.7833,0.8833,0.8014,0.6571,0.6868,0.03
ridge,Ridge Classifier,0.819,0.9556,0.8,0.7583,0.7705,0.6387,0.6538,0.01
lda,Linear Discriminant Analysis,0.819,0.9556,0.8,0.7583,0.7705,0.6387,0.6538,0.009
nb,Naive Bayes,0.7881,0.8667,0.7333,0.785,0.7379,0.5795,0.6083,0.009


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,8690
1,Target,y5_fullback.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9333,0.9306,1.0,0.915,0.9496,0.8571,0.8787,0.049
ridge,Ridge Classifier,0.9048,0.9278,0.975,0.8967,0.926,0.7935,0.8225,0.012
lda,Linear Discriminant Analysis,0.9048,0.9278,0.975,0.8967,0.926,0.7935,0.8225,0.009
rf,Random Forest Classifier,0.8857,0.9431,0.9417,0.885,0.9063,0.76,0.7777,0.058
knn,K Neighbors Classifier,0.8833,0.9354,0.975,0.875,0.9131,0.7381,0.7759,0.018
lightgbm,Light Gradient Boosting Machine,0.8833,0.9444,0.9417,0.8917,0.9086,0.7333,0.7414,0.03
lr,Logistic Regression,0.869,0.9458,0.9167,0.88,0.8925,0.7279,0.7395,0.019
ada,Ada Boost Classifier,0.85,0.9653,0.9,0.885,0.8738,0.6983,0.7319,0.038
dt,Decision Tree Classifier,0.819,0.8042,0.8917,0.8617,0.8556,0.5934,0.6345,0.009
gbc,Gradient Boosting Classifier,0.819,0.9069,0.8,0.8967,0.8346,0.6362,0.6559,0.039


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,1020
1,Target,y6_allactionmidfielder.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9667,0.9778,0.9333,1.0,0.96,0.9333,0.9414,0.03
lr,Logistic Regression,0.9333,1.0,0.9083,0.975,0.9314,0.8667,0.8828,0.017
rf,Random Forest Classifier,0.9167,0.9889,0.875,0.975,0.9114,0.8333,0.8536,0.059
ridge,Ridge Classifier,0.9024,0.9764,0.9083,0.93,0.906,0.8029,0.8266,0.009
lda,Linear Discriminant Analysis,0.9024,0.9764,0.9083,0.93,0.906,0.8029,0.8266,0.01
dt,Decision Tree Classifier,0.9,0.9042,0.9083,0.925,0.9029,0.8,0.8243,0.011
gbc,Gradient Boosting Classifier,0.9,0.9549,0.9083,0.925,0.9029,0.8,0.8243,0.035
ada,Ada Boost Classifier,0.8857,0.9938,0.9417,0.89,0.901,0.7696,0.8006,0.033
et,Extra Trees Classifier,0.869,0.9764,0.8083,0.9467,0.8613,0.7362,0.7599,0.05
knn,K Neighbors Classifier,0.8667,0.9368,0.8083,0.9417,0.8517,0.725,0.7445,0.015


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,3231
1,Target,y7_midfieldplaymaker.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9333,0.9625,0.975,0.94,0.9524,0.8381,0.8604,0.021
lightgbm,Light Gradient Boosting Machine,0.9,0.95,0.975,0.895,0.9306,0.7536,0.778,0.029
dt,Decision Tree Classifier,0.8667,0.8375,0.925,0.8817,0.8988,0.6964,0.7147,0.01
lda,Linear Discriminant Analysis,0.8667,0.9625,0.925,0.9017,0.9042,0.6726,0.6929,0.01
knn,K Neighbors Classifier,0.85,0.875,0.975,0.8533,0.9013,0.5952,0.6237,0.016
ridge,Ridge Classifier,0.85,0.9625,0.925,0.8767,0.8935,0.631,0.6472,0.01
gbc,Gradient Boosting Classifier,0.85,0.9188,0.9,0.9,0.8905,0.6429,0.6702,0.045
et,Extra Trees Classifier,0.85,0.8938,0.95,0.8617,0.8963,0.6202,0.6487,0.056
rf,Random Forest Classifier,0.819,0.9375,0.905,0.855,0.8718,0.5612,0.5901,0.061
ada,Ada Boost Classifier,0.819,0.8875,0.905,0.855,0.8687,0.5708,0.6189,0.034


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,3912
1,Target,y8_traditionalwinger.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9357,0.75,0.4,0.4,0.4,,0.4,0.015
svm,SVM - Linear Kernel,0.9048,0.73,0.5,0.45,0.4667,,0.4466,0.009
lda,Linear Discriminant Analysis,0.9048,0.71,0.5,0.4,0.4333,,0.4098,0.01
ada,Ada Boost Classifier,0.9024,0.7433,0.2,0.2,0.2,,0.2,0.038
lightgbm,Light Gradient Boosting Machine,0.9024,0.72,0.3,0.3,0.3,,0.3,0.042
ridge,Ridge Classifier,0.8857,0.71,0.3,0.2,0.2333,,0.2265,0.012
rf,Random Forest Classifier,0.8857,0.6833,0.1,0.1,0.1,,0.1,0.064
knn,K Neighbors Classifier,0.869,0.5717,0.0,0.0,0.0,,0.0,0.016
qda,Quadratic Discriminant Analysis,0.869,0.4067,0.0,0.0,0.0,,0.0,0.012
et,Extra Trees Classifier,0.869,0.7633,0.1,0.05,0.0667,,0.0632,0.049


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,2661
1,Target,y9_invertedwinger.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.919,0.64,0.4,0.4,0.4,,0.4,0.014
lr,Logistic Regression,0.9024,0.78,0.3,0.25,0.2667,,0.2632,0.018
ridge,Ridge Classifier,0.9024,0.64,0.3,0.3,0.3,,0.3,0.011
gbc,Gradient Boosting Classifier,0.9024,0.63,0.2,0.2,0.2,,0.2,0.036
dt,Decision Tree Classifier,0.8833,0.5,0.2,0.2,0.2,,0.2,0.009
knn,K Neighbors Classifier,0.869,0.53,0.0,0.0,0.0,,0.0,0.018
rf,Random Forest Classifier,0.869,0.72,0.0,0.0,0.0,,0.0,0.065
qda,Quadratic Discriminant Analysis,0.869,0.67,0.0,0.0,0.0,,0.0,0.01
ada,Ada Boost Classifier,0.869,0.6067,0.1,0.1,0.1,,0.1,0.033
et,Extra Trees Classifier,0.869,0.7433,0.0,0.0,0.0,,0.0,0.056


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,5324
1,Target,y10_goalpoacher.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9524,0.15,0.0,0.0,0.0,,0.0,0.014
rf,Random Forest Classifier,0.9524,0.24,0.0,0.0,0.0,,0.0,0.06
qda,Quadratic Discriminant Analysis,0.9524,0.22,0.0,0.0,0.0,,0.0,0.01
ada,Ada Boost Classifier,0.9524,0.1767,0.0,0.0,0.0,,0.0,0.034
et,Extra Trees Classifier,0.9524,0.2017,0.0,0.0,0.0,,0.0,0.048
lightgbm,Light Gradient Boosting Machine,0.9524,0.1467,0.0,0.0,0.0,,0.0,0.034
dummy,Dummy Classifier,0.9524,0.15,0.0,0.0,0.0,,0.0,0.008
lr,Logistic Regression,0.9357,0.15,0.0,0.0,0.0,,0.0,0.012
ridge,Ridge Classifier,0.9357,0.2233,0.0,0.0,0.0,,0.0,0.011
gbc,Gradient Boosting Classifier,0.9357,0.15,0.0,0.0,0.0,,0.0,0.033


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,5747
1,Target,y11_targetman.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9214,0.95,0.8833,0.9417,0.8824,0.8268,0.8518,0.06
lightgbm,Light Gradient Boosting Machine,0.8762,0.9042,0.8333,0.85,0.83,0.7321,0.7423,0.031
lr,Logistic Regression,0.8548,0.9333,0.8,0.9,0.7967,0.6973,0.7443,0.026
ridge,Ridge Classifier,0.8548,0.9889,0.7667,0.9167,0.79,0.6906,0.7358,0.012
lda,Linear Discriminant Analysis,0.8548,0.9889,0.7667,0.9167,0.79,0.6906,0.7358,0.009
et,Extra Trees Classifier,0.8524,0.9097,0.8,0.8667,0.81,0.6917,0.7153,0.047
dt,Decision Tree Classifier,0.8214,0.8125,0.75,0.75,0.72,0.5995,0.6234,0.009
ada,Ada Boost Classifier,0.8071,0.9042,0.7,0.7167,0.7,0.5635,0.5689,0.032
gbc,Gradient Boosting Classifier,0.8071,0.8958,0.7833,0.6917,0.7224,0.5785,0.5939,0.038
knn,K Neighbors Classifier,0.7405,0.8861,0.5833,0.8,0.6233,0.4439,0.4983,0.014


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


['model_results100_withkeepers.pkl']