In [1]:
import pandas as pd
from pycaret.classification import setup, compare_models, predict_model, load_model, finalize_model, save_model, pull
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import joblib

# Load the CSV file into a DataFrame
df = pd.read_csv('2575sgplayers (2024-08-29).csv')

# Assuming the last columns are the target labels and the rest are features
X = df.iloc[:, 1:16]
y = df.iloc[:, -11:]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# List of target column names
target_columns = y.columns

# Create an empty DataFrame to store results
results_df = pd.DataFrame(columns=['target', 'accuracy'])

for target in target_columns:
    X_target_train = X_train.copy()
    X_target_test = X_test.copy()
    y_target_train = y_train[target]
    y_target_test = y_test[target]
    df_target = pd.concat([X_target_train, y_target_train], axis=1)

    # Setup PyCaret environment
    clf = setup(data=df_target, target=target)

    # Compare different models
    best_model = compare_models()

    # Finalize the best model
    final_model = finalize_model(best_model)

    # Predict on the test set
    predictions = predict_model(final_model, data=X_target_test)

    # Assuming 'prediction_label' is the predicted label column (adjust if necessary)
    y_pred = predictions['prediction_label']

    # Calculate accuracy
    accuracy = accuracy_score(y_target_test, y_pred)

    # Append results to DataFrame
    new_row = pd.DataFrame({'target': target, 'accuracy': accuracy}, index=[0])
    results_df = pd.concat([results_df, new_row], ignore_index=True)

    # Save the model
    save_model(final_model, f'model_{target}')

    # Load the model for future predictions
    loaded_model = load_model(f'model_{target}')

# Save the results to a PKL file using joblib
joblib.dump(results_df, 'newmodel29aug.pkl')

Unnamed: 0,Description,Value
0,Session id,5885
1,Target,y1_tradkeeper.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9833,0.6,0.6,0.55,0.5667,,0.5632,0.017
gbc,Gradient Boosting Classifier,0.9833,0.59,0.6,0.55,0.5667,,0.5632,0.03
dt,Decision Tree Classifier,0.9667,0.58,0.6,0.5333,0.55,,0.5447,0.011
rf,Random Forest Classifier,0.9667,0.6,0.6,0.5333,0.55,,0.5447,0.06
ada,Ada Boost Classifier,0.9667,0.58,0.6,0.5333,0.55,,0.5447,0.011
lda,Linear Discriminant Analysis,0.9667,0.58,0.6,0.5333,0.55,,0.5447,0.011
et,Extra Trees Classifier,0.9667,0.6,0.6,0.5333,0.55,,0.5447,0.051
lightgbm,Light Gradient Boosting Machine,0.9667,0.6,0.5,0.5,0.5,,0.5,0.035
svm,SVM - Linear Kernel,0.9524,0.6,0.5,0.4333,0.45,,0.4447,0.01
lr,Logistic Regression,0.95,0.6,0.6,0.5333,0.55,,0.5447,1.012


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,1085
1,Target,y2_sweeperkeeper.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9833,0.95,0.9,0.9,0.9,0.9,0.9,0.015
ridge,Ridge Classifier,0.9833,0.98,0.9,0.9,0.9,0.9,0.9,0.01
rf,Random Forest Classifier,0.9833,1.0,0.9,0.9,0.9,0.9,0.9,0.061
lda,Linear Discriminant Analysis,0.9833,0.98,0.9,0.9,0.9,0.9,0.9,0.011
et,Extra Trees Classifier,0.9833,1.0,0.9,0.9,0.9,0.9,0.9,0.055
xgboost,Extreme Gradient Boosting,0.9833,1.0,0.9,0.9,0.9,0.9,0.9,0.021
nb,Naive Bayes,0.9667,0.98,0.9,0.85,0.8667,0.8571,0.8632,0.009
dt,Decision Tree Classifier,0.9667,0.94,0.9,0.85,0.8667,0.8571,0.8632,0.01
ada,Ada Boost Classifier,0.9667,0.94,0.9,0.85,0.8667,0.8571,0.8632,0.01
gbc,Gradient Boosting Classifier,0.9667,0.94,0.9,0.85,0.8667,0.8571,0.8632,0.029


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,8088
1,Target,y3_ballplayingdefender.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,0.9667,0.9375,0.975,0.9833,0.9766,0.8667,0.8707,0.01
lr,Logistic Regression,0.95,0.9875,0.975,0.96,0.9635,0.881,0.8972,0.019
knn,K Neighbors Classifier,0.95,0.9275,0.975,0.9633,0.9655,0.8238,0.834,0.015
gbc,Gradient Boosting Classifier,0.95,0.95,0.975,0.9667,0.9675,0.7667,0.7707,0.044
rf,Random Forest Classifier,0.9333,1.0,0.975,0.9433,0.9544,0.781,0.7972,0.062
ada,Ada Boost Classifier,0.9333,0.945,0.975,0.9383,0.9548,0.7821,0.7882,0.045
et,Extra Trees Classifier,0.9333,1.0,0.975,0.9433,0.9544,0.781,0.7972,0.06
xgboost,Extreme Gradient Boosting,0.9333,0.9375,0.955,0.9633,0.9544,0.781,0.7972,0.019
lda,Linear Discriminant Analysis,0.9167,0.98,0.955,0.9467,0.9453,0.681,0.6972,0.01
lightgbm,Light Gradient Boosting Machine,0.9167,0.9275,0.955,0.9433,0.9433,0.7381,0.7604,0.034


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,6293
1,Target,y4_nononsensedefender.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9333,0.95,0.8667,0.975,0.899,0.8476,0.8679,0.062
et,Extra Trees Classifier,0.9333,0.9625,0.8667,0.975,0.899,0.8476,0.8679,0.054
lr,Logistic Regression,0.9167,0.9875,0.8667,0.925,0.8824,0.8155,0.8297,0.017
knn,K Neighbors Classifier,0.9167,0.9146,0.8333,0.975,0.879,0.8143,0.8386,0.015
svm,SVM - Linear Kernel,0.9167,1.0,0.9,0.925,0.8857,0.821,0.8472,0.01
ridge,Ridge Classifier,0.9167,0.9875,0.8,1.0,0.8633,0.8143,0.8419,0.01
lda,Linear Discriminant Analysis,0.9167,0.9875,0.8,1.0,0.8633,0.8143,0.8419,0.01
xgboost,Extreme Gradient Boosting,0.9167,0.9562,0.8667,0.96,0.8883,0.8143,0.8419,0.02
ada,Ada Boost Classifier,0.9024,0.9875,0.8667,0.935,0.874,0.7863,0.8169,0.041
nb,Naive Bayes,0.9,0.8833,0.8333,0.96,0.8683,0.781,0.8126,0.01


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,1552
1,Target,y5_fullback.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9024,0.98,0.94,0.95,0.9394,0.6303,0.6543,0.02
rf,Random Forest Classifier,0.9024,0.9275,0.96,0.93,0.9394,0.6303,0.6543,0.063
ada,Ada Boost Classifier,0.9024,0.98,0.96,0.93,0.9394,0.6303,0.6543,0.036
et,Extra Trees Classifier,0.9024,0.9675,0.98,0.9167,0.9416,0.616,0.6278,0.053
xgboost,Extreme Gradient Boosting,0.9024,0.95,0.98,0.91,0.9394,0.6303,0.6543,0.019
knn,K Neighbors Classifier,0.8857,0.9738,0.96,0.9067,0.9285,0.6103,0.6343,0.017
ridge,Ridge Classifier,0.8857,0.96,0.96,0.9133,0.9303,0.5303,0.5543,0.01
lda,Linear Discriminant Analysis,0.8857,0.96,0.94,0.9267,0.9285,0.6103,0.6343,0.01
lightgbm,Light Gradient Boosting Machine,0.869,0.96,0.92,0.9267,0.9146,0.5864,0.6158,0.035
gbc,Gradient Boosting Classifier,0.8524,0.8188,0.9,0.9233,0.9037,0.5664,0.5958,0.037


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,8559
1,Target,y6_allactionmidfielder.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.95,0.9688,0.975,0.955,0.9603,0.8905,0.9047,0.061
nb,Naive Bayes,0.919,0.9722,0.9417,0.93,0.926,0.8362,0.8559,0.01
xgboost,Extreme Gradient Boosting,0.9167,0.9542,0.9167,0.95,0.9229,0.8333,0.8536,0.019
ridge,Ridge Classifier,0.9024,0.9778,0.8833,0.955,0.906,0.8029,0.8266,0.01
lda,Linear Discriminant Analysis,0.9024,0.9778,0.8833,0.955,0.906,0.8029,0.8266,0.009
lr,Logistic Regression,0.9,1.0,0.9167,0.93,0.9117,0.7905,0.8168,0.023
rf,Random Forest Classifier,0.9,0.9764,0.9417,0.8967,0.9127,0.7905,0.8087,0.066
lightgbm,Light Gradient Boosting Machine,0.9,0.9639,0.95,0.905,0.921,0.7821,0.8004,0.033
knn,K Neighbors Classifier,0.8833,0.934,0.8917,0.935,0.9014,0.7583,0.7819,0.016
ada,Ada Boost Classifier,0.8667,0.9694,0.9167,0.8917,0.8921,0.6917,0.7078,0.035


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,1080
1,Target,y7_midfieldplaymaker.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9524,0.99,0.955,0.9833,0.9655,0.8362,0.8437,0.019
lightgbm,Light Gradient Boosting Machine,0.9524,1.0,1.0,0.9467,0.9707,0.816,0.8278,0.038
ada,Ada Boost Classifier,0.95,0.93,1.0,0.95,0.9727,0.7,0.7,0.039
knn,K Neighbors Classifier,0.9357,0.9838,1.0,0.93,0.9616,0.716,0.7278,0.015
rf,Random Forest Classifier,0.9357,0.95,1.0,0.93,0.9616,0.716,0.7278,0.065
gbc,Gradient Boosting Classifier,0.9357,0.87,0.98,0.95,0.9616,0.6696,0.673,0.036
et,Extra Trees Classifier,0.9357,0.97,1.0,0.9333,0.9636,0.6588,0.6645,0.054
nb,Naive Bayes,0.9333,0.8675,1.0,0.93,0.9616,0.6571,0.6632,0.012
xgboost,Extreme Gradient Boosting,0.9333,0.98,1.0,0.93,0.9616,0.6571,0.6632,0.02
dt,Decision Tree Classifier,0.919,0.8675,0.935,0.9633,0.9455,0.7162,0.7237,0.012


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,5385
1,Target,y8_traditionalwinger.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.869,0.975,0.8167,0.8667,0.81,0.7155,0.7452,0.03
knn,K Neighbors Classifier,0.819,0.8562,0.5667,0.7167,0.61,0.5327,0.5585,0.017
et,Extra Trees Classifier,0.8167,0.9375,0.6,0.7167,0.63,0.5345,0.5538,0.051
ada,Ada Boost Classifier,0.8024,0.875,0.6167,0.7167,0.6433,0.5255,0.5452,0.035
ridge,Ridge Classifier,0.7857,0.875,0.7167,0.7333,0.69,0.5322,0.5659,0.011
lda,Linear Discriminant Analysis,0.7857,0.875,0.7167,0.7333,0.69,0.5322,0.5659,0.01
xgboost,Extreme Gradient Boosting,0.769,0.775,0.6167,0.6333,0.6067,0.4529,0.4645,0.021
rf,Random Forest Classifier,0.7357,0.8438,0.5167,0.6333,0.54,0.3672,0.3909,0.067
dt,Decision Tree Classifier,0.719,0.6833,0.5667,0.6333,0.5733,0.3719,0.3886,0.011
svm,SVM - Linear Kernel,0.7048,0.8125,0.4667,0.4667,0.4333,0.286,0.3182,0.011


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,8794
1,Target,y9_invertedwinger.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8881,0.9889,0.8333,0.835,0.8207,0.7462,0.7669,0.023
et,Extra Trees Classifier,0.8714,0.9542,0.7833,0.835,0.7874,0.7033,0.7301,0.054
ada,Ada Boost Classifier,0.869,0.9639,0.85,0.91,0.8531,0.7291,0.7658,0.039
ridge,Ridge Classifier,0.8548,0.9694,0.8333,0.7767,0.7931,0.6795,0.7002,0.01
lda,Linear Discriminant Analysis,0.8548,0.9694,0.8333,0.7767,0.7931,0.6795,0.7002,0.01
gbc,Gradient Boosting Classifier,0.8381,0.9597,0.8333,0.86,0.8117,0.6795,0.7157,0.047
xgboost,Extreme Gradient Boosting,0.8357,0.9111,0.75,0.8017,0.7374,0.6291,0.6691,0.023
dt,Decision Tree Classifier,0.8214,0.8167,0.7333,0.8267,0.745,0.6176,0.6507,0.011
rf,Random Forest Classifier,0.8214,0.9611,0.6667,0.635,0.6407,0.5795,0.5962,0.067
nb,Naive Bayes,0.819,0.8847,0.8667,0.7917,0.799,0.6196,0.655,0.01


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,6202
1,Target,y10_goalpoacher.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9381,0.9775,0.8,0.9,0.8333,0.8143,0.8265,0.024
xgboost,Extreme Gradient Boosting,0.8714,0.93,0.75,0.6333,0.6767,0.6155,0.6297,0.023
knn,K Neighbors Classifier,0.869,0.83,0.5,0.8,0.6,0.5445,0.5808,0.017
gbc,Gradient Boosting Classifier,0.869,0.9188,0.8,0.7,0.7233,0.641,0.6649,0.047
et,Extra Trees Classifier,0.869,0.9075,0.55,0.75,0.6167,0.5553,0.5793,0.056
ridge,Ridge Classifier,0.8548,0.9125,0.65,0.6667,0.63,0.5631,0.5854,0.01
dt,Decision Tree Classifier,0.85,0.7775,0.65,0.5833,0.61,0.5383,0.5464,0.01
lda,Linear Discriminant Analysis,0.8381,0.9125,0.65,0.65,0.62,0.5381,0.5604,0.01
ada,Ada Boost Classifier,0.8357,0.905,0.65,0.7333,0.66,0.5564,0.5825,0.041
nb,Naive Bayes,0.819,0.76,0.65,0.6333,0.6267,0.5055,0.5153,0.009


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Description,Value
0,Session id,8619
1,Target,y11_targetman.1
2,Target type,Binary
3,Original data shape,"(88, 16)"
4,Transformed data shape,"(88, 16)"
5,Transformed train set shape,"(61, 16)"
6,Transformed test set shape,"(27, 16)"
7,Numeric features,15
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.919,0.9889,0.9417,0.9217,0.927,0.8291,0.8423,0.024
xgboost,Extreme Gradient Boosting,0.919,0.9472,0.875,0.975,0.9114,0.8387,0.8578,0.022
rf,Random Forest Classifier,0.9167,0.9778,0.9333,0.93,0.9203,0.8238,0.8461,0.065
dt,Decision Tree Classifier,0.9024,0.8917,0.9333,0.9017,0.9102,0.7934,0.811,0.011
ada,Ada Boost Classifier,0.8857,0.9889,0.8417,0.955,0.8803,0.7625,0.7918,0.04
et,Extra Trees Classifier,0.8857,0.9722,0.9333,0.8767,0.8959,0.76,0.7817,0.057
ridge,Ridge Classifier,0.8667,0.9889,0.9,0.8817,0.8663,0.7238,0.7567,0.011
lda,Linear Discriminant Analysis,0.8667,0.9889,0.9,0.8817,0.8663,0.7238,0.7567,0.01
lightgbm,Light Gradient Boosting Machine,0.8548,0.9583,0.8833,0.885,0.8671,0.7083,0.7399,0.032
gbc,Gradient Boosting Classifier,0.8381,0.9444,0.7833,0.9133,0.8289,0.67,0.6968,0.044


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded
Overall Accuracy: 0.948616600790514


['newmodel29aug.pkl']