In [5]:
import joblib
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score , recall_score, precision_score, f1_score , matthews_corrcoef ,roc_auc_score
from model.preprocessing import decode_target ,encode_target,target_categories

In [6]:
# Read the test split from disk and preview its first rows.
test_csv_path = 'data/Industrial Fabric Quality Inspection Dataset - Test.csv'
df_test = pd.read_csv(test_csv_path)
df_test.head()

Unnamed: 0,thread_count,gsm,tensile_strength,shrinkage_percent,color_fastness,fabric_thickness,defect_count,elongation_percent,moisture_absorption,fabric_type,...,batch_id,roll_number,inspection_time_minutes,warehouse_id,operator_name,inspection_shift,machine_temperature,humidity_level,inspection_notes,fabric_quality
0,140.0,255.0,95.510547,1.563016,3,1.474662,4,33.879259,4.022302,wool,...,403406,4777,19.169214,WH-C,John,Morning,27.504393,84.087561,Looks fine,High
1,152.0,,16.556885,6.109898,4,0.342559,9,33.472214,5.485526,silk,...,443810,1994,22.37735,WH-C,John,Night,23.166698,63.618708,Approved,Low
2,161.0,137.0,70.066985,8.38793,1,1.427341,14,26.086528,11.013039,wool,...,973276,4130,36.866134,WH-B,John,Evening,42.130868,77.569321,OK,Low
3,281.0,259.0,58.978794,3.893547,2,0.90819,10,13.561012,7.616445,polyester,...,333913,2300,15.945859,WH-A,John,Evening,38.561424,75.532385,Recheck,Low
4,266.0,380.0,,4.84747,2,1.128809,0,39.874294,12.612371,linen,...,235830,639,15.87746,WH-A,Priya,Morning,31.595069,41.665666,Recheck,High


| ML Model Name | Accuracy | AUC | Precision | Recall | F1 Score | MCC |
| :--- | :---: | :---: | :---: | :---: | :---: | :---: |
| **Logistic Regression** | | | | | | |
| **Decision Tree** | | | | | | |
| **kNN** | | | | | | |
| **Naive Bayes** | | | | | | |
| **Random Forest (Ensemble)**| | | | | | |
| **XGBoost (Ensemble)** | | | | | | |

In [7]:
# Directory containing the serialized model files.
root_path = 'model_files'

# Display name -> pickle file stem; insertion order is preserved in `models`.
_model_file_stems = {
    'Logistic Regression': 'LogisticRegression',
    'Decision Tree': 'DecisionTreeClassifier',
    'kNN': 'KNeighborsClassifier',
    'Naive Bayes': 'NaiveBayes',
    'Random Forest (Ensemble)': 'RandomForestClassifier',
    'XGBoost (Ensemble)': 'XGBClassifier',
}

# Display name -> path of the pickled model under `root_path`.
models = {
    display_name: f"{root_path}/{file_stem}.pkl"
    for display_name, file_stem in _model_file_stems.items()
}

In [10]:
# Evaluate every saved model on the test set and collect one row of metrics per
# model (accuracy, weighted one-vs-rest AUC, weighted precision/recall/F1, MCC).
#
# NOTE(review): df_test still contains the 'fabric_quality' target column when
# it is passed to predict/predict_proba — presumably the saved pipelines select
# their own feature columns; confirm this to rule out target leakage.
metrics_table = {'ML Model Name': [], 'Accuracy': [], 'AUC': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'MCC': []}

# The ground-truth labels do not depend on the model, so encode them once.
y_true = encode_target(df_test['fabric_quality'])

for model_name, model_path in models.items():
    # NOTE: joblib.load unpickles arbitrary objects — only load trusted files.
    model_pipeline = joblib.load(model_path)
    y_pred = model_pipeline.predict(df_test)
    y_score = model_pipeline.predict_proba(df_test)

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted')
    rec = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    mcc = matthews_corrcoef(y_true, y_pred)

    # Pass the fitted class order explicitly (when the model exposes it) so the
    # predict_proba columns are matched to labels even if a class happens to be
    # absent from the test split; None falls back to scikit-learn's default of
    # inferring the labels from y_true.
    auc = roc_auc_score(
        y_true,
        y_score,
        multi_class='ovr',
        average='weighted',
        labels=getattr(model_pipeline, 'classes_', None),
    )

    # One rounded value per table column; keys match metrics_table exactly.
    row = {
        'ML Model Name': model_name,
        'Accuracy': round(acc, 3),
        'AUC': round(auc, 3),
        'Precision': round(prec, 3),
        'Recall': round(rec, 3),
        'F1 Score': round(f1, 3),
        'MCC': round(mcc, 3),
    }
    for column, value in row.items():
        metrics_table[column].append(value)

metrics_df = pd.DataFrame(metrics_table)
metrics_df

Unnamed: 0,ML Model Name,Accuracy,AUC,Precision,Recall,F1 Score,MCC
0,Logistic Regression,0.95,0.981,0.95,0.95,0.95,0.925
1,Decision Tree,0.798,0.849,0.799,0.798,0.798,0.697
2,kNN,0.812,0.928,0.815,0.812,0.813,0.718
3,Naive Bayes,0.616,0.803,0.615,0.616,0.615,0.424
4,Random Forest (Ensemble),0.876,0.966,0.879,0.876,0.877,0.815
5,XGBoost (Ensemble),0.906,0.976,0.909,0.906,0.907,0.86


In [11]:
# Print the table so the markdown is emitted with real line breaks; leaving the
# string as the cell's last expression shows its escaped repr ('...\n...').
print(metrics_df.to_markdown(index=False))

'| ML Model Name            |   Accuracy |   AUC |   Precision |   Recall |   F1 Score |   MCC |\n|:-------------------------|-----------:|------:|------------:|---------:|-----------:|------:|\n| Logistic Regression      |      0.95  | 0.981 |       0.95  |    0.95  |      0.95  | 0.925 |\n| Decision Tree            |      0.798 | 0.849 |       0.799 |    0.798 |      0.798 | 0.697 |\n| kNN                      |      0.812 | 0.928 |       0.815 |    0.812 |      0.813 | 0.718 |\n| Naive Bayes              |      0.616 | 0.803 |       0.615 |    0.616 |      0.615 | 0.424 |\n| Random Forest (Ensemble) |      0.876 | 0.966 |       0.879 |    0.876 |      0.877 | 0.815 |\n| XGBoost (Ensemble)       |      0.906 | 0.976 |       0.909 |    0.906 |      0.907 | 0.86  |'

| ML Model Name            |   Accuracy |   AUC |   Precision |   Recall |   F1 Score |   MCC |
|:-------------------------|-----------:|------:|------------:|---------:|-----------:|------:|
| Logistic Regression      |      0.95  | 0.981 |       0.95  |    0.95  |      0.95  | 0.925 |
| Decision Tree            |      0.798 | 0.849 |       0.799 |    0.798 |      0.798 | 0.697 |
| kNN                      |      0.812 | 0.928 |       0.815 |    0.812 |      0.813 | 0.718 |
| Naive Bayes              |      0.616 | 0.803 |       0.615 |    0.616 |      0.615 | 0.424 |
| Random Forest (Ensemble) |      0.876 | 0.966 |       0.879 |    0.876 |      0.877 | 0.815 |
| XGBoost (Ensemble)       |      0.906 | 0.976 |       0.909 |    0.906 |      0.907 | 0.86  |