In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score

In [2]:
file_path = "D:/Research/RQ2/classifier/data.json"
df = pd.read_json(file_path, lines=True)
df

Unnamed: 0,modelId,since-create,since-last-model-update,has-license,what-license,what-library,what-task,num-commit,num-discussion,model-size,...,num-table,num-emoji,num-heading,num-word,has-description,has-uses,has-limitations and bias,has-training,has-evaluation,Quality
0,DrishtiSharma/wav2vec2-large-xls-r-300m-ab-v4,1.855292,2.423890,0.798216,-0.461011,0.299299,-0.950938,-0.274891,-0.121624,-0.173520,...,0.604295,-0.136524,-0.014165,-0.267469,0.539733,1.173105,-0.242197,1.173105,1.722959,0
1,Helsinki-NLP/opus-mt-es-nl,4.140787,-0.013241,0.798216,-0.461011,0.299299,-1.361188,1.151112,-0.121624,-0.204531,...,0.604295,-0.136524,-0.594533,-0.352554,0.539733,-0.852439,-0.242197,-0.852439,1.722959,0
2,Helsinki-NLP/opus-mt-sv-tiv,4.115672,-0.013241,0.798216,-0.461011,0.299299,-1.361188,0.922952,-0.121624,-0.205394,...,0.604295,-0.136524,-0.594533,-0.352554,0.539733,1.173105,-0.242197,1.173105,-0.580397,0
3,MultiBertGunjanPatrick/multiberts-seed-15,2.303780,2.913895,0.798216,-0.461011,0.299299,-1.361188,-0.217851,-0.121624,-0.199954,...,-0.486313,-0.136524,0.179291,0.551616,0.539733,1.173105,4.128876,1.173105,-0.580397,1
4,SauravMaheshkar/clr-finetuned-bert-large-uncased,2.572873,2.961177,0.798216,0.936308,0.299299,-0.540687,-0.274891,-0.121624,-0.170989,...,0.604295,-0.136524,-0.691261,-0.353689,-1.852768,1.173105,-0.242197,1.173105,1.722959,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,Maelstrome/mermaid-gemmma-7b,-1.072438,-1.083515,-1.252793,-0.810340,-0.573449,0.690066,-0.103771,-0.121624,0.885893,...,-0.486313,-0.136524,1.049844,0.196528,0.539733,1.173105,4.128876,1.173105,1.722959,1
754,Niggendar/waiANINSFWPONYXL_v20,-1.076026,-1.087814,-1.252793,-0.810340,-1.354329,0.963566,-0.446012,-0.121624,0.009099,...,-0.486313,0.089426,2.694222,-0.037173,-1.852768,-0.852439,-0.242197,-0.852439,-0.580397,0
755,jayasuryajsk/Llama-3-8b-Telugu-Romanized,-1.079614,-1.092112,0.798216,-0.461011,1.217981,0.690066,-0.274891,-0.121624,0.302612,...,-0.486313,0.767274,-0.304349,0.292957,0.539733,-0.852439,-0.242197,-0.852439,-0.580397,0
756,gradientai/Llama-3-8B-Instruct-262k,-1.086789,-1.105007,0.798216,1.518524,1.217981,0.690066,0.694791,0.906233,0.302612,...,4.966727,-0.136524,1.823669,3.177907,-1.852768,1.173105,4.128876,1.173105,1.722959,1


In [3]:
# Feature and target variables
X = df.drop(columns=['Quality', 'modelId'])
y = df['Quality']

In [4]:
# Define layered 10-fold cross validation
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [5]:
# Machine Learning Models
machine_learning_models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Support Vector Machine": SVC(probability=False),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Naive Bayes": GaussianNB(),
    "Neural Network": MLPClassifier(max_iter=2000),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# Create pipeline
def create_pipeline(model):
    return Pipeline([
        ('classifier', model)
    ])

In [6]:
results_per_fold = []
overall_results = []

for name, model in machine_learning_models.items():
    all_results = {'Technique': name, 'Fold': [], 'Precision': [], 'Recall': [], 'F1': [], 'Accuracy': [], 'AUC': [],
                   'Precision_0': [], 'Recall_0': [], 'F1_0': [], 'Precision_1': [], 'Recall_1': [], 'F1_1': []}
    all_y_true = []
    all_y_pred = []
    accuracies = []
    aucs = []

    fold = 1
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        pipeline = create_pipeline(model)
        pipeline.fit(X_train, y_train)

        y_prob = pipeline.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else pipeline.predict(X_test)
        y_pred = (y_prob > 0.5).astype(int)

        accuracy = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_prob)
        report = classification_report(y_test, y_pred, output_dict=True)

        # Collect per-fold results
        all_results['Fold'].append(f'Fold {fold}')
        all_results['Precision'].append(report['weighted avg']['precision'])
        all_results['Recall'].append(report['weighted avg']['recall'])
        all_results['F1'].append(report['weighted avg']['f1-score'])
        all_results['Accuracy'].append(accuracy)
        all_results['AUC'].append(auc)

        all_results['Precision_0'].append(report['0']['precision'])
        all_results['Recall_0'].append(report['0']['recall'])
        all_results['F1_0'].append(report['0']['f1-score'])
        all_results['Precision_1'].append(report['1']['precision'])
        all_results['Recall_1'].append(report['1']['recall'])
        all_results['F1_1'].append(report['1']['f1-score'])

        aucs.append(auc)
        accuracies.append(accuracy)
        all_y_true.extend(y_test)
        all_y_pred.extend(y_pred)

        fold += 1

    # Overall results
    overall_report = classification_report(all_y_true, all_y_pred, output_dict=True)
    overall_results.append({
        'Technique': name,
        'Precision': overall_report['weighted avg']['precision'],
        'Recall': overall_report['weighted avg']['recall'],
        'F1': overall_report['weighted avg']['f1-score'],
        'Accuracy': np.mean(accuracies),
        'AUC': np.mean(aucs),
        'Precision_0': overall_report['0']['precision'],
        'Recall_0': overall_report['0']['recall'],
        'F1_0': overall_report['0']['f1-score'],
        'Precision_1': overall_report['1']['precision'],
        'Recall_1': overall_report['1']['recall'],
        'F1_1': overall_report['1']['f1-score']
    })

    results_per_fold.append(all_results)


In [7]:
results_per_fold

[{'Technique': 'Logistic Regression',
  'Fold': ['Fold 1',
   'Fold 2',
   'Fold 3',
   'Fold 4',
   'Fold 5',
   'Fold 6',
   'Fold 7',
   'Fold 8',
   'Fold 9',
   'Fold 10'],
  'Precision': [0.8947368421052632,
   0.8403331561226298,
   0.8939777327935223,
   0.848558177051825,
   0.9073477812177502,
   0.8903508771929826,
   0.9119389382547277,
   0.868066631224526,
   0.8576998050682261,
   0.8651059085841692],
  'Recall': [0.8947368421052632,
   0.8421052631578947,
   0.8947368421052632,
   0.8421052631578947,
   0.9078947368421053,
   0.868421052631579,
   0.9078947368421053,
   0.868421052631579,
   0.8533333333333334,
   0.8666666666666667],
  'F1': [0.8947368421052632,
   0.8385627530364372,
   0.8936635706914345,
   0.833599149388623,
   0.9074477107153441,
   0.8587377534745956,
   0.9051885031728257,
   0.8654689608636976,
   0.8461928566253714,
   0.8651960784313726],
  'Accuracy': [0.8947368421052632,
   0.8421052631578947,
   0.8947368421052632,
   0.8421052631578947,
 

In [8]:
overall_results_df = pd.DataFrame(overall_results)
overall_results_df

Unnamed: 0,Technique,Precision,Recall,F1,Accuracy,AUC,Precision_0,Recall_0,F1_0,Precision_1,Recall_1,F1_1
0,Logistic Regression,0.875019,0.87467,0.871557,0.874632,0.937566,0.872928,0.948,0.908917,0.87907,0.732558,0.799154
1,K-Nearest Neighbors,0.801083,0.804749,0.798923,0.804842,0.873291,0.82,0.902,0.859048,0.764423,0.616279,0.682403
2,Decision Tree,0.887014,0.886544,0.886752,0.886491,0.875462,0.917339,0.91,0.913655,0.828244,0.841085,0.834615
3,Support Vector Machine,0.844179,0.840369,0.832647,0.840404,0.788077,0.830716,0.952,0.887232,0.87027,0.624031,0.726862
4,Random Forest,0.928307,0.927441,0.927733,0.927404,0.975158,0.953157,0.936,0.944501,0.88015,0.910853,0.895238
5,Gradient Boosting,0.919015,0.918206,0.9185,0.918193,0.967108,0.945122,0.93,0.9375,0.868421,0.895349,0.881679
6,Naive Bayes,0.80913,0.80343,0.789367,0.803439,0.866723,0.79397,0.948,0.864175,0.838509,0.523256,0.644391
7,Neural Network,0.88228,0.882586,0.882418,0.882596,0.937665,0.908549,0.914,0.911266,0.831373,0.821705,0.826511
8,XGBoost,0.915567,0.915567,0.915567,0.915579,0.965037,0.936,0.936,0.936,0.875969,0.875969,0.875969


In [9]:
# Save results to files
pd.DataFrame(results_per_fold).to_json("D:/Research/RQ2/classifier/Machine_Learning_Models/results_per_fold.json", orient='records', lines=True, force_ascii=False)
pd.DataFrame(overall_results).to_json("D:/Research/RQ2/classifier/Machine_Learning_Models/overall_results.json", orient='records', lines=True, force_ascii=False)