In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, classification_report
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv1D, Flatten, Dense, SimpleRNN, Bidirectional,  MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam

In [2]:
file_path = "D:/Research/RQ2/classifier/data.json"
df = pd.read_json(file_path, lines=True)
df

Unnamed: 0,modelId,since-create,since-last-model-update,has-license,what-license,what-library,what-task,num-commit,num-discussion,model-size,...,num-table,num-emoji,num-heading,num-word,has-description,has-uses,has-limitations and bias,has-training,has-evaluation,Quality
0,DrishtiSharma/wav2vec2-large-xls-r-300m-ab-v4,1.855292,2.423890,0.798216,-0.461011,0.299299,-0.950938,-0.274891,-0.121624,-0.173520,...,0.604295,-0.136524,-0.014165,-0.267469,0.539733,1.173105,-0.242197,1.173105,1.722959,0
1,Helsinki-NLP/opus-mt-es-nl,4.140787,-0.013241,0.798216,-0.461011,0.299299,-1.361188,1.151112,-0.121624,-0.204531,...,0.604295,-0.136524,-0.594533,-0.352554,0.539733,-0.852439,-0.242197,-0.852439,1.722959,0
2,Helsinki-NLP/opus-mt-sv-tiv,4.115672,-0.013241,0.798216,-0.461011,0.299299,-1.361188,0.922952,-0.121624,-0.205394,...,0.604295,-0.136524,-0.594533,-0.352554,0.539733,1.173105,-0.242197,1.173105,-0.580397,0
3,MultiBertGunjanPatrick/multiberts-seed-15,2.303780,2.913895,0.798216,-0.461011,0.299299,-1.361188,-0.217851,-0.121624,-0.199954,...,-0.486313,-0.136524,0.179291,0.551616,0.539733,1.173105,4.128876,1.173105,-0.580397,1
4,SauravMaheshkar/clr-finetuned-bert-large-uncased,2.572873,2.961177,0.798216,0.936308,0.299299,-0.540687,-0.274891,-0.121624,-0.170989,...,0.604295,-0.136524,-0.691261,-0.353689,-1.852768,1.173105,-0.242197,1.173105,1.722959,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,Maelstrome/mermaid-gemmma-7b,-1.072438,-1.083515,-1.252793,-0.810340,-0.573449,0.690066,-0.103771,-0.121624,0.885893,...,-0.486313,-0.136524,1.049844,0.196528,0.539733,1.173105,4.128876,1.173105,1.722959,1
754,Niggendar/waiANINSFWPONYXL_v20,-1.076026,-1.087814,-1.252793,-0.810340,-1.354329,0.963566,-0.446012,-0.121624,0.009099,...,-0.486313,0.089426,2.694222,-0.037173,-1.852768,-0.852439,-0.242197,-0.852439,-0.580397,0
755,jayasuryajsk/Llama-3-8b-Telugu-Romanized,-1.079614,-1.092112,0.798216,-0.461011,1.217981,0.690066,-0.274891,-0.121624,0.302612,...,-0.486313,0.767274,-0.304349,0.292957,0.539733,-0.852439,-0.242197,-0.852439,-0.580397,0
756,gradientai/Llama-3-8B-Instruct-262k,-1.086789,-1.105007,0.798216,1.518524,1.217981,0.690066,0.694791,0.906233,0.302612,...,4.966727,-0.136524,1.823669,3.177907,-1.852768,1.173105,4.128876,1.173105,1.722959,1


In [3]:
# Feature and target variables
X = df.drop(columns=['Quality', 'modelId'])
y = df['Quality']

In [4]:
# Define layered 10-fold cross validation
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [5]:
np.random.seed(42)
tf.random.set_seed(42)

In [6]:
# Deep Learning Models
def cnn_model(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        Conv1D(32, 3, activation='relu'),  
        Conv1D(64, 3, activation='relu'),  
        Flatten(),  
        Dense(64, activation='relu'),  
        Dense(32, activation='relu'),  
        Dense(16, activation='relu'),  
        Dense(1, activation='sigmoid')  
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
    return model

def rnn_model(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        SimpleRNN(64, return_sequences=False),  
        Dense(64, activation='relu'), 
        Dense(1, activation='sigmoid')  
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
    return model

def transformer_model(input_shape):
    inputs = Input(shape=input_shape)
    x = MultiHeadAttention(num_heads=4, key_dim=64)(inputs, inputs)
    x = LayerNormalization(epsilon=1e-6)(x)
    x = GlobalAveragePooling1D()(x)
    x = Dense(256, activation='relu')(x)
    outputs = Dense(1, activation='sigmoid')(x)  
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
    return model

In [None]:
# Convert DataFrame to NumPy arrays
X_np = X.to_numpy()
y_np = y.to_numpy()

# Dictionary of deep learning models
Deep_Learning_Models = {
    "CNN": cnn_model((X_np.shape[1], 1)),
    "RNN": rnn_model((1, X_np.shape[1])),
    "Transformer": transformer_model((1, X_np.shape[1]))
}

# Reshape CNN input to (samples, timesteps, features)
X_reshaped_cnn = X_np.reshape((X_np.shape[0], X_np.shape[1], 1))

# For RNN and Transformer, reshape X to (samples, time_steps, features)
X_reshaped_rnn_transformer = X_np.reshape((X_np.shape[0], 1, X_np.shape[1]))

results_per_fold = []
overall_results = []

for name, model in Deep_Learning_Models.items():
    all_results = {'Technique': name, 'Fold': [], 'Precision': [], 'Recall': [], 'F1': [], 'Accuracy': [], 'AUC': [],
                   'Precision_0': [], 'Recall_0': [], 'F1_0': [], 'Precision_1': [], 'Recall_1': [], 'F1_1': []}
    all_y_true = []
    all_y_pred = []
    accuracies = []
    aucs = []

    fold = 1
    for train_index, test_index in kf.split(X_np, y_np):
        if name == "CNN":
            model = cnn_model((X_np.shape[1], 1))
            X_train, X_test = X_reshaped_cnn[train_index], X_reshaped_cnn[test_index]
        elif name == "RNN":
            model = rnn_model((1, X_np.shape[1]))
            X_train, X_test = X_reshaped_rnn_transformer[train_index], X_reshaped_rnn_transformer[test_index]
        else:
            model = transformer_model((1, X_np.shape[1]))
            X_train, X_test = X_reshaped_rnn_transformer[train_index], X_reshaped_rnn_transformer[test_index]
        
        y_train, y_test = y_np[train_index], y_np[test_index]

        model.fit(X_train, y_train, validation_data=(X_test, y_test), verbose=0)

        y_prob = model.predict(X_test).flatten()
        y_pred = (y_prob > 0.5).astype(int)
        auc = roc_auc_score(y_test, y_prob)
        aucs.append(auc)
        all_y_true.extend(y_test)
        all_y_pred.extend(y_pred)

        report = classification_report(y_test, y_pred, output_dict=True)

        all_results['Fold'].append(f'Fold {fold}')
        all_results['Precision'].append(report['weighted avg']['precision'])
        all_results['Recall'].append(report['weighted avg']['recall'])
        all_results['F1'].append(report['weighted avg']['f1-score'])
        all_results['Accuracy'].append(np.mean(y_test == y_pred))
        all_results['AUC'].append(auc)

        all_results['Precision_0'].append(report['0']['precision'])
        all_results['Recall_0'].append(report['0']['recall'])
        all_results['F1_0'].append(report['0']['f1-score'])
        all_results['Precision_1'].append(report['1']['precision'])
        all_results['Recall_1'].append(report['1']['recall'])
        all_results['F1_1'].append(report['1']['f1-score'])

        fold += 1

    overall_report = classification_report(all_y_true, all_y_pred, output_dict=True)
    overall_results.append({
        'Technique': name,
        'Precision': overall_report['weighted avg']['precision'],
        'Recall': overall_report['weighted avg']['recall'],
        'F1': overall_report['weighted avg']['f1-score'],
        'Accuracy': np.mean(all_results['Accuracy']),
        'AUC': np.mean(aucs),
        'Precision_0': overall_report['0']['precision'],
        'Recall_0': overall_report['0']['recall'],
        'F1_0': overall_report['0']['f1-score'],
        'Precision_1': overall_report['1']['precision'],
        'Recall_1': overall_report['1']['recall'],
        'F1_1': overall_report['1']['f1-score']
    })

    results_per_fold.append(all_results)

In [8]:
results_per_fold_df = pd.DataFrame(results_per_fold)

In [9]:
overall_results_df = pd.DataFrame(overall_results)
overall_results_df

Unnamed: 0,Technique,Precision,Recall,F1,Accuracy,AUC,Precision_0,Recall_0,F1_0,Precision_1,Recall_1,F1_1
0,CNN,0.796557,0.777045,0.75018,0.777035,0.916474,0.759812,0.968,0.851363,0.867769,0.406977,0.55409
1,RNN,0.790339,0.771768,0.74362,0.771737,0.861412,0.755869,0.966,0.848112,0.857143,0.395349,0.541114
2,Transformer,0.837029,0.837731,0.837341,0.837632,0.914975,0.873267,0.882,0.877612,0.766798,0.751938,0.759295


In [10]:
results_per_fold_df.to_json("D:/Research/RQ2/classifier/Deep_Learning_Models/results_per_fold.json", orient='records', lines=True, force_ascii=False)
overall_results_df.to_json("D:/Research/RQ2/classifier/Deep_Learning_Models/overall_results.json", orient='records', lines=True, force_ascii=False)