In [None]:
!pip install mlflow

In [None]:
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score

In [None]:
# Read the dataset from the notebook's working directory.
df = pd.read_csv("apachejit.csv")

# Show a short, rich-rendered preview rather than a plain-text dump of the
# whole frame; as the last expression of the cell it is displayed automatically.
df.head()

In [None]:
# Everything except the target column is a candidate feature.
features = df.drop(columns=["buggy"])

# The estimators fitted below need numeric input. Keep only numeric/boolean
# columns so that text columns (e.g. identifiers) do not make `.fit` fail on
# string-to-float conversion. When every column is already numeric this
# selects exactly the same columns as before.
# NOTE(review): the CSV schema is not visible here — confirm that the columns
# reported below are meant to be excluded from the model.
numeric_features = features.select_dtypes(include=["number", "bool"])
dropped = features.columns.difference(numeric_features.columns)
if len(dropped):
    print("Ignoring non-numeric feature columns:", list(dropped))

# `.to_numpy()` is the recommended replacement for the `.values` attribute.
X = numeric_features.to_numpy()
Y = df["buggy"].to_numpy()

# 80/20 split with a fixed seed; `stratify=Y` keeps the class ratio of the
# target the same in the train and test parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

In [None]:
# Logistic-regression model: configure the estimator first, then fit it on the
# training split. `model1` is bound to the value returned by `fit`, as before.
log_reg = LogisticRegression(solver="liblinear", max_iter=1000)
model1 = log_reg.fit(X_train, Y_train)

In [None]:
# Random-forest model: 200 trees capped at depth 12, seeded for repeatable
# results and trained with n_jobs=-1. `model2` is bound to the value returned
# by `fit`, as before.
forest = RandomForestClassifier(
    n_estimators=200,
    max_depth=12,
    n_jobs=-1,
    random_state=42,
)
model2 = forest.fit(X_train, Y_train)

In [None]:
# Scores from the second predict_proba column (index 1) and hard label
# predictions of the logistic-regression model on the held-out split.
Y_prob1 = model1.predict_proba(X_test)[:, 1]
Y_pred1 = model1.predict(X_test)

# Caption / deferred-computation pairs: each metric is evaluated only when its
# line is printed, so the scores are computed and shown one at a time, in order.
lr_report = (
    ("Accuracy :", lambda: accuracy_score(Y_test, Y_pred1)),
    ("Precision:", lambda: precision_score(Y_test, Y_pred1, zero_division=0)),
    ("Recall   :", lambda: recall_score(Y_test, Y_pred1, zero_division=0)),
    ("F1 score :", lambda: f1_score(Y_test, Y_pred1, zero_division=0)),
    ("ROC-AUC  :", lambda: roc_auc_score(Y_test, Y_prob1)),
)

print('Metrics for LogisticRegression: \n')
for caption, compute in lr_report:
    print(caption, compute())

In [None]:
# Scores from the second predict_proba column (index 1) and hard label
# predictions of the random-forest model on the held-out split.
Y_prob2 = model2.predict_proba(X_test)[:, 1]
Y_pred2 = model2.predict(X_test)

# Caption / deferred-computation pairs: each metric is evaluated only when its
# line is printed, so the scores are computed and shown one at a time, in order.
rf_report = (
    ("Accuracy :", lambda: accuracy_score(Y_test, Y_pred2)),
    ("Precision:", lambda: precision_score(Y_test, Y_pred2, zero_division=0)),
    ("Recall   :", lambda: recall_score(Y_test, Y_pred2, zero_division=0)),
    ("F1 score :", lambda: f1_score(Y_test, Y_pred2, zero_division=0)),
    ("ROC-AUC  :", lambda: roc_auc_score(Y_test, Y_prob2)),
)

print('\nMetrics for RandomForestClassifier: \n')
for caption, compute in rf_report:
    print(caption, compute())

In [None]:
def plot_roc(fpr, tpr, title):
    """Draw one ROC curve together with the diagonal reference line.

    Parameters
    ----------
    fpr, tpr : array-like
        False-positive and true-positive rates as returned by `roc_curve`.
    title : str
        Title shown above the figure.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(fpr, tpr, label="ROC curve")
    # The diagonal from (0, 0) to (1, 1) is the reference labelled "Random guess".
    ax.plot([0, 1], [0, 1], 'k--', label="Random guess")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title(title)
    ax.legend()
    plt.show()


# ROC points for both models. The thresholds are not used below; they get
# distinct names so the second call no longer overwrites the first result.
fpr1, tpr1, thresholds1 = roc_curve(Y_test, Y_prob1)
fpr2, tpr2, thresholds2 = roc_curve(Y_test, Y_prob2)

# One figure per model, drawn by the shared helper instead of duplicated code.
plot_roc(fpr1, tpr1, "ROC Curve (Logistic Regression)")
plot_roc(fpr2, tpr2, "ROC Curve (RandomForestClassifier)")