<a href="https://colab.research.google.com/github/TrinaBan0807/icaai/blob/main/ROC_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from io import BytesIO
from google.colab import files


# Upload File
# files.upload() returns a mapping of uploaded file name -> raw file bytes.
uploaded = files.upload()
if not uploaded:
    # The mapping can be empty (e.g. nothing was selected); fail with a clear
    # message instead of an IndexError from indexing an empty key list.
    raise ValueError("No file uploaded. Upload CSV or Excel.")

# Only the first uploaded file is analysed.
file_name = next(iter(uploaded))
file_buffer = BytesIO(uploaded[file_name])

# Compare the extension case-insensitively so names such as "DATA.CSV" or
# "Report.XLSX" are accepted as well.
lowered_name = file_name.lower()
if lowered_name.endswith(".csv"):
    df = pd.read_csv(file_buffer)
elif lowered_name.endswith((".xls", ".xlsx")):
    df = pd.read_excel(file_buffer)
else:
    raise ValueError("Unsupported file format. Upload CSV or Excel.")

print("\nColumns found in file:")
print(df.columns.tolist())

# Column Selection
# The user names three columns: one numeric column for the summary statistics
# and plots, one binary ground-truth column, and one score column for the ROC.

value_column = input("\nEnter numeric column for Mean/Median: ").strip()
true_col = input("Enter TRUE label column (binary 0/1): ").strip()
score_col = input("Enter predicted score/probability column: ").strip()

selected_columns = [value_column, true_col, score_col]

for col in selected_columns:
    if col not in df.columns:
        raise ValueError(f"Column '{col}' not found in dataset.")

# errors="coerce" turns anything that cannot be parsed as a number into NaN,
# so malformed cells are removed by the dropna below instead of raising here.
for col in selected_columns:
    df[col] = pd.to_numeric(df[col], errors="coerce")

df = df.dropna(subset=selected_columns)

# Without this check an empty frame slips through the label validation below
# (an empty set is a subset of {0, 1}) and only fails later inside roc_curve.
if df.empty:
    raise ValueError(
        "No rows left after removing non-numeric or missing values "
        "in the selected columns."
    )

unique_labels = df[true_col].unique()
label_set = set(unique_labels)
if not label_set.issubset({0, 1}):
    raise ValueError("TRUE label column must contain only 0 and 1 values.")

# A ROC curve needs both positives and negatives; with a single class the
# true/false positive rates are undefined and the AUC comes out as NaN.
if len(label_set) < 2:
    raise ValueError(
        "TRUE label column must contain both 0 and 1 values to compute a ROC curve."
    )

#  Mean & Median Calculation
# Summary statistics of the user-selected column, printed one per line.

selected_values = df[value_column]
mean_val = selected_values.mean()
median_val = selected_values.median()

print(
    f"\nMean of {value_column}: {mean_val}",
    f"Median of {value_column}: {median_val}",
    sep="\n",
)

#  ROC Curve
# Compute the ROC points from the true labels and the scores, integrate them
# to get the AUC, then draw the curve against the diagonal reference line.

y_true = df[true_col]
y_score = df[score_col]
fpr, tpr, _ = roc_curve(y_true, y_score)
roc_auc = auc(fpr, tpr)

print(f"\nAUC (Area Under ROC Curve): {roc_auc}")

roc_fig, roc_ax = plt.subplots(figsize=(7, 6))
roc_ax.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.4f})")
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
roc_ax.plot([0, 1], [0, 1], linestyle="--")
roc_ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve",
)
roc_ax.legend()
roc_ax.grid(True)
plt.show()

#  Graph Selection
# Let the user pick how the selected numeric column is visualised. Any input
# other than 1/2/3 falls back to the histogram.

print("\nChoose graph type:")
print("1 - Bar Graph")
print("2 - Histogram")
print("3 - Line Graph")

choice = input("Enter choice (1/2/3): ").strip()

# Resolve the fallback up front so the histogram is drawn by a single branch
# and always gets its title and axis labels (the fallback used to omit them).
graph_type = choice
if graph_type not in {"1", "2", "3"}:
    print("Invalid choice. Showing histogram.")
    graph_type = "2"

plt.figure(figsize=(7, 5))

if graph_type == "1":
    # value_counts() orders by frequency by default; sort_index() puts the
    # bars in ascending value order so the numeric x-axis reads left to right.
    df[value_column].value_counts().sort_index().plot(kind="bar")
    plt.title(f"Bar Graph of {value_column}")
    plt.xlabel(value_column)
    plt.ylabel("Frequency")

elif graph_type == "2":
    plt.hist(df[value_column], bins=20)
    plt.title(f"Histogram of {value_column}")
    plt.xlabel(value_column)
    plt.ylabel("Frequency")

else:
    # graph_type == "3": values plotted against the DataFrame index.
    plt.plot(df[value_column])
    plt.title(f"Line Graph of {value_column}")
    plt.xlabel("Index")
    plt.ylabel(value_column)

plt.grid(True)
plt.show()
