In [2]:
import os
import sys
import asyncio
import pandas as pd
import numpy as np
from sqlalchemy import text
from sqlalchemy.orm import Session

In [None]:

async def db_data(db: Session):
    """Return the distinct patient IDs that have a qualifying diagnosis note.

    Joins ``progressNotes`` to ``pnAssessment`` and keeps notes whose ``dxId``
    is in a fixed list and whose ``noteDate`` is on or after 2023-01-01.

    Args:
        db: SQLAlchemy session; only its ``execute`` method is used.

    Returns:
        list[str]: patient IDs as strings, de-duplicated, in first-seen order.
        An empty list is returned (and the error printed) if the query fails.
    """
    dx_ids = (
        1120, 1121, 1122, 1123, 1124, 1216, 1596, 1662, 1663, 1666,
        1667, 1668, 1830, 1872, 1895, 2051, 2052, 2102, 2256,
    )
    try:
        # Only the patient id is consumed, so select just that column instead
        # of every column of both joined tables. pn.patientId is used because
        # the follow-up query filters on pn.patientId as well.
        query = text(
            "SELECT DISTINCT pn.patientId AS patientId "
            "FROM progressNotes pn "
            "JOIN pnAssessment pa ON pn.noteId = pa.noteId "
            f"WHERE pa.dxId IN ({', '.join(str(dx_id) for dx_id in dx_ids)}) "
            "AND pn.noteDate >= :since"
        )
        # The cutoff is bound as a parameter and written as a well-formed
        # datetime (the previous literal ended in ':0000').
        rows = db.execute(query, {"since": "2023-01-01 00:00:00"}).mappings()

        # dict.fromkeys de-duplicates while keeping first-seen order.
        patient_ids = list(dict.fromkeys(str(row['patientId']) for row in rows))
    except Exception as e:
        # Best-effort: report the failure and let the caller see "no patients".
        print(f"An error occurred: {e}")
        patient_ids = []
    return patient_ids

In [None]:
async def fetch_final_data(db: Session, patient_ids: list):
    """Fetch signed progress notes dated 2023-01-01 or later for the given patients.

    One row per note (``GROUP BY pn.noteId``) with the note text fields,
    concatenated diagnoses, place of service and provider names. The duplicated
    ``npn.procedure`` / ``npn.assesment`` columns are kept on purpose: the CSV
    cleanup later in this notebook drops their ``procedure.1`` /
    ``assesment.1`` copies by name.

    Args:
        db: SQLAlchemy session; only its ``execute`` method is used.
        patient_ids: patient identifiers (strings or ints).

    Returns:
        list: the rows from ``fetchall()``; an empty list when ``patient_ids``
        is empty (no query is issued, since ``IN ()`` is a SQL syntax error).
    """
    # De-duplicate while preserving order; all-digit ids are bound as integers
    # so the comparison matches the numeric literals that were previously
    # spliced into the SQL text.
    unique_ids = list(dict.fromkeys(str(pid).strip() for pid in patient_ids))
    if not unique_ids:
        return []

    id_params = {
        f"pid_{i}": int(pid) if pid.isdigit() else pid
        for i, pid in enumerate(unique_ids)
    }
    # One named placeholder per id: the values travel as bound parameters and
    # are never concatenated into the statement.
    placeholders = ", ".join(f":{name}" for name in id_params)

    query = """SELECT
        pn.noteId, pn.provider, pn.physician, pn.referringPhysician, pn.noteDate, pn.patientId,
        npn.complaints, npn.pastHistory, npn.assesment, npn.reviewofsystem, npn.currentmedication,
        npn.`procedure`, npn.biopsyNotes, npn.mohsNotes, npn.allergy, npn.examination, npn.patientSummary, npn.procedure, npn.assesment,
        group_concat(concat(dc.icd10Code, ' ', d.dxDescription)) AS diagnoses, pos.posName as PlaceOfService, CONCAT(p.firstName, ' ', p.lastName) as 'Rendering Provider', CONCAT(p2.firstName, ' ', p2.lastName) as 'Physician', CONCAT(p3.firstName, ' ', p3.lastName) as 'Referring Provider', CONCAT(p4.firstName, ' ', p4.lastName) as 'Billing Provider'
        FROM progressNotes pn
        LEFT JOIN providers p ON p.providerId = pn.provider
        LEFT JOIN providers p2 ON p2.providerId = pn.physician
        LEFT JOIN providers p3 ON p3.providerId = pn.referringPhysician
        LEFT JOIN providers p4 ON p4.providerId = pn.billingProvider
        LEFT JOIN newProgressNotes npn ON pn.noteId = npn.noteId
        LEFT JOIN placeOfService pos ON pos.posCodes = pn.placeOfService
        LEFT JOIN pnAssessment pa ON pa.noteId = pn.noteId
        LEFT JOIN diagnosis d ON d.dxId = pa.dxId
        LEFT JOIN diagnosisCodes dc ON dc.dxId = d.dxId AND dc.dxCodeId = pa.dxCodeId
        WHERE pn.physicianSignDate IS NOT NULL
          AND pn.patientId IN (""" + placeholders + """)
          AND pn.noteDate >= :since
          GROUP BY pn.noteId"""

    params = {**id_params, "since": "2023-01-01 00:00:00"}
    final_result = db.execute(text(query), params).fetchall()
    return final_result


In [None]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from db.db import get_db

In [None]:
import nest_asyncio
# nest_asyncio patches the event loop so asyncio.run() can be called from
# inside the notebook's already-running loop.
nest_asyncio.apply()

# Run db_data on the first session yielded by get_db() and keep the returned
# list of patient IDs.
patients_ids = asyncio.run(db_data(next(get_db())))

In [None]:
patients_ids

In [None]:
import nest_asyncio
nest_asyncio.apply()


import asyncio


async def main():
    """Take the first session from ``get_db`` and fetch notes for ``patients_ids``."""
    session = next(get_db())
    return await fetch_final_data(session, patients_ids)


In [None]:
final_data = await main()
print(final_data)

In [None]:
import pandas as pd
# Wrap the fetched rows in a DataFrame and write it to final_data.csv in the
# current working directory, without the index column.
final_df = pd.DataFrame(final_data)
final_df.to_csv('final_data.csv', index=False)

In [3]:
# Load the cached extract from the working directory, i.e. the same relative
# path this notebook writes it to, instead of a machine-specific absolute path.
df = pd.read_csv('final_data.csv')

In [4]:
len(df)

13538

In [6]:
len(df['patientId'].unique())

2108

In [None]:
# Remove note fields that are not used below, together with the duplicated
# 'procedure.1' / 'assesment.1' columns.
df = df.drop(columns=[
    'biopsyNotes', 'mohsNotes',
    'procedure.1', 'assesment.1',
    'referringPhysician', 'Physician',
])

In [None]:
df.columns

In [None]:
# Trim leading/trailing whitespace from every column name.
df.columns = df.columns.str.strip()

In [None]:
# Drop the provider name columns; rebinding df rather than mutating in place.
df = df.drop(columns=["Rendering Provider", "Referring Provider", "Billing Provider"])

In [None]:
df.columns

In [None]:
import pandas as pd
# Parse noteDate as datetimes; values that cannot be parsed become NaT.
df['noteDate'] = pd.to_datetime(df['noteDate'], errors='coerce')

In [None]:
# Order rows by patient, then by note date, and renumber the index from 0.
df = df.sort_values(['patientId','noteDate']).reset_index(drop=True)

In [None]:
# Keep only the first row seen for each noteId.
df = df.drop_duplicates(subset=['noteId'])

In [None]:
df.columns

In [None]:
# Free-text columns used for the keyword features below.
text_cols = ['complaints','pastHistory','assesment','reviewofsystem',
             'currentmedication','procedure','allergy','examination',
             'patientSummary','diagnoses']

# Replace missing text with empty strings so the .str operations below work.
df[text_cols] = df[text_cols].fillna("")

In [None]:
from bs4 import BeautifulSoup
import re

def clean_html(text):
    """Return ``text`` with HTML markup removed and whitespace collapsed.

    Markup is stripped with BeautifulSoup's ``html.parser`` (tag boundaries
    become spaces), runs of whitespace are reduced to one space and the result
    is trimmed. Any non-string input yields an empty string.
    """
    if isinstance(text, str):
        plain = BeautifulSoup(text, "html.parser").get_text(separator=" ")
        return re.sub(r'\s+', ' ', plain).strip()
    return ""

# Free-text columns that may contain HTML markup.
text_cols = [
    "complaints",
    "pastHistory",
    "assesment",
    "reviewofsystem",
    "currentmedication",
    "procedure",
    "allergy",
    "examination",
    "patientSummary",
    "diagnoses",
]

# Fill missing values, then strip markup from every entry of every text column.
for text_col in text_cols:
    filled = df[text_col].fillna("")
    df[text_col] = filled.map(clean_html)

In [None]:
import re

# Vocabulary that marks a complaint as flare-related (duplicates removed).
flare_terms = [

    'flare', 'flaring', 'worse', 'worsening',
    'itch', 'itching', 'itchy',
    'red', 'redness', 'erythema',
    'scaling', 'flakes', 'flaky', 'peeling',
    'burning', 'stinging', 'pain', 'soreness',
    'rash', 'eruption',
    'lesion', 'patch', 'plaque', 'spot',

    'dry', 'dryness', 'cracking', 'bleeding',
    'irritated', 'inflamed', 'inflammation',
    'swelling', 'tender', 'thickened', 'rough',

    'recurrence', 'recurrent', 'outbreak', 'episode',
    'active', 'aggravation', 'exacerbation'
]

# Match whole words only (with an optional plural ending). Plain substring
# matching made 'red' fire inside "prescribed"/"referred" and 'active' inside
# "inactive", inflating the flag.
# NOTE(review): negated mentions ("no itching") still count as a hit.
_complaint_flare_pattern = (
    r'\b(?:' + '|'.join(re.escape(term) for term in flare_terms) + r')(?:s|es)?\b'
)

df['complaint_flare_kw'] = df['complaints'].str.contains(
    _complaint_flare_pattern, case=False, regex=True, na=False
).astype(int)
df['complaint_no_relief'] = df['complaints'].str.contains("without relief|no improvement", case=False, na=False).astype(int)

In [None]:
df['complaint_no_relief']

In [None]:
# Extract dotted ICD-10 style codes in full. The previous pattern stopped after
# the first character behind the dot, so e.g. "L40.50" was stored as "L40.5".
# Codes written without a dot are (as before) not captured.
df['diagnosis_codes'] = df['diagnoses'].str.findall(r'\b[A-Z]\d[0-9A-Z]\.[0-9A-Z]{1,4}\b')
# 1 when the text "L40" appears anywhere in the diagnoses string.
df['has_psoriasis'] = df['diagnoses'].str.contains('L40', case=False, na=False).astype(int)
# First of these (case-sensitive) type words found in the diagnoses text, else NaN.
df['psoriasis_type'] = df['diagnoses'].str.extract(r'(Plaque|Arthropathic|Guttate|Pustular)', expand=False)

In [None]:
def flag_any(t, keywords):
    """Return 1 if any keyword is a substring of lower-cased ``t``, else 0.

    Keywords are compared as given, so they should already be lower case.
    """
    lowered = t.lower()
    for keyword in keywords:
        if keyword in lowered:
            return 1
    return 0

# Keyword flags computed from the assessment text with flag_any (substring match).
# 'flare-up' is already covered by 'flare' under substring matching.
df['flare_in_assessment'] = df['assesment'].apply(lambda t: flag_any(t, ['flare', 'worsen', 'flare-up']))
# 1 when any of the listed trigger words appears in the assessment.
df['trigger_mentioned'] = df['assesment'].apply(lambda t: flag_any(t, ['stress','infection','weather','medication']))
# 1 when a steroid name or a generic topical word ('ointment', 'cream') appears.
df['steroid_started'] = df['assesment'].apply(lambda t: flag_any(t, ['triamcinolone','steroid','ointment','cream']))

In [None]:
df['flare_in_assessment']

In [None]:
# A note has medications only when the field is non-empty AND does not say
# "none"/"not taking"/etc. Previously an empty field counted as True.
df['has_medications'] = (
    df['currentmedication'].fillna("").str.strip().ne("")
    & ~df['currentmedication'].str.contains(
        "no active|none|not taking|no meds", case=False, na=False
    )
)

# True when any listed steroid name appears in the current-medication text.
df['on_steroid_med'] = df['currentmedication'].str.contains(
    "steroid|triamcinolone|clobetasol|hydrocortisone|betamethasone|mometasone|fluocinonide|desonide|prednisone|methylprednisolone|dexamethasone",
    case=False, na=False
)


# True when any listed biologic (generic or brand name) appears.
# NOTE(review): 'tynlmya' does not look like a known product name - confirm
# the intended spelling.
df['on_biologic'] = df['currentmedication'].str.contains(
    "adalimumab|humira|secukinumab|cosentyx|ixekizumab|taltz|etanercept|enbrel|ustekinumab|stelara|guselkumab|tremfya|risankizumab|skyrizi|brodalumab|siliq|bimekizumab|tynlmya",
    case=False, na=False
)

In [None]:
# Case-insensitive keyword flags on the examination text (missing text -> False).
df['plaques_present'] = df['examination'].str.contains("plaque", case=False, na=False)
# Fires on "silvery" or on any mention of "scale".
df['silvery_scale'] = df['examination'].str.contains("silvery|scale", case=False, na=False)
df['elbows_involved'] = df['examination'].str.contains("elbow", case=False, na=False)
df['hyperpigmentation'] = df['examination'].str.contains("hyperpigment", case=False, na=False)

In [None]:
# Case-insensitive keyword flags on the review-of-systems text.
# NOTE(review): "itch" also matches negated phrases such as "no itching".
df['itch_present'] = df['reviewofsystem'].str.contains("itch", case=False, na=False)
df['dry_skin'] = df['reviewofsystem'].str.contains("dry skin", case=False, na=False)
# True only when the literal phrase "no fever" appears.
df['fever_absent'] = df['reviewofsystem'].str.contains("no fever", case=False, na=False)

In [None]:
# Remove negated forms ("non-smoker", "never smoker", "nonsmoker") before
# searching, so they no longer set the smoker flag.
# NOTE(review): "former smoker"/"ex-smoker" still count as smoker - confirm.
df['smoker'] = (
    df['pastHistory']
    .str.replace(r'\b(?:non|never)[\s-]*smoker', ' ', case=False, regex=True)
    .str.contains("smoker", case=False, na=False)
)
# True when "yes" appears anywhere after "alcohol" / "melanoma" in the text.
# NOTE(review): '.*' is greedy across the whole string; confirm the layout of
# pastHistory makes this safe.
df['alcohol_use'] = df['pastHistory'].str.contains("alcohol.*yes", case=False, na=False)
df['family_melanoma'] = df['pastHistory'].str.contains("melanoma.*yes", case=False, na=False)

In [None]:
# Age: first number (up to 3 digits) directly followed by "year", allowing
# "45 year", "45year" and "45-year". The previous 1-2 digit pattern without a
# word boundary read "105 year" as age 5.
df['patient_age'] = df['patientSummary'].str.extract(r'\b(\d{1,3})\s*-?\s*year', expand=False).astype(float)
# First stand-alone "Female" or "Male" (case-sensitive) in the summary, else NaN.
df['patient_gender'] = df['patientSummary'].str.extract(r'\b(Female|Male)\b', expand=False)
# True when the summary mentions "follow up" (case-insensitive).
df['follow_up_visit'] = df['patientSummary'].str.contains("follow up", case=False, na=False)

In [None]:
df.shape

In [None]:
# An allergy is flagged only when the field is non-empty and does not state
# "no known ..." (or the abbreviation NKDA). Previously an empty allergy
# field was treated as "has allergy".
df['has_allergy'] = (
    df['allergy'].fillna("").str.strip().ne("")
    & ~df['allergy'].str.contains("no known|nkda", case=False, na=False)
)

In [None]:
df.columns

In [None]:

# 1 when any of the three flare indicators is set for the note.
df['flare_signal'] = (
    df['complaint_flare_kw'] |
    df['flare_in_assessment'] |
    df['itch_present']
).astype(int)

# 1 when a steroid is mentioned in the assessment or in current medications.
df['any_steroid_use'] = (
    df['steroid_started'] | df['on_steroid_med']
).astype(int)

# Heuristic score: flare signal counts double, steroid use and a mentioned
# trigger count once each.
df['flare_risk_score'] = (
    df['flare_signal']*2 + df['any_steroid_use'] + df['trigger_mentioned']
)


In [None]:
import mlflow

# Record the size of the feature table and the two headline rates, and attach
# the feature table itself as a parquet artifact.
with mlflow.start_run(run_name="text_feature_extraction"):
    mlflow.log_param("n_rows", len(df))
    mlflow.log_metric("flare_signal_rate", df['flare_signal'].mean())
    mlflow.log_metric("steroid_use_rate", df['any_steroid_use'].mean())
    # NOTE(review): this parquet file contains the note text columns; confirm
    # /tmp and the MLflow artifact store are acceptable locations for it.
    df.to_parquet("/tmp/text_features_v1.parquet", index=False)
    mlflow.log_artifact("/tmp/text_features_v1.parquet", "features")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

In [None]:
pd.set_option('display.max_colwidth', 200)

# Percentage of rows with non-empty text in each column, highest first.
non_empty_pct = (
    df[text_cols].replace("", pd.NA).notna().mean().sort_values(ascending=False) * 100
)

plt.figure(figsize=(10,5))
non_empty_pct.plot.bar(color="skyblue")
plt.title("Percentage of Non-Empty Text Entries per Column")
plt.ylabel("% of non-empty values")
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
# Make sure noteDate is datetime-typed (unparseable values become NaT).
df['noteDate'] = pd.to_datetime(df['noteDate'], errors='coerce')

# Number of notes in each calendar month, in chronological order.
notes_per_month = df['noteDate'].dt.to_period('M').value_counts().sort_index()

plt.figure(figsize=(10,5))
notes_per_month.plot(kind='bar', color='lightcoral')
plt.title("Number of Notes per Month")
plt.ylabel("Notes count")
plt.xlabel("Month")
plt.xticks(rotation=45)
plt.show()


In [None]:
# Number of notes recorded for each patient.
notes_per_patient = df.groupby('patientId')['noteId'].count()

# Histogram (30 bins, with a KDE overlay) of how many notes patients have.
plt.figure(figsize=(8,4))
sns.histplot(notes_per_patient, bins=30, kde=True, color='green')
plt.title("Distribution of Notes per Patient")
plt.xlabel("Number of Notes")
plt.ylabel("Patient Count")
plt.show()


In [None]:
from collections import Counter
from wordcloud import STOPWORDS

def plot_wordcloud(text_series, title):
    """Render a word cloud of all entries in ``text_series`` under ``title``.

    The entries are joined with spaces and lower-cased; the default word-cloud
    stop words plus a few generic clinical words are excluded.
    """
    extra_stopwords = {"patient","apply","use","follow","follow-up"}
    corpus = " ".join(text_series.tolist()).lower()

    cloud = WordCloud(width=800, height=400,
                      stopwords=STOPWORDS.union(extra_stopwords))
    cloud = cloud.generate(corpus)

    plt.figure(figsize=(10,5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.title(title, fontsize=14)
    plt.show()

# One word cloud per free-text field of interest.
for cloud_col, cloud_title in [
    ('assesment', "Word Cloud - Assessment Notes"),
    ('complaints', "Word Cloud - Complaints"),
    ('examination', "Word Cloud - Examination Findings"),
]:
    plot_wordcloud(df[cloud_col], cloud_title)


In [None]:
# Keyword groups whose mention rate in the assessment text is measured below.
# Note: this rebinds flare_terms, replacing the longer complaint vocabulary
# defined earlier in the notebook.
flare_terms = ["flare", "flare-up", "worse", "worsen", "worsening"]
steroid_terms = ["steroid", "triamcinolone", "clobetasol", "ointment"]
trigger_terms = ["stress", "infection", "weather"]

def term_rate(series, terms):
    """Return the fraction of entries in ``series`` mentioning any of ``terms``.

    Matching is a case-insensitive literal substring search (terms are
    regex-escaped); missing entries count as non-matching.

    Args:
        series: pandas Series of text (may contain NaN/None).
        terms: iterable of keyword strings.

    Returns:
        float in [0, 1]; 0.0 when ``series`` is empty or ``terms`` is empty.
    """
    import re

    terms = list(terms)
    if len(series) == 0 or not terms:
        # Avoid dividing by zero / matching everything with an empty pattern.
        return 0.0
    pattern = '|'.join(re.escape(term.lower()) for term in terms)
    # fillna before .str so an all-missing (float) series does not break .str.
    lowered = series.fillna("").astype(str).str.lower()
    return float(lowered.str.contains(pattern, regex=True).mean())

# Fraction (0-1) of assessment notes that mention each keyword group.
keyword_stats = {
    'flare_terms': term_rate(df['assesment'], flare_terms),
    'steroid_terms': term_rate(df['assesment'], steroid_terms),
    'trigger_terms': term_rate(df['assesment'], trigger_terms),
}

# The axis is labelled in percent, so scale the fractions by 100 for plotting
# (keyword_stats itself stays a fraction).
sns.barplot(
    x=list(keyword_stats.keys()),
    y=[rate * 100 for rate in keyword_stats.values()],
    color="orange",
)
plt.title("Keyword Mentions in Assessment Notes")
plt.ylabel("Proportion of Notes (%)")
plt.show()


In [None]:
# diagnosis_codes holds Python lists, which are unhashable and make
# value_counts() raise TypeError. Turn each list into its ', '-joined label
# first, then count the ten most frequent code combinations.
top_diagnoses = (
    df['diagnosis_codes']
    .map(lambda codes: ', '.join(codes) if isinstance(codes, (list, tuple)) else str(codes))
    .value_counts()
    .head(10)
)

In [None]:
df.columns

In [None]:
# Horizontal bar chart of the most frequent diagnosis labels in top_diagnoses.
plt.figure(figsize=(10,5))
sns.barplot(y=top_diagnoses.index, x=top_diagnoses.values, color="steelblue")
plt.title("Top Diagnoses in Dataset")
plt.xlabel("Count")
plt.ylabel("Diagnosis")
plt.show()

In [None]:
# Pairwise correlation between four flare-related indicator columns.
flare_features = ['complaint_flare_kw', 'flare_in_assessment', 'steroid_started', 'itch_present']
sns.heatmap(df[flare_features].corr(), annot=True, cmap="coolwarm")
plt.title("Correlation between Flare-Related Features")
plt.show()


In [None]:
import mlflow

# Log the dataset-level EDA numbers computed above as one MLflow run.
with mlflow.start_run(run_name="eda_visualization"):
    mlflow.log_metric("n_patients", df['patientId'].nunique())
    mlflow.log_metric("avg_notes_per_patient", notes_per_patient.mean())
    mlflow.log_metric("flare_term_rate", keyword_stats['flare_terms'])
    mlflow.log_metric("steroid_term_rate", keyword_stats['steroid_terms'])
    mlflow.log_metric("trigger_term_rate", keyword_stats['trigger_terms'])

In [None]:
df.shape

In [None]:
import numpy as np

# Heuristic label: a note counts as a flare when it shows a flare signal AND
# steroid use.
has_flare_signal = df["flare_signal"] == 1
has_steroid_use = df["any_steroid_use"] == 1
df["flare_label"] = np.where(has_flare_signal & has_steroid_use, 1, 0)

In [None]:
df["flare_label"].sample(10)

In [None]:
# Names of all object-dtype columns (rebinds the earlier text_cols list).
text_cols = df.select_dtypes(include=['object']).columns.tolist()

In [None]:
text_cols

In [None]:
# Names of all bool-dtype columns, kept under the name numeric_cols.
numeric_cols = df.select_dtypes(include=['bool']).columns.tolist()

In [None]:
numeric_cols

In [None]:
# Hand-maintained lists of text and flag columns. Both are filtered against
# df.columns (the same pattern used for safe_numeric_cols further down)
# because some listed names, e.g. 'on_systemic_med' and
# 'on_topical_nonsteroid', are never created in this notebook.
text_cols = [
    'complaints',
    'pastHistory',
    'assesment',
    'reviewofsystem',
    'currentmedication',
    'procedure',
    'allergy',
    'examination',
    'patientSummary',
    'diagnoses',
    'PlaceOfService',
    'patient_gender',
    'diagnosis_codes',
    'psoriasis_type',
]
text_cols = [c for c in text_cols if c in df.columns]

numeric_cols = [
    'has_medications',
    'on_steroid_med',
    'on_biologic',
    'on_systemic_med',
    'on_topical_nonsteroid',
    'plaques_present',
    'silvery_scale',
    'elbows_involved',
    'hyperpigmentation',
    'itch_present',
    'dry_skin',
    'fever_absent',
    'smoker',
    'alcohol_use',
    'family_melanoma',
    'follow_up_visit',
    'has_allergy',
]
numeric_cols = [c for c in numeric_cols if c in df.columns]


In [None]:
# Correlation of every numeric column with flare_label, strongest positive first.
corrs = df.corr(numeric_only=True)["flare_label"].sort_values(ascending=False)
print(corrs.head(20))

In [None]:
# Re-establish patient/date order so the per-patient shift below looks at the
# chronologically next note.
df = df.sort_values(['patientId','noteDate']).reset_index(drop=True)

In [None]:
# Target: the flare_label of the same patient's following row (NaN for each
# patient's last row).
df['flare_label_next'] = df.groupby('patientId')['flare_label'].shift(-1)

In [None]:
df.shape

In [None]:
# Rows without a following note have no target; drop them.
df = df.dropna(subset=['flare_label_next']).reset_index(drop=True)

In [None]:
# The shift produced floats (because of NaN); cast the target back to int.
df['flare_label_next'] = df['flare_label_next'].astype(int)
target_col = 'flare_label_next'
# The mean of a 0/1 target is its positive rate.
print("Remaining rows:", len(df), "Positive rate:", df[target_col].mean())

In [None]:
# Columns that define, or are direct ingredients of, the flare label.
leak_cols = [
    'flare_label', 'flare_signal', 'flare_risk_score',
    'flare_in_assessment', 'any_steroid_use', 'steroid_started',
    'complaint_flare_kw', 'complaint_no_relief'
]

# Remove whichever of them are present; missing names are skipped.
df = df.drop(columns=leak_cols, errors='ignore')

# Sanity check: should display an empty list.
[c for c in leak_cols if c in df.columns]


In [None]:
import re

# Any spelling of a flare mention, optionally preceded by "psoriasis":
# flare, flares, flaring, flare-up(s), flare up(s), flareup(s).
_FLARE_MENTION_RE = re.compile(
    r'\b(?:psoriasis\s+)?flar(?:e(?:[-\s]?ups?|s)?|ing)\b', re.I
)
# A whole "apply/use ... ointment|cream|gel" instruction.
_TOPICAL_INSTRUCTION_RE = re.compile(
    r'\b(apply|use)\b.*?(ointment|cream|gel)\b', re.I
)
# Single treatment words that remain after the phrase-level pass.
_TREATMENT_WORD_RE = re.compile(
    r'\b(?:triamcinolone|clobetasol|hydrocortisone|ointment|apply|start|prescribed|prescription)\b',
    re.I,
)


def mask_post_flare_terms(text):
    """Blank out flare mentions and treatment wording in a note.

    Args:
        text: note text; any non-string value is treated as missing.

    Returns:
        str: the text with matched spans replaced by spaces, whitespace
        collapsed and trimmed; ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""

    # Longest forms are matched as a unit, so "flare-up" / "flare up" no
    # longer leave "-up" / "up" behind.
    text = _FLARE_MENTION_RE.sub(' ', text)
    # Phrase-level removal must run BEFORE the single-word pass; otherwise
    # "apply" and "ointment" are already gone and the phrase can never match.
    text = _TOPICAL_INSTRUCTION_RE.sub(' ', text)
    text = _TREATMENT_WORD_RE.sub(' ', text)

    return re.sub(r'\s+', ' ', text).strip()

# Add a masked '<column>_clean' copy of each free-text field.
masked_source_cols = ['assesment','complaints','examination','patientSummary','currentmedication']
for col in masked_source_cols:
    df[f'{col}_clean'] = df[col].fillna('').map(mask_post_flare_terms)


In [None]:

# Structured features allowed into the model.
safe_numeric_cols = [
    "patient_age", "has_psoriasis", "on_steroid_med", "on_biologic",
    "itch_present", "dry_skin", "plaques_present", "silvery_scale",
    "elbows_involved", "hyperpigmentation", "smoker", "alcohol_use",
    "family_melanoma"
]

# Keep only the names that actually exist in df.
safe_numeric_cols = [c for c in safe_numeric_cols if c in df.columns]
# Numeric view with booleans cast to float and missing values set to 0.
# NOTE(review): X_num_df is not referenced again in the cells visible here.
X_num_df = df[safe_numeric_cols].astype(float).fillna(0)

# Masked text fields that feed the TF-IDF step.
text_inputs = ['assesment_clean', 'complaints_clean', 'examination_clean']
text_inputs = [c for c in text_inputs if c in df.columns]
print("Numeric features:", safe_numeric_cols)
print("Text fields:", text_inputs)


In [None]:
from sklearn.model_selection import GroupShuffleSplit

# Single 80/20 split grouped by patientId, so each patient's notes all land on
# the same side of the split.
gss = GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
train_idx, test_idx = next(gss.split(df, groups=df['patientId']))
train_df = df.iloc[train_idx].reset_index(drop=True)
test_df  = df.iloc[test_idx].reset_index(drop=True)

print("Train patients:", train_df['patientId'].nunique(), "Test patients:", test_df['patientId'].nunique())
print("Train pos rate:", train_df[target_col].mean(), "Test pos rate:", test_df[target_col].mean())


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
import joblib, os


# One document per training note: the three masked text fields joined by spaces.
train_text = (train_df['assesment_clean'].fillna('') + ' ' +
              train_df['complaints_clean'].fillna('') + ' ' +
              train_df['examination_clean'].fillna('')).astype(str)

# TF-IDF over unigrams and bigrams, fitted on the training text only.
tfidf = TfidfVectorizer(ngram_range=(1,2), max_features=10000, min_df=5, stop_words='english')
X_text_train = tfidf.fit_transform(train_text)

# Reduce the sparse TF-IDF matrix to 120 dense components.
svd = TruncatedSVD(n_components=120, random_state=42)
X_text_train_svd = svd.fit_transform(X_text_train)


# Persist the fitted text transformers.
os.makedirs('/tmp/preproc', exist_ok=True)
joblib.dump(tfidf, '/tmp/preproc/tfidf.joblib')
joblib.dump(svd, '/tmp/preproc/svd.joblib')

print("TF-IDF vocab size:", len(tfidf.vocabulary_))
print("SVD components:", X_text_train_svd.shape)


In [None]:
# Standardise the structured features; the scaler is fitted on train only.
scaler = StandardScaler()
X_num_train = scaler.fit_transform(train_df[safe_numeric_cols].fillna(0).astype(float).values)
joblib.dump(scaler, '/tmp/preproc/scaler.joblib')


# Recompute the train text features via transform() (the fitting cell used
# fit_transform()).
# NOTE(review): presumably so train and test go through the same transform()
# path - confirm, otherwise these two lines are redundant.
X_text_train = tfidf.transform(train_text)
X_text_train_svd = svd.transform(X_text_train)

# Build the test documents the same way as the training ones and push them
# through the already-fitted transformers.
test_text = (test_df['assesment_clean'].fillna('') + ' ' +
             test_df['complaints_clean'].fillna('') + ' ' +
             test_df['examination_clean'].fillna('')).astype(str)
X_text_test = tfidf.transform(test_text)
X_text_test_svd = svd.transform(X_text_test)

X_num_test = scaler.transform(test_df[safe_numeric_cols].fillna(0).astype(float).values)


In [None]:
import numpy as np
# Final design matrices: scaled structured features followed by SVD text components.
X_train = np.hstack([X_num_train, X_text_train_svd])
X_test  = np.hstack([X_num_test,  X_text_test_svd])
y_train = train_df[target_col].values
y_test  = test_df[target_col].values


# Shapes plus the positive rate of each split.
print(X_train.shape, X_test.shape, y_train.mean(), y_test.mean())


In [None]:
from lightgbm import LGBMClassifier
import lightgbm as lgb
from sklearn.model_selection import GroupShuffleSplit

# Gradient-boosted trees; class_weight='balanced' reweights the classes.
clf = LGBMClassifier(
    n_estimators=1000,
    learning_rate=0.05,
    num_leaves=31,
    class_weight='balanced',
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1
)

# Early stopping must not look at the test set: choosing the number of trees on
# (X_test, y_test) leaks it into model selection and inflates every test
# metric. Carve a patient-grouped validation slice out of the training data
# instead (rows of X_train line up with rows of train_df).
val_splitter = GroupShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
fit_idx, val_idx = next(
    val_splitter.split(X_train, y_train, groups=train_df['patientId'])
)

clf.fit(
    X_train[fit_idx], y_train[fit_idx],
    eval_set=[(X_train[val_idx], y_train[val_idx])],
    eval_metric='auc',
    callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)]
)


In [None]:
import pandas as pd

# Human-readable names for the columns of X_train / X_test.
svd_feats = [f"svd_{i}" for i in range(X_text_train_svd.shape[1])]
feat_names = safe_numeric_cols + svd_feats

# Named copy of the test matrix, kept for the explanation plots.
X_test_df = pd.DataFrame(X_test, columns=feat_names)

# The model was fitted on plain arrays, so predict on the array as well; this
# avoids a fitted-without-names / predicted-with-names mismatch.
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]


In [None]:
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_fscore_support

# Per-class precision/recall/F1 on the test split.
print("Classification Report:")
print(classification_report(y_test, y_pred, digits=3))

# ROC-AUC uses the predicted probabilities; precision/recall/F1 use the hard
# predictions for the positive class.
roc_auc = roc_auc_score(y_test, y_proba)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')

print(f"ROC-AUC: {roc_auc:.3f}")
print(f"Precision: {precision:.3f} | Recall: {recall:.3f} | F1: {f1:.3f}")


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Twenty features with the highest LightGBM importance.
imp_df = pd.DataFrame({
    "feature": feat_names,
    "importance": clf.feature_importances_
}).sort_values("importance", ascending=False).head(20)

plt.figure(figsize=(8, 6))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` - confirm against the installed version.
sns.barplot(y="feature", x="importance", data=imp_df, palette="coolwarm")
plt.title("Top Feature Importances (LightGBM)")
plt.tight_layout()
plt.show()


In [None]:
import shap
# Per-feature SHAP contributions for every test row.
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(X_test_df)

# Depending on the shap version, a binary classifier may yield a list
# [class0, class1]; if so, keep the positive-class values.
if isinstance(shap_values, list):
    shap_values = shap_values[1]

shap.summary_plot(shap_values, X_test_df, feature_names=feat_names, max_display=20)


In [None]:
def explain_patient(idx):
    """Return the five largest SHAP contributions for test row ``idx``.

    Reads the notebook-level ``shap_values`` and ``feat_names``. The result has
    one row per feature, ordered by absolute contribution (largest first), with
    an arrow showing whether the contribution is positive or not.
    """
    row_values = shap_values[idx]
    # argsort is ascending, so take the last five and reverse them.
    top_positions = np.argsort(np.abs(row_values))[-5:][::-1]
    contributions = row_values[top_positions]

    arrows = []
    for value in contributions:
        arrows.append("↑" if value > 0 else "↓")

    return pd.DataFrame({
        "feature": np.array(feat_names)[top_positions],
        "contribution": contributions,
        "direction": arrows
    })

explain_patient(0)


In [None]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import GroupShuffleSplit

# Fitting the calibrator and scoring it on the very same rows gives an
# optimistic number. Split the test patients in two: one half fits the
# calibrator, the other half is used to report the metric.
cal_splitter = GroupShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
cal_idx, eval_idx = next(
    cal_splitter.split(X_test, y_test, groups=test_df['patientId'])
)

# NOTE(review): newer scikit-learn releases deprecate cv="prefit"; confirm
# against the installed version.
cal = CalibratedClassifierCV(estimator=clf, method="sigmoid", cv="prefit")
cal.fit(X_test[cal_idx], y_test[cal_idx])

# Calibrated probabilities for every test row (same shape as before); only the
# held-out half is used for the reported score.
y_prob_cal = cal.predict_proba(X_test)[:,1]
print("Calibrated ROC-AUC:", roc_auc_score(y_test[eval_idx], y_prob_cal[eval_idx]))


In [None]:
def subgroup_metrics(df, subgroup_col):
    """Compute ROC-AUC per subgroup of ``subgroup_col``.

    Args:
        df: frame holding the target column (notebook-level ``target_col``),
            a ``pred_prob`` column and ``subgroup_col``.
        subgroup_col: name of the column to group by.

    Returns:
        DataFrame with columns ``[subgroup_col, 'AUC']`` sorted by AUC,
        highest first. Subgroups with fewer than 30 rows, or with only one
        target class (for which ROC-AUC is undefined), are skipped.
    """
    rows = []
    for group, members in df.groupby(subgroup_col):
        if len(members) < 30:
            continue
        # roc_auc_score raises when only one class is present.
        if members[target_col].nunique() < 2:
            continue
        rows.append((group, roc_auc_score(members[target_col], members['pred_prob'])))
    return pd.DataFrame(rows, columns=[subgroup_col, 'AUC']).sort_values('AUC', ascending=False)

# Attach the predicted probabilities to the test rows, then compare AUC by gender.
test_df['pred_prob']=y_proba
subgroup_metrics(test_df,'patient_gender')


In [None]:
import mlflow

# Log the headline test metrics and the top-feature importance table.
with mlflow.start_run(run_name="flare_model_evaluation"):
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)
    imp_df.to_csv("/tmp/feature_importance.csv", index=False)
    mlflow.log_artifact("/tmp/feature_importance.csv", artifact_path="importances")


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Confusion matrix of the hard predictions on the test split.
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["No Flare", "Flare"])
disp.plot(cmap="Blues")
plt.title("Flare Prediction Confusion Matrix")
plt.show()


In [None]:
from sklearn.metrics import roc_curve, auc

# ROC curve from the predicted probabilities; note this rebinds roc_auc with
# the area computed from the curve.
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(6,6))
plt.plot(fpr, tpr, label=f"ROC curve (AUC = {roc_auc:.2f})")
# Diagonal reference line (chance level).
plt.plot([0,1],[0,1],'--',color='gray')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.title("ROC Curve - Psoriasis Flare Prediction")
plt.show()