In [None]:
import pandas as pd

# Load the raw alert export; the path is resolved relative to the current
# working directory.
df_raw = pd.read_csv("TCDCIntlEpidAll.csv")

# Build a calendar-day `date` column from `effective` in a single pass.
#   * errors="coerce" maps unparseable values to NaT instead of raising, so
#     one malformed row cannot abort the whole load.
#   * tz_localize(None) discards any UTC offset while keeping the local
#     wall-clock time (the same day the original `.dt.date` would yield).
#   * normalize() truncates the time of day to midnight.
df_raw["date"] = (
    pd.to_datetime(df_raw["effective"], errors="coerce")
    .dt.tz_localize(None)
    .dt.normalize()
)

# Drop columns that the visible analysis does not read; `effective` is no
# longer needed once `date` exists.
df_raw = df_raw.drop(
    columns=[
        "sent",
        "effective",
        "source",
        "expires",
        "senderName",
        "instruction",
        "web",
        "alert_title",
        "severity_level",
        "circle",
    ]
)

# Quick sanity check of the cleaned frame.
print(df_raw.head(2))


import matplotlib.pyplot as plt

# Step 1: count alerts per (calendar month, disease). With groupby's default
# settings, rows whose `date` is NaT are left out of the result.
month_key = df_raw["date"].dt.to_period("M")
df_counts = (
    df_raw.groupby([month_key, "alert_disease"])
    .size()
    .reset_index(name="count")
)

# Step 2: convert the monthly Period back to a Timestamp (start of month) so
# matplotlib can place it on a date axis.
df_counts["date"] = df_counts["date"].dt.to_timestamp()

# Step 3: one row per month, one column per disease; a disease with no
# alerts in a given month gets 0 rather than NaN.
df_pivot = df_counts.pivot(
    index="date", columns="alert_disease", values="count"
).fillna(0)

# Step 4: normalize each row so the diseases in a month sum to 100 %.
df_percent = df_pivot.div(df_pivot.sum(axis=1), axis=0) * 100

# Step 5: draw the stacked area chart on a single, explicitly created axes.
# Passing `ax=` stops DataFrame.plot from opening a second figure, which
# would otherwise leave an empty figure behind when plt.show() runs.
fig, ax = plt.subplots(figsize=(14, 7))
df_percent.plot(kind="area", stacked=True, ax=ax, colormap="tab20")
ax.set_ylabel("Percentage of Disease Notifications (%)")
ax.set_title("Monthly Proportion of Diseases Notified Over Time")
ax.set_xlabel("Date")
# Anchor the legend outside the plotting area, to the right of the axes.
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
fig.tight_layout()
plt.show()