In [26]:
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, widgets
import warnings

# Silence every warning category for the rest of the session
warnings.simplefilter("ignore")

# Function to load and process data
def load_patient_data(file_path, chunk_size=100000):
    """
    Load the patient data in chunks and keep only the columns used for plotting.

    Parameters
    ----------
    file_path : str or path-like
        Location of the patient CSV. It must contain the columns
        VITALSTATUSDATE, GENDER and ETHNICITY.
    chunk_size : int, default 100000
        Number of rows read from the file per chunk.

    Returns
    -------
    pandas.DataFrame
        The rows whose VITALSTATUSDATE is not missing, re-indexed from 0.
        GENDER is read as int8, ETHNICITY is categorical and
        VITALSTATUSDATE is read with ``parse_dates``.
    """
    reader = pd.read_csv(
        file_path,
        usecols=['VITALSTATUSDATE', 'GENDER', 'ETHNICITY'],  # Only relevant columns
        dtype={'GENDER': 'int8', 'ETHNICITY': 'category'},  # Optimize data types
        parse_dates=['VITALSTATUSDATE'],  # Parse dates
        chunksize=chunk_size,
        low_memory=True,
    )
    # Drop rows with missing dates chunk by chunk, before anything is combined
    chunks = [chunk.dropna(subset=['VITALSTATUSDATE']) for chunk in reader]
    combined = pd.concat(chunks, ignore_index=True)

    # Every chunk infers its own set of categories; when those sets differ,
    # concat falls back to object dtype. Re-cast so the column is categorical
    # regardless of how the values were spread across chunks.
    combined['ETHNICITY'] = combined['ETHNICITY'].astype('category')
    return combined

# Load the patient dataset from a hard-coded local path
# (edit file_path to point at the CSV when running on another machine)
file_path = "C:/Users/wardo/Documents/Internship/cancer_download/Data/sim_av_patient.csv"
patient_df = load_patient_data(file_path)

# Map numeric gender codes to readable labels in a new GENDER_mapped column;
# any code that is not a key of gender_mapping becomes NaN in that column
gender_mapping = {1: 'Male', 2: 'Female', 9: 'Unknown'}
patient_df['GENDER_mapped'] = patient_df['GENDER'].map(gender_mapping)

# Define function for plotting timeline
def plot_timeline(gender_filter="All", ethnicity_filter="All", chart_type="Line", time_period="Yearly"):
    """
    Plot how many records fall in each period of VITALSTATUSDATE.

    The counts are taken from the module-level ``patient_df`` and are labelled
    as deaths on the chart.

    Parameters
    ----------
    gender_filter : str
        A value of the GENDER_mapped column, or "All" to skip the gender filter.
    ethnicity_filter : str
        A value of the ETHNICITY column, or "All" to skip the ethnicity filter.
    chart_type : {"Line", "Bar"}
        Kind of chart to draw.
    time_period : {"Yearly", "Monthly"}
        Width of the period the dates are grouped into.

    Raises
    ------
    ValueError
        If chart_type or time_period is not one of the supported values.
    """
    period_aliases = {"Yearly": "Y", "Monthly": "M"}
    plot_styles = {
        "Line": {"kind": "line", "marker": "o"},
        "Bar": {"kind": "bar"},
    }
    # Validate up front so a typo fails with a clear message instead of an
    # unbound local variable or a silently empty figure
    if time_period not in period_aliases:
        raise ValueError(
            f"Unsupported time_period {time_period!r}; expected one of {list(period_aliases)}"
        )
    if chart_type not in plot_styles:
        raise ValueError(
            f"Unsupported chart_type {chart_type!r}; expected one of {list(plot_styles)}"
        )

    # Boolean indexing already returns new frames, so patient_df is never
    # modified and no defensive copy of the full dataset is needed
    selected = patient_df
    if gender_filter != "All":
        selected = selected[selected['GENDER_mapped'] == gender_filter]
    if ethnicity_filter != "All":
        selected = selected[selected['ETHNICITY'] == ethnicity_filter]

    # Count records per period and put the periods in chronological order
    deaths_by_date = (
        selected['VITALSTATUSDATE']
        .dt.to_period(period_aliases[time_period])
        .value_counts()
        .sort_index()
    )

    # An empty selection cannot be plotted, so report it instead of failing
    if deaths_by_date.empty:
        print(f"No records match Gender: {gender_filter}, Ethnicity: {ethnicity_filter}.")
        return

    # Plot the timeline chart
    plt.figure(figsize=(12, 6))
    deaths_by_date.plot(color='steelblue', alpha=0.8, **plot_styles[chart_type])

    # Add chart labels and title
    plt.title(f"Deaths Timeline ({time_period}, Gender: {gender_filter}, Ethnicity: {ethnicity_filter})", fontsize=16)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Number of Deaths", fontsize=14)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Dropdown choices: "All" first, then each distinct non-missing value in the data
gender_options = ["All", *patient_df['GENDER_mapped'].dropna().unique().tolist()]
ethnicity_options = ["All", *patient_df['ETHNICITY'].dropna().unique().tolist()]

# Create interactive dropdowns that redraw the chart whenever a selection changes
@interact
def update_plot(
    gender_filter=widgets.Dropdown(description="Gender:", options=gender_options),
    ethnicity_filter=widgets.Dropdown(description="Ethnicity:", options=ethnicity_options),
    chart_type=widgets.Dropdown(description="Chart Type:", options=["Line", "Bar"]),
    time_period=widgets.Dropdown(description="Time Period:", options=["Yearly", "Monthly"]),
):
    """Redraw the timeline using the values currently selected in the dropdowns."""
    plot_timeline(
        gender_filter=gender_filter,
        ethnicity_filter=ethnicity_filter,
        chart_type=chart_type,
        time_period=time_period,
    )


interactive(children=(Dropdown(description='Gender:', options=('All', 'Male', 'Female', 'Unknown'), value='All…