<a href="https://colab.research.google.com/github/hsandaver/hsandaver/blob/main/Entity_Filter_Slider.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages (uncomment and run if not already installed)
# !pip install ipywidgets plotly pandas
# !jupyter nbextension enable --py widgetsnbextension

# Import statements
import json
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output

# Function to extract date from JSON field
def extract_date(date_field):
    """Parse the timestamp held in the first entry of a JSON date field.

    Args:
        date_field: Expected to be a sequence whose first element is a mapping
            with a 'time:inXSDDateTimeStamp' key, itself a mapping holding the
            timestamp string under '@value'.

    Returns:
        The result of ``pd.to_datetime`` on the timestamp string, or ``None``
        if the field does not have the expected shape or cannot be parsed.
    """
    try:
        first_entry = date_field[0]
        stamp = first_entry['time:inXSDDateTimeStamp']
        return pd.to_datetime(stamp['@value'])
    except Exception:
        # Best effort: any lookup or parsing failure means "no usable date"
        return None

# Function to extract a record from JSON data
def extract_record(data):
    """Flatten one entity's JSON mapping into a row dict for the DataFrame.

    Args:
        data: Mapping read with ``.get``; the keys consulted are 'prefLabel',
            'dateOfBirth', 'dateOfDeath', 'description' and 'type'.

    Returns:
        A dict with the keys 'Name', 'Date of Birth', 'Date of Death', 'Type'
        and 'Description'. Missing English labels fall back to 'Unknown' for
        the name and '' for the description.
    """
    label = data.get('prefLabel', {}).get('en', 'Unknown')
    born = extract_date(data.get('dateOfBirth', [{}]))
    died = extract_date(data.get('dateOfDeath', [{}]))
    summary = data.get('description', {}).get('en', '')

    # A list of types is collapsed into one '; '-separated string;
    # any other value is simply stringified.
    raw_types = data.get('type', [])
    if isinstance(raw_types, list):
        joined_types = '; '.join(raw_types)
    else:
        joined_types = str(raw_types)

    return {
        'Name': label,
        'Date of Birth': born,
        'Date of Death': died,
        'Type': joined_types,
        'Description': summary,
    }

# Function to create the interactive dashboard
def create_dashboard(df):
    """Build and display the interactive filter widgets, table and timeline.

    Shows a year-range slider, a multi-select of entity types and a name
    search box, plus an output area that is refreshed with the filtered table
    and a Plotly timeline whenever one of the widgets changes.

    Args:
        df: DataFrame with the columns 'Name', 'Date of Birth',
            'Date of Death', 'Type' and 'Description'. The caller's frame is
            left untouched; a private copy is used.

    Returns:
        None. If neither date column contains a single valid date, a message
        is printed and no dashboard is built.
    """
    # Work on a copy so the date coercion below does not mutate the caller's frame
    df = df.copy()

    # Ensure Date columns are datetime
    df['Date of Birth'] = pd.to_datetime(df['Date of Birth'], errors='coerce')
    df['Date of Death'] = pd.to_datetime(df['Date of Death'], errors='coerce')

    birth_year = df['Date of Birth'].dt.year
    death_year = df['Date of Death'].dt.year

    # Pool the years and drop NaN before taking min/max: the builtin min()/max()
    # can return NaN when a whole column is missing, and int(NaN) raises.
    known_years = pd.concat([birth_year, death_year]).dropna()
    if known_years.empty:
        print("No valid dates found; cannot build the dashboard.")
        return
    min_year = int(known_years.min())
    max_year = int(known_years.max())

    # Missing dates are treated as open-ended: an unknown birth counts as
    # year 0 and an unknown death as year 9999. These do not depend on the
    # widget state, so they are computed once rather than on every event.
    birth_year_filled = birth_year.fillna(0).astype(int)
    death_year_filled = death_year.fillna(9999).astype(int)

    # Widgets
    year_slider = widgets.IntRangeSlider(
        value=[min_year, max_year],
        min=min_year,
        max=max_year,
        step=1,
        description='Year Range:',
        continuous_update=False,
        layout=widgets.Layout(width='80%')
    )
    type_options = list(df['Type'].unique())
    type_dropdown = widgets.SelectMultiple(
        options=type_options,
        value=tuple(type_options),
        description='Type:',
        layout=widgets.Layout(width='50%', height='100px')
    )
    search_box = widgets.Text(
        value='',
        description='Search Name:',
        layout=widgets.Layout(width='50%')
    )
    output_area = widgets.Output()

    def filter_data(*args):
        """Re-render the table and timeline for the current widget values."""
        with output_area:
            clear_output()
            start_year, end_year = year_slider.value

            # Keep entities whose lifespans overlap with the selected year range
            mask = (birth_year_filled <= end_year) & (death_year_filled >= start_year)

            # Filter by type (an empty selection means "no type filter")
            selected_types = type_dropdown.value
            if selected_types:
                mask &= df['Type'].isin(selected_types)

            # Filter by search term. regex=False matches the text literally, so
            # characters such as '(' typed by the user cannot raise a regex
            # error; na=False keeps rows with a missing name out of the result
            # instead of putting NaN into the boolean mask.
            search_term = search_box.value.lower()
            if search_term:
                names_lower = df['Name'].str.lower()
                mask &= names_lower.str.contains(search_term, regex=False, na=False)

            filtered_df = df[mask]

            if filtered_df.empty:
                print("No data matches the filters.")
                return

            # Display filtered data
            display(filtered_df)

            # Plot
            # NOTE(review): rows missing either date pass the filter above but
            # presumably draw no bar on the timeline - confirm.
            fig = px.timeline(
                filtered_df,
                x_start="Date of Birth",
                x_end="Date of Death",
                y="Name",
                color="Type",
                hover_data=['Description'],
                title='Timeline'
            )
            fig.update_yaxes(autorange="reversed")
            fig.update_layout(height=600)
            fig.show()

    # Attach event handlers
    year_slider.observe(filter_data, names='value')
    type_dropdown.observe(filter_data, names='value')
    search_box.observe(filter_data, names='value')

    # Display widgets and output
    ui = widgets.VBox([year_slider, type_dropdown, search_box])
    display(ui, output_area)

    # Initial call to display data
    filter_data()

# Helper to read the FileUpload value regardless of the ipywidgets version
def _iter_uploads(value):
    """Yield ``(filename, content)`` pairs from a ``FileUpload.value``.

    ipywidgets 7.x exposes the value as a dict keyed by filename, whereas
    ipywidgets 8.x exposes a tuple of per-file dicts carrying a 'name' key.
    """
    if isinstance(value, dict):
        for filename, info in value.items():
            yield filename, info['content']
    else:
        for info in value:
            yield info['name'], info['content']

# Function to handle file uploads and process data
def on_upload_change(change):
    """Parse every uploaded JSON file and build the dashboard from the results.

    Args:
        change: The change notification passed by ``observe``; it is not
            inspected, the current ``upload_widget.value`` is read instead.

    Each file is decoded as UTF-8, parsed as JSON and turned into one record
    via ``extract_record``. A file that fails at any of those steps is
    reported with a printed message and skipped, so one bad file does not
    discard the rest of the batch. If no file yields a record, a message is
    printed instead of building the dashboard.
    """
    df_list = []
    for filename, content in _iter_uploads(upload_widget.value):
        try:
            # bytes() accepts both bytes and memoryview payloads
            json_str = bytes(content).decode('utf-8')
            data = json.loads(json_str)
            # Process data to extract relevant fields
            record = extract_record(data)
        except Exception as e:
            print(f"Error processing file {filename}: {e}")
        else:
            df_list.append(record)
    if df_list:
        df = pd.DataFrame(df_list)
        create_dashboard(df)
    else:
        print("No valid data uploaded.")

# Build the uploader: accepts one or more .json files per selection
upload_widget = widgets.FileUpload(accept='.json', multiple=True)

# Run the processing callback whenever the widget's value changes
upload_widget.observe(on_upload_change, names='value')

# Render the uploader in the notebook output
display(upload_widget)
