In [1]:
# third-party
import pandas as pd
import numpy as np

# local
from filepath import FILEPATH

In [2]:
# Read the "overview" worksheet of the workbook located at FILEPATH.
overview = pd.read_excel(io=FILEPATH, sheet_name="overview")


# Get summary of Input Data

First, we need to split each entry into its individual input data types (entries that list several types join them with " and ").

In [3]:
# Split every description on the literal separator " and " so that each input
# type lands in its own integer-labelled column (missing values pad short rows).
split_inputs = overview["Input data (description)"].str.split(" and ", expand=True)

# Place the Year column in front of the split columns, aligned on overview's index.
input_exp = pd.concat(
    [overview["Year"], split_inputs],
    axis=1,
).reindex(overview.index)
input_exp


Unnamed: 0,Year,0,1,2,3,4,5,6
0,2021,EEG,seizure times,,,,,
1,2022,EEG cyclic profile,,,,,,
2,2013,EEG,,,,,,
3,2024,EEG,,,,,,
4,2022,EEG,,,,,,
5,2023,HR,,,,,,
6,2017,EEG,seizure cyclic profile,,,,,
7,2020,seizure cyclic profile,,,,,,
8,2022,IEA,seizure times,IEA cyclic profile,,,,
9,2020,EEG cyclic profile,,,,,,


In [4]:
# Reshape to long form: one row per (Year, Input) pair.
#   - melt stacks the split columns into a single "Input" column,
#   - dropna removes the rows that hold missing values (padding from the split),
#   - the helper "variable" column (former column label) is no longer needed.
input_year = (
    input_exp
    .melt(id_vars=["Year"], value_name="Input")
    .dropna()
    .drop(columns="variable")
)
input_year


Unnamed: 0,Year,Input
0,2021,EEG
1,2022,EEG cyclic profile
2,2013,EEG
3,2024,EEG
4,2022,EEG
5,2023,HR
6,2017,EEG
7,2020,seizure cyclic profile
8,2022,IEA
9,2020,EEG cyclic profile


Count how often each input type appears in each year, recording a frequency of 0 for year–input combinations that never occur.

In [73]:
# Number of occurrences of every input type within each year.
pair_counts = input_year.groupby(["Year", "Input"])["Input"].count()

# Round-trip through a wide table so that every Year x Input combination is
# present, with 0 where the pair never occurs in the data.
all_pairs = pair_counts.unstack(fill_value=0).stack()

# Back to a flat frame; the unnamed value column comes out labelled 0.
freq = all_pairs.reset_index().rename(columns={0: "freq"})

# Integer code per distinct input type, used to colour the markers.
freq["color"] = pd.factorize(freq["Input"])[0]
freq

Unnamed: 0,Year,Input,freq,color
0,2013,ACC,0,0
1,2013,BVP,0,1
2,2013,EDA,0,2
3,2013,EEG,1,3
4,2013,EEG cyclic profile,0,4
...,...,...,...,...
121,2024,seizure times,0,13
122,2024,signal quality metrics,0,14
123,2024,sleep,0,15
124,2024,sleep cyclic profile,0,16


## Bubble Chart 

In [89]:
import plotly.graph_objects as go
import plotly.express as px

# Diameter, in pixels, that the bubble with the highest frequency should get.
# Used in plotly's recommended formula for area-scaled markers:
#   sizeref = 2 * max(size values) / (desired max marker size ** 2)
MAX_BUBBLE_PX = 100

fig = go.Figure()

# One bubble per (Year, Input) pair; the bubble area encodes the frequency and
# the colour encodes the input type (integer code from the `color` column).
fig.add_trace(go.Scatter(
    x=freq["Year"], y=freq["Input"],
    mode='markers',
    marker=dict(
        size=freq['freq'],
        color=freq['color'],
        sizemode='area',
        sizeref=2. * max(freq['freq']) / (MAX_BUBBLE_PX ** 2),
        line=dict(width=2),
    ),
))

fig.update_layout(
    title='Data input for seizure forecasting algorithms across time',
    xaxis=dict(
        title='Year',
        gridcolor='white',
        # Years are evenly spaced calendar values: use a linear axis (not a
        # logarithmic one) with exactly one tick per year.
        type='linear',
        tickmode='linear',
        dtick=1,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Input data',
        gridcolor='white',
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)
# Treat input names as categories and label every one of them.
fig.update_yaxes(type='category', tickmode='linear')


fig.show()


## Line Chart

In [75]:
# Take the axis limits from the data instead of hard-coding them, so the chart
# keeps showing everything when the spreadsheet gains other years or higher
# counts. The y-axis ceiling never drops below 4, the previously fixed limit.
first_year = int(freq["Year"].min())
last_year = int(freq["Year"].max())
freq_ceiling = max(4, int(freq["freq"].max()))

fig = px.line(
    freq,
    x="Year",
    y="freq",
    color='Input',
    title='Frequency of each input data type per year',
    labels={"freq": "Frequency"},
)
fig.update_layout(
    yaxis_range=[0, freq_ceiling],
    xaxis_range=[first_year, last_year],
)

fig.show()