In [24]:
# Import necessary libraries
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px
import geopandas as gpd
from tqdm.auto import tqdm
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.api import ExponentialSmoothing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from math import sqrt
# Suppress every warning category from here on (not only deprecation
# notices), which keeps cell output clean but can also hide real problems.
import warnings
warnings.filterwarnings('ignore')



In [25]:
# Read the historical + forecast TB table into `df` and preview the first rows.
forecast_csv_path = '../../Datasets/Misc/PTB_EPTB_Total_lab_clinical_historical_forecasts.csv'
df = pd.read_csv(forecast_csv_path)
df.head()

Unnamed: 0,Source Table,LGA,Year-Quarter,PTB Total Actual and Forecast,PTB Total Actual and Forecast Lower 95%,PTB Total Actual and Forecast Upper 95%,EPTB Total Actual and Forecast,EPTB Total Actual and Forecast Lower 95%,EPTB Total Actual and Forecast Upper 95%,lab diagnosed Actual and Forecast,lab diagnosed Actual and Forecast Lower 95%,lab diagnosed Actual and Forecast Upper 95%,clinically diagnosed Actual and Forecast,clinically diagnosed Actual and Forecast Lower 95%,clinically diagnosed Actual and Forecast Upper 95%,Total TB Cases notified Actual and Forecast,Total TB Cases notified Actual and Forecast Lower 95%,Total TB Cases notified Actual and Forecast Upper 95%
0,Original Data,Birnin Gwari,2019Q1,25.0,,,0.0,,,25.0,,,0.0,,,25.0,,
1,Original Data,Birnin Gwari,2019Q2,19.0,,,0.0,,,15.0,,,4.0,,,19.0,,
2,Original Data,Birnin Gwari,2019Q3,14.0,,,0.0,,,14.0,,,0.0,,,14.0,,
3,Original Data,Birnin Gwari,2019Q4,10.0,,,0.0,,,10.0,,,0.0,,,10.0,,
4,Original Data,Chikun,2019Q1,70.0,,,5.0,,,61.0,,,14.0,,,75.0,,


In [15]:
# Inspect the column names. The interval columns are spelled with two spaces
# around "Lower"/"Upper", and the plotting cells index them by that exact text.
df.columns

Index(['Source Table', 'LGA', 'Year-Quarter', 'PTB Total Actual and Forecast',
       'PTB Total Actual and Forecast  Lower  95%',
       'PTB Total Actual and Forecast  Upper  95%',
       'EPTB Total Actual and Forecast',
       'EPTB Total Actual and Forecast  Lower  95%',
       'EPTB Total Actual and Forecast  Upper  95%',
       'lab diagnosed Actual and Forecast',
       'lab diagnosed Actual and Forecast  Lower  95%',
       'lab diagnosed Actual and Forecast  Upper  95%',
       'clinically diagnosed Actual and Forecast',
       'clinically diagnosed Actual and Forecast  Lower  95%',
       'clinically diagnosed Actual and Forecast  Upper  95%',
       'Total TB Cases notified Actual and Forecast',
       'Total TB Cases notified Actual and Forecast  Lower  95%',
       'Total TB Cases notified Actual and Forecast  Upper  95%'],
      dtype='object')

In [16]:
# NOTE: `go` is already bound by the imports cell (from plotly.graph_objs);
# this import is kept so the cell also runs on its own.
import plotly.graph_objects as go

# Interactive PTB chart: every LGA contributes three traces (lower bound,
# upper bound, point estimate) and a dropdown selects which LGA is shown.
metric = 'PTB Total Actual and Forecast'
lga_names = df['LGA'].unique()  # computed once and reused for the dropdown buttons
TRACES_PER_LGA = 3  # lower bound + upper bound + point estimate

traces = []
for position, lga in enumerate(lga_names):
    lga_data = df[df['LGA'] == lga]
    # Show the first LGA on load: the dropdown highlights its first button by
    # default, so leaving every trace hidden would render an empty chart under
    # a menu that claims an LGA is already selected.
    shown = position == 0

    # Lower bound of the 95% interval (zero-width line; the fill below ends here).
    # The interval column names contain two spaces around "Lower"/"Upper".
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Lower  95%'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Upper bound of the 95% interval; 'tonexty' shades down to the previous
    # trace, i.e. the lower bound appended just above.
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Upper  95%'],
        mode='lines',
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.5)',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Point estimate (actual values followed by the forecast).
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric],
        mode='lines',
        line=dict(color='blue'),
        name=metric,
        visible=shown,
    ))

# Dropdown: button j makes exactly the three traces of LGA j visible.
layout = go.Layout(
    title='PTB Total Actual and Forecast for each LGA',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=['visible', [i // TRACES_PER_LGA == j for i in range(len(traces))]],
                    label='LGA: ' + lga,
                    method='restyle',
                )
                for j, lga in enumerate(lga_names)
            ],
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0,
            xanchor='left',
            y=1.1,
            yanchor='top',
        ),
    ],
)

# Assemble and display the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()


In [17]:
# Interactive EPTB chart: every LGA contributes three traces (lower bound,
# upper bound, point estimate) and a dropdown selects which LGA is shown.
metric = 'EPTB Total Actual and Forecast'
lga_names = df['LGA'].unique()  # computed once and reused for the dropdown buttons
TRACES_PER_LGA = 3  # lower bound + upper bound + point estimate

traces = []
for position, lga in enumerate(lga_names):
    lga_data = df[df['LGA'] == lga]
    # Show the first LGA on load: the dropdown highlights its first button by
    # default, so leaving every trace hidden would render an empty chart under
    # a menu that claims an LGA is already selected.
    shown = position == 0

    # Lower bound of the 95% interval (zero-width line; the fill below ends here).
    # The interval column names contain two spaces around "Lower"/"Upper".
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Lower  95%'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Upper bound of the 95% interval; 'tonexty' shades down to the previous
    # trace, i.e. the lower bound appended just above.
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Upper  95%'],
        mode='lines',
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.5)',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Point estimate (actual values followed by the forecast).
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric],
        mode='lines',
        line=dict(color='blue'),
        name=metric,
        visible=shown,
    ))

# Dropdown: button j makes exactly the three traces of LGA j visible.
layout = go.Layout(
    title='EPTB Total Actual and Forecast for each LGA',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=['visible', [i // TRACES_PER_LGA == j for i in range(len(traces))]],
                    label='LGA: ' + lga,
                    method='restyle',
                )
                for j, lga in enumerate(lga_names)
            ],
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0,
            xanchor='left',
            y=1.1,
            yanchor='top',
        ),
    ],
)

# Assemble and display the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()


In [18]:
# Interactive lab-diagnosed chart: every LGA contributes three traces (lower
# bound, upper bound, point estimate) and a dropdown selects which LGA is shown.
metric = 'lab diagnosed Actual and Forecast'
lga_names = df['LGA'].unique()  # computed once and reused for the dropdown buttons
TRACES_PER_LGA = 3  # lower bound + upper bound + point estimate

traces = []
for position, lga in enumerate(lga_names):
    lga_data = df[df['LGA'] == lga]
    # Show the first LGA on load: the dropdown highlights its first button by
    # default, so leaving every trace hidden would render an empty chart under
    # a menu that claims an LGA is already selected.
    shown = position == 0

    # Lower bound of the 95% interval (zero-width line; the fill below ends here).
    # The interval column names contain two spaces around "Lower"/"Upper".
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Lower  95%'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Upper bound of the 95% interval; 'tonexty' shades down to the previous
    # trace, i.e. the lower bound appended just above.
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Upper  95%'],
        mode='lines',
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.5)',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Point estimate (actual values followed by the forecast).
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric],
        mode='lines',
        line=dict(color='blue'),
        name=metric,
        visible=shown,
    ))

# Dropdown: button j makes exactly the three traces of LGA j visible.
layout = go.Layout(
    title='Total Lab Diagnosed Actual and Forecast for each LGA',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=['visible', [i // TRACES_PER_LGA == j for i in range(len(traces))]],
                    label='LGA: ' + lga,
                    method='restyle',
                )
                for j, lga in enumerate(lga_names)
            ],
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0,
            xanchor='left',
            y=1.1,
            yanchor='top',
        ),
    ],
)

# Assemble and display the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()


In [19]:
# Column names again, for reference when picking the next metric to plot
# (same listing as the earlier df.columns cell).
df.columns

Index(['Source Table', 'LGA', 'Year-Quarter', 'PTB Total Actual and Forecast',
       'PTB Total Actual and Forecast  Lower  95%',
       'PTB Total Actual and Forecast  Upper  95%',
       'EPTB Total Actual and Forecast',
       'EPTB Total Actual and Forecast  Lower  95%',
       'EPTB Total Actual and Forecast  Upper  95%',
       'lab diagnosed Actual and Forecast',
       'lab diagnosed Actual and Forecast  Lower  95%',
       'lab diagnosed Actual and Forecast  Upper  95%',
       'clinically diagnosed Actual and Forecast',
       'clinically diagnosed Actual and Forecast  Lower  95%',
       'clinically diagnosed Actual and Forecast  Upper  95%',
       'Total TB Cases notified Actual and Forecast',
       'Total TB Cases notified Actual and Forecast  Lower  95%',
       'Total TB Cases notified Actual and Forecast  Upper  95%'],
      dtype='object')

In [20]:
# Interactive clinically-diagnosed chart: every LGA contributes three traces
# (lower bound, upper bound, point estimate) and a dropdown selects which LGA
# is shown.
metric = 'clinically diagnosed Actual and Forecast'
lga_names = df['LGA'].unique()  # computed once and reused for the dropdown buttons
TRACES_PER_LGA = 3  # lower bound + upper bound + point estimate

traces = []
for position, lga in enumerate(lga_names):
    lga_data = df[df['LGA'] == lga]
    # Show the first LGA on load: the dropdown highlights its first button by
    # default, so leaving every trace hidden would render an empty chart under
    # a menu that claims an LGA is already selected.
    shown = position == 0

    # Lower bound of the 95% interval (zero-width line; the fill below ends here).
    # The interval column names contain two spaces around "Lower"/"Upper".
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Lower  95%'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Upper bound of the 95% interval; 'tonexty' shades down to the previous
    # trace, i.e. the lower bound appended just above.
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Upper  95%'],
        mode='lines',
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.5)',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Point estimate (actual values followed by the forecast).
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric],
        mode='lines',
        line=dict(color='blue'),
        name=metric,
        visible=shown,
    ))

# Dropdown: button j makes exactly the three traces of LGA j visible.
layout = go.Layout(
    title='Total Clinically Diagnosed Actual and Forecast for each LGA',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=['visible', [i // TRACES_PER_LGA == j for i in range(len(traces))]],
                    label='LGA: ' + lga,
                    method='restyle',
                )
                for j, lga in enumerate(lga_names)
            ],
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0,
            xanchor='left',
            y=1.1,
            yanchor='top',
        ),
    ],
)

# Assemble and display the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()


In [21]:
# Column names once more, for reference when picking the next metric to plot
# (same listing as the earlier df.columns cells).
df.columns

Index(['Source Table', 'LGA', 'Year-Quarter', 'PTB Total Actual and Forecast',
       'PTB Total Actual and Forecast  Lower  95%',
       'PTB Total Actual and Forecast  Upper  95%',
       'EPTB Total Actual and Forecast',
       'EPTB Total Actual and Forecast  Lower  95%',
       'EPTB Total Actual and Forecast  Upper  95%',
       'lab diagnosed Actual and Forecast',
       'lab diagnosed Actual and Forecast  Lower  95%',
       'lab diagnosed Actual and Forecast  Upper  95%',
       'clinically diagnosed Actual and Forecast',
       'clinically diagnosed Actual and Forecast  Lower  95%',
       'clinically diagnosed Actual and Forecast  Upper  95%',
       'Total TB Cases notified Actual and Forecast',
       'Total TB Cases notified Actual and Forecast  Lower  95%',
       'Total TB Cases notified Actual and Forecast  Upper  95%'],
      dtype='object')

In [22]:
# Interactive total-notified-cases chart: every LGA contributes three traces
# (lower bound, upper bound, point estimate) and a dropdown selects which LGA
# is shown.
metric = 'Total TB Cases notified Actual and Forecast'
lga_names = df['LGA'].unique()  # computed once and reused for the dropdown buttons
TRACES_PER_LGA = 3  # lower bound + upper bound + point estimate

traces = []
for position, lga in enumerate(lga_names):
    lga_data = df[df['LGA'] == lga]
    # Show the first LGA on load: the dropdown highlights its first button by
    # default, so leaving every trace hidden would render an empty chart under
    # a menu that claims an LGA is already selected.
    shown = position == 0

    # Lower bound of the 95% interval (zero-width line; the fill below ends here).
    # The interval column names contain two spaces around "Lower"/"Upper".
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Lower  95%'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Upper bound of the 95% interval; 'tonexty' shades down to the previous
    # trace, i.e. the lower bound appended just above.
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric + '  Upper  95%'],
        mode='lines',
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.5)',
        line=dict(width=0),
        showlegend=False,
        visible=shown,
    ))

    # Point estimate (actual values followed by the forecast).
    traces.append(go.Scatter(
        x=lga_data['Year-Quarter'],
        y=lga_data[metric],
        mode='lines',
        line=dict(color='blue'),
        name=metric,
        visible=shown,
    ))

# Dropdown: button j makes exactly the three traces of LGA j visible.
layout = go.Layout(
    title='Total TB Diagnosed Actual and Forecast',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=['visible', [i // TRACES_PER_LGA == j for i in range(len(traces))]],
                    label='LGA: ' + lga,
                    method='restyle',
                )
                for j, lga in enumerate(lga_names)
            ],
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0,
            xanchor='left',
            y=1.1,
            yanchor='top',
        ),
    ],
)

# Assemble and display the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()
