To use dash and Plotly, make sure to run these commands in a terminal to install the necessary packages:

pip install dash <br>
pip install Plotly <br>
pip install jupyter-dash <br>

Make sure to also install pandas if you need to with this command:

pip install pandas

In [2]:
import plotly
import dash
import jupyter_dash

# Running this cell reports the installed version of each dashboard package
for label, module in (("Plotly", plotly), ("Dash", dash), ("JupyterDash", jupyter_dash)):
    print(f"{label} version:", module.__version__)


Plotly version: 5.22.0
Dash version: 2.18.2
JupyterDash version: 0.4.2


In [3]:
import numpy as np
import pandas as pd

# Report the NumPy version first, then the pandas version
for library in (np, pd):
    print(library.__version__)


1.26.4
2.2.2


To run the code below, first install the required packages from a terminal with `pip install dash plotly`. <br>
Make sure you have NumPy version 1.26.4 installed so that the code works. <br>

Make sure to run the first 2 code cells above before starting the Dash server below (i.e., before running the code cell below). <br>

To see the server in the browser: <br>
- Windows: Go to http://localhost:8050/
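- Mac: Go to http://localhost:8050/ as well (Dash serves on port 8050 by default; http://localhost:8888/ is normally the Jupyter server itself, so try it only if the dashboard is not reachable on 8050)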


You might need to run `pip install "numpy<2"` in your terminal. Keep the quotes: an unquoted `<` is interpreted by the shell as input redirection.

# COVID-19 Data Visualization and Prediction Dashboard

This code creates an interactive dashboard using Dash to visualize COVID-19 data (cases and deaths) by U.S. state. It allows users to select a state, data type (cases or deaths), display option (raw data or 7-day moving average), scale type (linear or logarithmic), and a date range for the data. The dashboard also generates trend line predictions using linear or polynomial regression (scikit-learn's `LinearRegression` and `PolynomialFeatures`).


In [6]:
from dash import dcc, html, Input, Output
import plotly.graph_objs as go
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Load and prepare your data.
# The table is wide: besides the identifier columns listed below, it has one
# "<date>_cases" and one "<date>_deaths" column per reporting date.
df = pd.read_csv('super_covid19_data.csv')

# Identifier columns carried through each melt unchanged.
cases_id_columns = ['countyFIPS', 'County Name_x', 'State', 'StateFIPS_cases']
deaths_id_columns = ['countyFIPS', 'County Name_x', 'State', 'StateFIPS_deaths']

# Prepare cases and deaths data separately.
# 'StateFIPS_cases' / 'StateFIPS_deaths' contain the metric suffix too, but
# they are identifiers, not dated measurements. Keep them out of the value
# columns so a non-date label can never reach pd.to_datetime below.
cases_columns = [col for col in df.columns
                 if '_cases' in col and col not in cases_id_columns]
deaths_columns = [col for col in df.columns
                  if '_deaths' in col and col not in deaths_id_columns]

# Melt cases data into long form: one row per (identifier row, date).
df_cases = df.melt(id_vars=cases_id_columns,
                   value_vars=cases_columns,
                   var_name='date',
                   value_name='cases')
# Strip the literal suffix (no regex) so only the date text is parsed.
df_cases['date'] = pd.to_datetime(df_cases['date'].str.replace('_cases', '', regex=False))

# Melt deaths data the same way.
df_deaths = df.melt(id_vars=deaths_id_columns,
                    value_vars=deaths_columns,
                    var_name='date',
                    value_name='deaths')
df_deaths['date'] = pd.to_datetime(df_deaths['date'].str.replace('_deaths', '', regex=False))

# Create the Dash application object
app = dash.Dash(__name__)


def _caption(text):
    """Return a blue ``html.Label`` showing ``text``."""
    return html.Label(text, style={'color': 'blue'})


def _radio_group(component_id, choices, selected):
    """Return a ``dcc.RadioItems`` whose option labels render inline in blue.

    ``choices`` is a sequence of ``(label, value)`` pairs and ``selected`` is
    the value that is checked initially.
    """
    return dcc.RadioItems(
        id=component_id,
        options=[{'label': text, 'value': key} for text, key in choices],
        value=selected,
        labelStyle={'display': 'inline-block', 'color': 'blue'},
    )


# The date picker is bounded by, and initially spans, every date in the cases data
_first_date = df_cases['date'].min()
_last_date = df_cases['date'].max()

# Page structure: title, five controls (each preceded by its caption), then the graph
app.layout = html.Div([
    html.H1("COVID-19 Data by State", style={'color': 'blue'}),

    _caption("Select State:"),
    dcc.Dropdown(
        id='state-dropdown',
        options=[{'label': name, 'value': name} for name in df_cases['State'].unique()],
        value='NC',
    ),

    _caption("Select Data Type:"),
    _radio_group('data-type-radio', [('Cases', 'cases'), ('Deaths', 'deaths')], 'cases'),

    _caption("Display Option:"),
    _radio_group('display-option-radio',
                 [('Raw Data', 'raw'), ('7-Day Moving Average', 'moving_avg')],
                 'raw'),

    _caption("Scale:"),
    _radio_group('scale-radio', [('Linear', 'linear'), ('Log', 'log')], 'linear'),

    _caption("Select Date Range:"),
    dcc.DatePickerRange(
        id='date-picker-range',
        min_date_allowed=_first_date,
        max_date_allowed=_last_date,
        start_date=_first_date,
        end_date=_last_date,
    ),

    dcc.Graph(id='covid-data-graph'),
])

# Functions for predictions
def generate_linear_prediction(dates, values):
    """Return the straight-line least-squares fit of ``values`` over ``dates``.

    Every element of ``dates`` must provide ``toordinal()``; the resulting day
    numbers are the single regression feature. The returned array holds the
    fitted value for each input date, in input order.
    """
    day_numbers = np.array([moment.toordinal() for moment in dates])
    # scikit-learn expects a 2-D (n_samples, n_features) matrix.
    features = day_numbers[:, np.newaxis]
    regressor = LinearRegression().fit(features, values)
    return regressor.predict(features)

def generate_nonlinear_prediction(dates, values, degree=3):
    """Fit a polynomial trend to ``values`` over ``dates`` and return the fit.

    Args:
        dates: Iterable of objects providing ``toordinal()`` (for example a
            pandas datetime Series).
        values: Target values aligned one-to-one with ``dates``.
        degree: Degree of the fitted polynomial (default 3).

    Returns:
        Array with the fitted value for each input date, in input order.
    """
    dates_ordinal = np.array([date.toordinal() for date in dates], dtype=float).reshape(-1, 1)

    # Present-day ordinals are around 7e5, so their cubes are around 1e17 and
    # the polynomial columns become nearly collinear, which makes the least-
    # squares problem badly conditioned. Rescaling the dates to [0, 1] spans
    # the same polynomial space (the ideal fit is unchanged) but keeps every
    # feature column at a comparable magnitude.
    if dates_ordinal.size:
        origin = dates_ordinal.min()
        span = dates_ordinal.max() - origin
        # A single distinct date has zero span; avoid dividing by zero.
        dates_ordinal = (dates_ordinal - origin) / (span if span > 0 else 1.0)

    poly = PolynomialFeatures(degree)
    dates_poly = poly.fit_transform(dates_ordinal)
    model = LinearRegression()
    model.fit(dates_poly, values)
    return model.predict(dates_poly)

# Callback to update the graph based on inputs
@app.callback(
    Output('covid-data-graph', 'figure'),
    [Input('state-dropdown', 'value'),
     Input('data-type-radio', 'value'),
     Input('display-option-radio', 'value'),
     Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date'),
     Input('scale-radio', 'value')]
)
def update_graph(selected_state, data_type, display_option, start_date, end_date, scale_type):
    """Rebuild the figure whenever one of the dashboard controls changes.

    Args:
        selected_state: Value of the state dropdown; None when it is cleared.
        data_type: 'cases' selects ``df_cases``; anything else selects
            ``df_deaths``.
        display_option: 'moving_avg' plots a 7-day rolling mean; otherwise the
            per-date state totals are plotted as they are.
        start_date: Inclusive lower bound of the date window; None when cleared.
        end_date: Inclusive upper bound of the date window; None when cleared.
        scale_type: 'linear' or 'log'. Used as the y-axis type and also picks
            the trend model (linear fit for 'linear', polynomial otherwise).

    Returns:
        A Plotly figure with the data trace and, when there is something to
        fit, a dashed trend trace. If a control is cleared or the selection
        has no rows, an empty figure with an explanatory title is returned.
    """
    fig = go.Figure()

    # A cleared dropdown or date picker sends None. Comparing dates with None
    # (or fitting a model on zero rows) would raise inside the callback, so
    # show a placeholder instead.
    if selected_state is None or start_date is None or end_date is None:
        fig.update_layout(title="Select a state and a date range to display data")
        return fig

    # Select the appropriate dataset
    if data_type == 'cases':
        source_df, column, y_label = df_cases, 'cases', 'Number of Cases'
    else:
        source_df, column, y_label = df_deaths, 'deaths', 'Number of Deaths'

    # Add up all rows of the selected state into one total per date.
    state_totals = source_df[source_df['State'] == selected_state].groupby('date')[column].sum()

    # Apply the 7-day moving average if selected. It must be a mean (not a
    # sum) to match the "7-Day Moving Average" label, and it is computed
    # before the date window is applied so that a window starting mid-series
    # still has its full seven days of history.
    if display_option == 'moving_avg':
        series = state_totals.rolling(window=7).mean()
    else:
        series = state_totals

    # Keep only the requested (inclusive) date window.
    in_range = ((series.index >= pd.Timestamp(start_date)) &
                (series.index <= pd.Timestamp(end_date)))
    daily_data = series[in_range].rename('value').reset_index()

    if daily_data.empty:
        fig.update_layout(
            title=f"No {column} data for {selected_state} in the selected date range"
        )
        return fig

    # NOTE(review): the source columns appear to be cumulative totals (the
    # later notebook cell differences them), so "Daily" in the trace name may
    # be misleading here - confirm against the data.
    fig.add_trace(go.Scatter(
        x=daily_data['date'], y=daily_data['value'],
        mode='lines', name=f'Daily {data_type.capitalize() if display_option == "raw" else "7-Day Avg"}'
    ))

    # Generate trend line predictions from the defined points only: the first
    # six rolling values of the series are NaN, and treating them as zeros
    # would drag the fitted trend toward zero.
    fit_points = daily_data.dropna(subset=['value'])
    if not fit_points.empty:
        if scale_type == 'linear':
            trend_line = generate_linear_prediction(fit_points['date'], fit_points['value'])
        else:
            trend_line = generate_nonlinear_prediction(fit_points['date'], fit_points['value'])
        fig.add_trace(go.Scatter(
            x=fit_points['date'], y=trend_line,
            mode='lines', name='Trend Line (Prediction)',
            line=dict(dash='dash')
        ))

    # Apply scale
    fig.update_layout(
        yaxis_type=scale_type,
        title=f"COVID-19 {data_type.capitalize()} in {selected_state}",
        xaxis_title="Date",
        yaxis_title=y_label
    )

    return fig

# Run the app
if __name__ == '__main__':
    # app.run() is the supported entry point. run_server() is only a
    # deprecated alias in Dash 2.x and is no longer available in Dash 3.
    app.run(debug=True)


# COVID-19 Data Visualization with Dash

This Dash app provides a dynamic and interactive way to explore COVID-19 cases and deaths data by state. Users can filter and visualize data, select different options for analysis, and view trend predictions based on both linear and non-linear models.

## 1. Data Preparation
- **Loading Data**: The data is loaded from a CSV file named `super_covid19_data.csv`.
- **Case and Death Data**: The columns in the dataset corresponding to cases and deaths are identified and separated into two lists:
  - `cases_columns`: Contains column names related to COVID-19 cases.
  - `deaths_columns`: Contains column names related to COVID-19 deaths.
- **Melting Data**: The data is reshaped (melted) to make it suitable for analysis:
  - `df_cases`: Contains case data for each county, along with a date column.
  - `df_deaths`: Contains death data for each county, along with a date column.
- **Date Parsing**: The date values in the 'date' columns are converted from string to datetime objects for proper date handling.

## 2. App Layout
- **Heading**: Displays the title "COVID-19 Data by State".
- **Dropdown for States**: Allows users to select a state to analyze. The options are dynamically generated from the unique states in the dataset.
- **Radio Buttons for Data Type**: Users can choose between viewing either 'Cases' or 'Deaths'.
- **Radio Buttons for Display Option**: Allows users to toggle between viewing 'Raw Data' or '7-Day Moving Average' of cases/deaths.
- **Radio Buttons for Scale**: Users can choose between 'Linear' or 'Log' scale for the y-axis.
- **Date Picker**: Users can select a range of dates for the data visualization.

## 3. Prediction Functions
- **Linear Prediction**: The function `generate_linear_prediction` uses the `LinearRegression` model to fit a linear trend line to the data. It transforms the dates into ordinal format and uses them as inputs for prediction.
- **Non-linear Prediction**: The function `generate_nonlinear_prediction` uses the `PolynomialFeatures` class to fit a polynomial regression model (default degree 3) to the data.

## 4. Callback Function to Update Graph
- **Inputs**: The callback is triggered by the following inputs:
  - State selected from the dropdown.
  - Data type ('cases' or 'deaths') selected from the radio buttons.
  - Display option ('raw' or 'moving_avg') selected from the radio buttons.
  - Date range selected from the date picker.
  - Scale type ('linear' or 'log') selected from the radio buttons.
  
- **Data Filtering**: Based on the selected inputs, the data is filtered to include the specified state, date range, and data type (cases or deaths). It also calculates daily values by taking the difference between consecutive data points (`diff()` method).
  
- **Moving Average**: If the "7-Day Moving Average" option is selected, the data is smoothed by applying a 7-day rolling mean to the daily counts.

- **Trend Line Generation**: Depending on the selected scale type ('linear' or 'log'), a trend line is generated using either linear or non-linear (polynomial) regression models.

- **Figure Creation**: A Plotly figure is created with the following traces:
  - A trace for the raw or smoothed data (based on the selected display option).
  - A trace for the trend line (predicted values).
  
- **Graph Updates**: The graph is updated with the appropriate scale (linear or log), title, and axis labels.

## 5. Running the App
The app runs on a local server with debugging enabled, and the user interface is rendered in the browser for interaction.

## Key Features:
- Dynamic selection of states, data types (cases or deaths), display options (raw or moving average), and scales (linear or log).
- Trend prediction using linear and polynomial regression models.
- Interactive graph visualization that updates based on user inputs.


In [7]:
from dash import dcc, html, Input, Output
import plotly.graph_objs as go
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Load and prepare your data.
# The table is wide: besides the identifier columns listed below, it has one
# "<date>_cases" and one "<date>_deaths" column per reporting date.
df = pd.read_csv('super_covid19_data.csv')

# Identifier columns carried through each melt unchanged.
cases_id_columns = ['countyFIPS', 'County Name_x', 'State', 'StateFIPS_cases']
deaths_id_columns = ['countyFIPS', 'County Name_x', 'State', 'StateFIPS_deaths']

# Prepare cases and deaths data separately.
# 'StateFIPS_cases' / 'StateFIPS_deaths' contain the metric suffix too, but
# they are identifiers, not dated measurements. Keep them out of the value
# columns so a non-date label can never reach pd.to_datetime below.
cases_columns = [col for col in df.columns
                 if '_cases' in col and col not in cases_id_columns]
deaths_columns = [col for col in df.columns
                  if '_deaths' in col and col not in deaths_id_columns]

# Melt cases data into long form: one row per (identifier row, date).
df_cases = df.melt(id_vars=cases_id_columns,
                   value_vars=cases_columns,
                   var_name='date',
                   value_name='cases')
# Strip the literal suffix (no regex) so only the date text is parsed.
df_cases['date'] = pd.to_datetime(df_cases['date'].str.replace('_cases', '', regex=False))

# Melt deaths data the same way.
df_deaths = df.melt(id_vars=deaths_id_columns,
                    value_vars=deaths_columns,
                    var_name='date',
                    value_name='deaths')
df_deaths['date'] = pd.to_datetime(df_deaths['date'].str.replace('_deaths', '', regex=False))

# Create the Dash application object
app = dash.Dash(__name__)


def _caption(text):
    """Return a blue ``html.Label`` showing ``text``."""
    return html.Label(text, style={'color': 'blue'})


def _radio_group(component_id, choices, selected):
    """Return a ``dcc.RadioItems`` whose option labels render inline in blue.

    ``choices`` is a sequence of ``(label, value)`` pairs and ``selected`` is
    the value that is checked initially.
    """
    return dcc.RadioItems(
        id=component_id,
        options=[{'label': text, 'value': key} for text, key in choices],
        value=selected,
        labelStyle={'display': 'inline-block', 'color': 'blue'},
    )


# The date picker is bounded by, and initially spans, every date in the cases data
_first_date = df_cases['date'].min()
_last_date = df_cases['date'].max()

# Page structure: title, five controls (each preceded by its caption), then the graph
app.layout = html.Div([
    html.H1("COVID-19 Data by State", style={'color': 'blue'}),

    _caption("Select State:"),
    dcc.Dropdown(
        id='state-dropdown',
        options=[{'label': name, 'value': name} for name in df_cases['State'].unique()],
        value='NC',
    ),

    _caption("Select Data Type:"),
    _radio_group('data-type-radio', [('Cases', 'cases'), ('Deaths', 'deaths')], 'cases'),

    _caption("Display Option:"),
    _radio_group('display-option-radio',
                 [('Raw Data', 'raw'), ('7-Day Moving Average', 'moving_avg')],
                 'raw'),

    _caption("Scale:"),
    _radio_group('scale-radio', [('Linear', 'linear'), ('Log', 'log')], 'linear'),

    _caption("Select Date Range:"),
    dcc.DatePickerRange(
        id='date-picker-range',
        min_date_allowed=_first_date,
        max_date_allowed=_last_date,
        start_date=_first_date,
        end_date=_last_date,
    ),

    dcc.Graph(id='covid-data-graph'),
])

# Functions for predictions
def generate_linear_prediction(dates, values):
    """Return the straight-line least-squares fit of ``values`` over ``dates``.

    Every element of ``dates`` must provide ``toordinal()``; the resulting day
    numbers are the single regression feature. The returned array holds the
    fitted value for each input date, in input order.
    """
    day_numbers = np.array([moment.toordinal() for moment in dates])
    # scikit-learn expects a 2-D (n_samples, n_features) matrix.
    features = day_numbers[:, np.newaxis]
    regressor = LinearRegression().fit(features, values)
    return regressor.predict(features)

def generate_nonlinear_prediction(dates, values, degree=3):
    """Fit a polynomial trend to ``values`` over ``dates`` and return the fit.

    Args:
        dates: Iterable of objects providing ``toordinal()`` (for example a
            pandas datetime Series).
        values: Target values aligned one-to-one with ``dates``.
        degree: Degree of the fitted polynomial (default 3).

    Returns:
        Array with the fitted value for each input date, in input order.
    """
    dates_ordinal = np.array([date.toordinal() for date in dates], dtype=float).reshape(-1, 1)

    # Present-day ordinals are around 7e5, so their cubes are around 1e17 and
    # the polynomial columns become nearly collinear, which makes the least-
    # squares problem badly conditioned. Rescaling the dates to [0, 1] spans
    # the same polynomial space (the ideal fit is unchanged) but keeps every
    # feature column at a comparable magnitude.
    if dates_ordinal.size:
        origin = dates_ordinal.min()
        span = dates_ordinal.max() - origin
        # A single distinct date has zero span; avoid dividing by zero.
        dates_ordinal = (dates_ordinal - origin) / (span if span > 0 else 1.0)

    poly = PolynomialFeatures(degree)
    dates_poly = poly.fit_transform(dates_ordinal)
    model = LinearRegression()
    model.fit(dates_poly, values)
    return model.predict(dates_poly)

# Callback to update the graph based on inputs
@app.callback(
    Output('covid-data-graph', 'figure'),
    [Input('state-dropdown', 'value'),
     Input('data-type-radio', 'value'),
     Input('display-option-radio', 'value'),
     Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date'),
     Input('scale-radio', 'value')]
)
def update_graph(selected_state, data_type, display_option, start_date, end_date, scale_type):
    """Rebuild the figure whenever one of the dashboard controls changes.

    Args:
        selected_state: Value of the state dropdown; None when it is cleared.
        data_type: 'cases' selects ``df_cases``; anything else selects
            ``df_deaths``.
        display_option: 'moving_avg' plots a 7-day rolling mean of the daily
            counts; otherwise the daily counts are plotted as they are.
        start_date: Inclusive lower bound of the date window; None when cleared.
        end_date: Inclusive upper bound of the date window; None when cleared.
        scale_type: 'linear' or 'log'. Used as the y-axis type and also picks
            the trend model (linear fit for 'linear', polynomial otherwise).

    Returns:
        A Plotly figure with the data trace and, when there is something to
        fit, a dashed trend trace. If a control is cleared or the selection
        has no rows, an empty figure with an explanatory title is returned.
    """
    fig = go.Figure()

    # A cleared dropdown or date picker sends None. Comparing dates with None
    # (or fitting a model on zero rows) would raise inside the callback, so
    # show a placeholder instead.
    if selected_state is None or start_date is None or end_date is None:
        fig.update_layout(title="Select a state and a date range to display data")
        return fig

    # Select the appropriate dataset
    if data_type == 'cases':
        source_df, column, y_label = df_cases, 'cases', 'Number of Cases'
    else:
        source_df, column, y_label = df_deaths, 'deaths', 'Number of Deaths'

    # Add up all rows of the selected state into one total per date.
    state_totals = source_df[source_df['State'] == selected_state].groupby('date')[column].sum()

    # Convert to daily counts on the full series, before the date window is
    # applied. Differencing after filtering would force the first day of every
    # selected window to 0, because it has no predecessor inside the window.
    daily_counts = state_totals.diff().fillna(0)

    # Apply 7-day moving average if selected. This is also done before
    # windowing, so a window starting mid-series keeps its full seven days of
    # history.
    if display_option == 'moving_avg':
        series = daily_counts.rolling(window=7).mean()
    else:
        series = daily_counts

    # Keep only the requested (inclusive) date window.
    in_range = ((series.index >= pd.Timestamp(start_date)) &
                (series.index <= pd.Timestamp(end_date)))
    daily_data = series[in_range].rename('value').reset_index()

    if daily_data.empty:
        fig.update_layout(
            title=f"No {column} data for {selected_state} in the selected date range"
        )
        return fig

    fig.add_trace(go.Scatter(
        x=daily_data['date'], y=daily_data['value'],
        mode='lines', name=f'Daily {data_type.capitalize() if display_option == "raw" else "7-Day Avg"}'
    ))

    # Generate trend line predictions from the defined points only: the first
    # six rolling values of the series are NaN, and treating them as zeros
    # would drag the fitted trend toward zero.
    fit_points = daily_data.dropna(subset=['value'])
    if not fit_points.empty:
        if scale_type == 'linear':
            trend_line = generate_linear_prediction(fit_points['date'], fit_points['value'])
        else:
            trend_line = generate_nonlinear_prediction(fit_points['date'], fit_points['value'])
        fig.add_trace(go.Scatter(
            x=fit_points['date'], y=trend_line,
            mode='lines', name='Trend Line (Prediction)',
            line=dict(dash='dash')
        ))

    # Apply scale
    fig.update_layout(
        yaxis_type=scale_type,
        title=f"COVID-19 {data_type.capitalize()} in {selected_state}",
        xaxis_title="Date",
        yaxis_title=y_label
    )

    return fig

# Run the app
if __name__ == '__main__':
    # app.run() is the supported entry point. run_server() is only a
    # deprecated alias in Dash 2.x and is no longer available in Dash 3.
    app.run(debug=True)