In [17]:
!pip install torch



In [195]:
!pip install plotly



In [2]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import calendar
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler 
import torch
import torch.nn as nn
import torch.optim as optim
import plotly.graph_objs as go
import plotly.offline as pyo

In [3]:
# load read data
df_hepatitis = pd.read_csv('hepatitis.csv')
df_measles = pd.read_csv('measles.csv')
df_mumps = pd.read_csv('mumps.csv')
df_pertussis = pd.read_csv('pertussis.csv')
df_rubella = pd.read_csv('rubella.csv')
df_smallpox = pd.read_csv('smallpox.csv')
df_hepatitis

Unnamed: 0,week,state,state_name,disease,cases,incidence_per_capita
0,196601,AL,ALABAMA,HEPATITIS A,5,0.14
1,196601,AR,ARKANSAS,HEPATITIS A,11,0.58
2,196601,AZ,ARIZONA,HEPATITIS A,6,0.37
3,196601,CA,CALIFORNIA,HEPATITIS A,89,0.47
4,196601,CO,COLORADO,HEPATITIS A,1,0.05
...,...,...,...,...,...,...
90834,201152,VT,VERMONT,HEPATITIS A,0,0.00
90835,201152,WA,WASHINGTON,HEPATITIS A,0,0.00
90836,201152,WI,WISCONSIN,HEPATITIS A,0,0.00
90837,201152,WV,WEST VIRGINIA,HEPATITIS A,0,0.00


In [7]:
# Taking care of data discrepancies 
dfs = [df_hepatitis, df_measles, df_mumps, df_pertussis, df_rubella, df_smallpox]

for i, df in enumerate(dfs):
    max_cases = df['cases'].max()  
    dfs[i] = df[df['cases'] != max_cases]  

class DiseasePredictor(nn.Module):
    def __init__(self, input_dim):
        super(DiseasePredictor, self).__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Preprocessing function to normalize the features
def preprocess_data(df):
    scaler = MinMaxScaler()
    df[['week', 'incidence_per_capita', 'cases']] = scaler.fit_transform(df[['week', 'incidence_per_capita', 'cases']])
    return df

# Loop through each DataFrame
disease_dfs = {
    'Hepatitis': df_hepatitis,
    'Measles': df_measles,
    'Mumps': df_mumps,
    'Pertussis': df_pertussis,
    'Rubella': df_rubella,
    'Smallpox': df_smallpox
}

# Fixed the loop to correctly process each disease
for disease, df in disease_dfs.items():
    print(f"\nProcessing {disease}")
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Define features and target
    X = df[['week', 'incidence_per_capita', 'cases']]  # Features
    y = df['cases']  # Target
    
    # Split into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Convert to tensors
    X_train_tensor, y_train_tensor = torch.tensor(X_train.values, dtype=torch.float32), torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    X_val_tensor, y_val_tensor = torch.tensor(X_val.values, dtype=torch.float32), torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)
    
    # Initialize model
    model = DiseasePredictor(input_dim=X.shape[1])  # Input dimension is the number of features
    
    # Initialize optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    
    # Training loop
    epochs = 5
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()
    
    # Evaluation
    model.eval()
    with torch.no_grad():
        predictions = model(X_val_tensor)
        val_loss = criterion(predictions, y_val_tensor)
        print(f"{disease} Validation Loss: {val_loss.item()}")

    # Optionally, save each model with a disease-specific name
    torch.save(model.state_dict(), f'{disease.lower()}_model.pth')


Processing Hepatitis
Hepatitis Validation Loss: 0.0014427057467401028

Processing Measles
Measles Validation Loss: 0.007413318380713463

Processing Mumps
Mumps Validation Loss: 0.0013123893877491355

Processing Pertussis
Pertussis Validation Loss: 0.0022524010855704546

Processing Rubella
Rubella Validation Loss: 0.00234160921536386

Processing Smallpox
Smallpox Validation Loss: 0.0024160693865269423


In [11]:
disease_models = {}
for disease in disease_dfs.keys():
    model = DiseasePredictor(input_dim=3)  # Change input_dim to 3 since there are now 3 features
    model.load_state_dict(torch.load(f'{disease.lower()}_model.pth'))
    model.eval()
    disease_models[disease] = model

# Create tables for each disease
predicted_tables = {}
for disease, model in disease_models.items():
    # Create a DataFrame to store predicted cases
    predicted_df = pd.DataFrame(columns=['state', 'predicted_cases'])
    states = disease_dfs[disease]['state'].unique()  # Get unique states
    for state in states:
        # Prepare input tensor for prediction
        
        state_df = disease_dfs[disease][disease_dfs[disease]['state'] == state] 
        
        if disease == 'Hepatitis':
            state_df *= 100
        if disease == 'Measles':
            state_df *= 100
        if disease == 'Mumps':
            state_df *= 10
        if disease == 'Pertussis':
            state_df *= 100
        features_tensor = torch.tensor(state_df[['week', 'incidence_per_capita', 'cases']].values, dtype=torch.float32)
        
        # Make predictions
        with torch.no_grad():
            predictions = model(features_tensor).numpy()
            
        predictions = np.abs(predictions)
        
        # Append predicted cases to DataFrame
        predicted_df = predicted_df.append({'state': state, 'predicted_cases': predictions.mean()}, ignore_index=True)
    
    # Sort DataFrame by state for better readability
    predicted_df.sort_values(by='state', inplace=True)
    
    # Store the predicted table for the disease
    predicted_tables[disease] = predicted_df

# Display tables
for disease, table in predicted_tables.items():
    print(f"\n{disease} Predicted Cases Table:")
    print(table)


Hepatitis Predicted Cases Table:
   state  predicted_cases
44    AK         2.490966
0     AL         2.188398
1     AR         2.115860
2     AZ         2.250156
3     CA         2.277862
4     CO         2.253590
5     CT         2.204620
50    DC         2.314240
6     DE         2.305641
7     FL         2.222294
8     GA         2.215647
9     HI         2.272940
10    IA         2.228452
11    ID         2.239572
12    IL         2.167159
13    IN         2.176630
14    KS         2.184398
15    KY         2.219060
16    LA         2.160326
17    MA         2.123902
18    MD         2.211813
19    ME         2.312652
20    MI         2.211100
21    MN         2.145072
22    MO         2.206083
23    MS         2.113119
24    MT         2.226837
25    NC         2.186607
26    ND         1.993811
46    NE         2.406503
27    NH         2.277140
28    NJ         2.149621
29    NM         2.220386
30    NV         2.511564
45    NY         2.202471
31    OH         2.212927
47  

In [9]:
def create_heatmap(predicted_table, disease_name):
    # Create the heatmap
    fig = go.Figure(data=go.Choropleth(
        locations=predicted_table['state'],  # Spatial coordinates
        z=predicted_table['predicted_cases'].astype(float),  # Data to be color-coded
        locationmode='USA-states',  # set of locations match entries in `locations`
        colorscale='Reds',
        colorbar_title="Predicted Cases",
    ))

    fig.update_layout(
        title_text=f'Predicted {disease_name} Cases by State Cases Per 100,000 Population',
        geo_scope='usa',  # limit map scope to USA
    )

    # Save the plot as an HTML file
    filename = f'heatmap_{disease_name.lower()}.html'
    pyo.plot(fig, filename=filename)

# Generate and save a heatmap for each disease
for disease_name, predicted_table in predicted_tables.items():
    create_heatmap(predicted_table, disease_name)

In [30]:
model_files = ['hepatitis_model.pth', 'measles_model.pth', 'mumps_model.pth', 
               'pertussis_model.pth', 'rubella_model.pth', 'smallpox_model.pth']
models = {}

for file_name in model_files:
    model = DiseasePredictor(input_dim=3)
    model.load_state_dict(torch.load(file_name))
    model.eval()
    models[file_name] = model