In [None]:
import geopandas as gpd
import pandas as pd


In [None]:
# Relative path to the US counties shapefile (resolved against the notebook's working directory).
shapefile_path = "UScounties.shp"
# Optional sanity check of the shapefile, currently disabled:
# gdf = gpd.read_file(shapefile_path)
# print(gdf.head())
# gdf.plot()

In [None]:
# Load the county-level COVID-19 table.
# Column dtypes are left to pandas' inference on purpose: later cells compare
# FIPS codes numerically (the shapefile FIPS column is cast to float and
# get_neighbors() converts its argument with float()), so reading 'fips' as a
# string dtype would break those comparisons.
covid_data = pd.read_csv("covidData2021-2022.csv")

# Parse the 'date' column into datetime64 so it can be grouped and plotted
# chronologically; raises if a value cannot be parsed.
covid_data["date"] = pd.to_datetime(covid_data["date"])

# Small preview instead of materialising (almost) the whole frame.
print(covid_data.head(20))

# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
covid_data.info()

In [None]:
import matplotlib.pyplot as plt

# Derive per-county daily new cases from the 'cases' column of `covid_data`.
# diff() subtracts the previous row of the same (county, state) group, so the
# rows are ordered by date first (stable sort keeps same-day ties in file
# order). The result is aligned back to `covid_data` by index, so the frame's
# own row order is left untouched.
# The first row of each group has no predecessor; as before, it is filled with
# that row's own 'cases' value.
# NOTE(review): if 'cases' is a running total, that first-row fill counts the
# whole total accumulated before the first date as "new" cases - confirm this
# is intended.
covid_data['daily_new_cases'] = (
    covid_data.sort_values('date', kind='stable')
    .groupby(['county', 'state'])['cases']
    .diff()
    .fillna(covid_data['cases'])
)

# Replace anything that is not >= 0 (negative corrections, NaN) with 0.
# Vectorised equivalent of the former row-wise apply(lambda ...).
covid_data['daily_new_cases'] = covid_data['daily_new_cases'].where(
    covid_data['daily_new_cases'] >= 0, 0
)

# Total daily new cases across all counties and states for each date.
daily_new_cases = covid_data.groupby('date')['daily_new_cases'].sum().reset_index()

# Plot the daily totals over time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(daily_new_cases['date'], daily_new_cases['daily_new_cases'],
        label='Daily New Cases', color='red')
ax.set_title('Daily New COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Daily New Cases')
ax.tick_params(axis='x', labelrotation=45)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
import numpy as np

# Make sure 'date' holds datetimes before using the .dt accessor
covid_data['date'] = pd.to_datetime(covid_data['date'])

# Map every date forward to the Friday that closes its week
# (weekday 4 is Friday; a Friday maps to itself because the offset is 0)
covid_data['end_of_week'] = covid_data['date'] + pd.to_timedelta(
    covid_data['date'].dt.dayofweek.rsub(4).mod(7), unit='d')

# Total the daily new cases per (fips, state, week-ending Friday)
weekly_cases = covid_data.groupby(
    ['fips', 'state', 'end_of_week'], as_index=False
)['daily_new_cases'].sum()

# Preview: one row per county and week with its summed new cases
weekly_cases.head(20)

In [None]:
# Build the full (FIPS, Week) grid: one row for every county in every week,
# using the 'fips' and 'end_of_week' columns of `weekly_cases`.
import itertools

# Distinct FIPS codes and week-ending dates, in order of first appearance
# (Series.unique() does not sort).
unique_fips = weekly_cases['fips'].unique()
unique_weeks = weekly_cases['end_of_week'].unique()

# Cartesian product: every FIPS code paired with every week.
fips_week_combinations = list(itertools.product(unique_fips, unique_weeks))
fips_weeks = pd.DataFrame(fips_week_combinations, columns=['FIPS', 'Week'])

# Attach the state name to each FIPS code. `covid_data` holds one row per
# county per day, so the lookup must be reduced to distinct (state, fips)
# pairs first; merging against the raw daily rows would repeat every
# (FIPS, Week) row once per daily record of that county.
fips_weeks = fips_weeks.merge(
    covid_data[["state", "fips"]].drop_duplicates(),
    left_on="FIPS",
    right_on="fips",
)

# Release the large intermediates to free kernel memory.
# NOTE: because `covid_data` is cleared here, this cell (and the cells above
# that use it) cannot be re-run without reloading the data first.
covid_data = None
fips_week_combinations = None


In [None]:
# Attach the weekly new-case totals to every (FIPS, Week) row of `fips_weeks`.
# Only the key columns and the totals are taken from `weekly_cases`, renamed to
# match the grid's key names. Merging the full frame would collide with the
# 'state' and 'fips' columns already present in `fips_weeks`, producing
# 'state_x'/'state_y' and 'fips_x'/'fips_y' and leaving no plain 'state' column.
complete_data = fips_weeks.merge(
    weekly_cases[['fips', 'end_of_week', 'daily_new_cases']].rename(
        columns={'fips': 'FIPS', 'end_of_week': 'Week'}
    ),
    how='left',
    on=['FIPS', 'Week'],
)

# Weeks with no matching record for a county come back as NaN; treat them as 0.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection, which is chained assignment and may not update the frame.
complete_data['daily_new_cases'] = complete_data['daily_new_cases'].fillna(0)

# `complete_data` now has one row per (FIPS, Week) with the FIPS code, the
# week-ending date, the state and the number of new cases for that week.

# Small preview instead of materialising (almost) the whole frame.
complete_data.head(20)

In [None]:
import geopandas as gpd

# The weekly aggregate is not needed from here on; drop the reference.
weekly_cases = None

# Read the county geometries from the shapefile.
shapefile_path = 'UScounties.shp'
counties = gpd.read_file(shapefile_path)

# Store FIPS codes as floats so they can be compared with float FIPS values.
counties["FIPS"] = counties.loc[:, "FIPS"].astype("float64")

# Print a structural summary of the GeoDataFrame.
counties.info()
counties.head(-20)


In [None]:
def get_neighbors(fips_code):
    """Return the FIPS codes of the counties whose geometry touches the given county.

    Parameters
    ----------
    fips_code
        FIPS code of the county of interest; it is converted with ``float()``
        before being compared against ``counties['FIPS']``.

    Returns
    -------
    list
        ``FIPS`` values of every row in the module-level ``counties``
        GeoDataFrame that touches the selected county, excluding the county's
        own code. Empty when no row in ``counties`` has that FIPS code.
    """
    target_fips = float(fips_code)

    # Row(s) of the requested county
    target_rows = counties.loc[counties['FIPS'] == target_fips]
    if target_rows.empty:
        return []

    # Spatial join of all counties against the target: keeps the rows that
    # touch it. Both sides carry a 'FIPS' column, so the left-hand (candidate
    # neighbor) values are read from 'FIPS_left'.
    touching = gpd.sjoin(counties, target_rows, how='inner', predicate='touches')

    result = []
    for code in touching['FIPS_left'].tolist():
        if code != target_fips:
            result.append(code)
    return result


In [None]:
# Spot-check the neighbor lookup on a single sample FIPS code.
get_neighbors(27077.0)

In [None]:
from pgmpy.models import BayesianNetwork

# Start from an empty Bayesian network.
bayesian_network = BayesianNetwork()

# One node per distinct (FIPS, Week) pair in `complete_data`.
# itertuples() on the two key columns avoids building a Series per row as
# iterrows() does, and drop_duplicates() keeps repeated rows from producing
# the same node tuple many times (adding a node twice is a no-op anyway).
nodes = list(
    complete_data[['FIPS', 'Week']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)
bayesian_network.add_nodes_from(nodes)


In [None]:
# Build the candidate edges of the network: for every pair of consecutive
# weeks, connect (county, week) -> (neighboring county, next week).

# Chronologically sorted list of the distinct weeks.
unique_weeks = sorted(complete_data['Week'].unique())

# The accumulator must exist before the loop appends to it.
potential_edges = []

# A county's neighbors do not depend on the week, so each spatial lookup is
# done once and reused for all weeks.
neighbor_cache = {}

for week, next_week in zip(unique_weeks, unique_weeks[1:]):
    current_week_fips = complete_data.loc[complete_data['Week'] == week, 'FIPS'].unique()
    next_week_fips = complete_data.loc[complete_data['Week'] == next_week, 'FIPS'].unique()
    # Set for constant-time membership tests inside the inner loop.
    next_week_lookup = set(next_week_fips.tolist())

    for fips in current_week_fips:
        if fips not in neighbor_cache:
            neighbor_cache[fips] = get_neighbors(fips)

        for neighbor in neighbor_cache[fips]:
            # Only keep the edge if the neighbor also has a record in the next week.
            if neighbor in next_week_lookup:
                potential_edges.append(((fips, week), (neighbor, next_week)))


In [None]:
# Report the number of candidate edges and show only a short sample; printing
# the whole list would flood the notebook output.
print(f"{len(potential_edges)} potential edges")
print(potential_edges[:10])

In [None]:
from pgmpy.estimators import HillClimbSearch, BicScore

# Structure search over `complete_data`; the scoring method is supplied later,
# in the estimate() call.
hcs = HillClimbSearch(complete_data)

# BIC score computed on the same data.
bic = BicScore(complete_data)

# Hill-climb with BIC as the score, restricted to the candidate edges.
# NOTE(review): pgmpy's estimators appear to treat each DataFrame column as a
# variable, whereas `potential_edges` connects (FIPS, Week) tuples - confirm
# that the whitelist actually matches the variables being searched.
best_model = hcs.estimate(
    scoring_method=bic,
    white_list=potential_edges,
)

# Rebuild the Bayesian network from the edges of the learned structure.
bayesian_network = BayesianNetwork(best_model.edges())
