# all the extra code that we don't need in main project but may come in use in future

#this code is used to get a txt file from the internet in order to convert it to a csv file
#txt file contains geoids associated with each county
#we need geoids so we can merge it into our df
#we need to do that because to use geopandas, we need some sort of common column to associate the location of a county with the county itself
#"easiest" (the most mentally sane) way is by using a geoid

import requests

#county adjacency file from census.gov; it is a "|"-separated text file
url = "https://www2.census.gov/geo/docs/reference/county_adjacency/county_adjacency2023.txt"
#timeout so the script cannot hang forever if the server never answers
response = requests.get(url, timeout=30)
if response.status_code == 200:
    #keep the raw download on disk next to the converted csv
    with open("geoid.txt", "wb") as file:
        file.write(response.content)
    #a literal "|" separator (not a regex) keeps pandas on its default parser
    #and avoids an invalid "\|" escape sequence in the string
    geoid = pd.read_csv("geoid.txt", sep="|")
    geoid.to_csv("geoid.csv", index=False)
    print(geoid)
else: #just making sure it actually worked
    print("fail")

#in geoid dataframe, each county is repeated once per bordering county
#keep only the first row per county ("~" flips the boolean mask, so we keep the rows that are NOT duplicates)
is_repeat = geoid.duplicated(subset='County GEOID', keep='first')
geoid = geoid[~is_repeat].copy() #copy so adding a column below does not touch a slice of the raw frame

#the last two characters of the county name are the state/territory abbreviation
geoid["state"] = geoid["County Name"].str[-2:]

#remove the extra territories, and only THEN renumber the rows
#resetting the index last guarantees it runs 0..n-1 with no gaps, which matters
#because geoid is later lined up with df by index
extraStates = ["AS", "GU", "PR", "MP", "VI"]
geoid = geoid[~geoid["state"].isin(extraStates)].reset_index()
geoid

#nice thing about this geoid dataframe is that it has the same number of rows as df (bc both demo and geoid come from the us census)
#now we need to ensure the order of the counties in both is the same (which it should be bc they come from the same source)

#test code only: checks that the counties in geoid line up with the counties in df
#a geoid row counts as a match when at least one 'COUNTY_x' value from df
#appears inside its 'County Name'
known_counties = df['COUNTY_x']
name_matches = geoid['County Name'].apply(
    lambda full_name: any(county in full_name for county in known_counties)
)

selected_rows = geoid[name_matches]
selected_rows #showed 3144 rows when this was run, meaning the counties match

wrong = geoid[~name_matches]
wrong #showed 0 rows when this was run, meaning that no counties mismatch

#the shapefile stores GEOID as a string but geoid holds it as an int,
#so build a string version of the column to match
county_codes = geoid["County GEOID"]
geoid["GEOID"] = county_codes.astype(str)

#in shapefile, it is 5 digits, so if the geoid is "1002", the format of the data value is "01002"
#this ensures all the values in our geoid df are formatted to have 0 in front if it has 4 digits to make it have 5 digits
def add_zero(value):
    """Return value with a '0' in front if it is exactly 4 characters long, otherwise return it unchanged."""
    needs_padding = len(value) == 4
    return '0' + value if needs_padding else value
geoid['GEOID'] = geoid['GEOID'].map(add_zero)

#keep just the GEOID column (selecting with a list so the result is still a dataframe)
geoidFinal = geoid.loc[:, ["GEOID"]]
geoidFinal

#attach the GEOID column to df so all the necessary data sits in one dataframe
#the merge is on the index: rows with the same index label are combined (inner join, the merge default)
df_with_geoid = df.merge(geoidFinal, left_index=True, right_index=True)
df_with_geoid

#read the county shapefile with geopandas (shapes only, none of our data attached yet)
gdf = gpd.read_file("cb_2018_us_county_500k.shp")

#left merge on GEOID: every shape is kept, and our columns are filled in wherever the GEOID matches
merged = gdf.merge(df_with_geoid, how='left', on='GEOID')

#filtering out the shapes whose STATEFP is one of the territory codes
territory_statefps = ["72", "60", "69", "78", "66"]
in_territory = merged['STATEFP'].isin(territory_statefps)
merged_gdf = merged[~in_territory]

col_name = "RISK_SCORE"

#color each county by the value in one column (risk_score was chosen just to test)
us = merged_gdf.plot(
    column=col_name,
    cmap='OrRd',
    legend=True,
    figsize=(12, 8),
)
title = f"{col_name} by County"
plt.title(title)

#limit the axes so the view focuses on the US
#main part of US is [-130,-65]x[24,50]; the wider limits below are what is actually used
us.set_xlim(-180, -65)
us.set_ylim(17, 75)

#render the figure
plt.show()

#how do we know we're looking at correct code!!???
#when you go to the nri national map (https://hazards.fema.gov/nri/map) it shows the same map, just colors are diff :D


#starting view for the interactive map, picked by hand so the entire United States is covered
zoom_level = 3
center = [45, -135]

#the Folium map that the layers below get added to
interactiveMap = folium.Map(
    location=center,
    zoom_start=zoom_level,
)

#add a choropleth layer for risk
#each shape is matched to its RISK_SCORE through GEOID, which is unique per county
#(county NAME repeats across states, so matching on NAME would give every
#same-named county the same value)
folium.Choropleth(
    geo_data=merged_gdf,
    data=merged_gdf,
    columns=["GEOID", 'RISK_SCORE'],
    key_on='feature.properties.GEOID',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    line_weight=1,
    legend_name='Risk Level',
    nan_fill_color='gray',  # gray places are NaN values
    nan_fill_opacity=0.4
).add_to(interactiveMap)

#add a GeoJson layer that is only there for the hover information

#example fields, will change this later
tooltip_fields = ["GEOID", 'NAME', 'STATE_x', 'RISK_SCORE', 'PERCENT_WHITE', 'PERCENT_BLACK', 'PERCENT_ASIAN']
tooltip_aliases = ["GeoID", 'County', 'State', 'Risk Score', 'Percent White', 'Percent Black', 'Percent Asian']
hover_tooltip = folium.features.GeoJsonTooltip(
    fields=tooltip_fields,
    aliases=tooltip_aliases,
    localize=True
)


def _invisible_style(feature):
    """Style for every shape: transparent fill and border with zero border weight."""
    return {
        'fillColor': 'transparent',
        'color': 'transparent',
        'weight': 0,
        'dashArray': '5, 5',
        'fillOpacity': 0.7
    }


def _hover_style(feature):
    """Style used for highlighting: light blue fill and border."""
    return {'fillColor': 'lightblue', 'color': 'lightblue'}


folium.GeoJson(
    merged_gdf,
    style_function=_invisible_style,
    highlight_function=_hover_style,
    tooltip=hover_tooltip
).add_to(interactiveMap)

#display the map (it won't let me upload file without commenting this out)
#interactiveMap


#find the rows with no 'RISK_SCORE' and count how many of them fall in each STATEFP
risk_is_missing = merged_gdf['RISK_SCORE'].isna()
missing_data = merged_gdf[risk_is_missing]
value_counts = missing_data['STATEFP'].value_counts()
print(value_counts)

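#ok so these are the STATEFP codes of the rows with nan values
#02 = alaska
#09 = connecticut
#NOTE(review): presumably the GEOIDs for these counties differ between the 2018 shapefile and the 2023 census file, so the merge finds no match -- confirm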


#possible extra dataset: the social vulnerability index
#it relates to race https://www.atsdr.cdc.gov/placeandhealth/svi/index.html
svi = pd.read_csv(filepath_or_buffer="SVI_2020_US_county.csv")
svi
#only worth looking at if we even have time