<a href="https://colab.research.google.com/github/daniellegauthier/Anthropocene-by-US-County/blob/main/anthropocene_map.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [82]:
import pandas as pd
import numpy as np

# Load and process emissions data in chunks
emissions_data = pd.read_csv('meatpacking final.csv')
print(emissions_data.columns)

Index(['county', 'state', 'percentage w internet', 'matched edu',
       'matched population', 'matched temp', 'matched humidity',
       'matched emissions'],
      dtype='object')


In [106]:
import pandas as pd
import numpy as np

# List of indicators for averaging
indicators = ['matched edu', 'matched population', 'matched temp', 'matched humidity', 'matched emissions']

# Load and process emissions data in chunks
emissions_data = pd.read_csv('meatpacking final.csv', chunksize=10000)

# Get unique state abbreviations
emissions_data_states = set()
for chunk in emissions_data:
    emissions_data_states.update(chunk['state'].unique())

# Load and clean up fips data
fips_data = pd.read_csv('https://raw.githubusercontent.com/kjhealy/fips-codes/master/county_fips_master.csv', encoding='latin-1')
fips_data = fips_data[['fips', 'county', 'state_abbr', 'state_name']].dropna().reset_index(drop=True)
fips_data['county'] = fips_data['county'].astype(str).str.strip()

# Load state fips data
state_fips_data = pd.read_csv('https://raw.githubusercontent.com/kjhealy/fips-codes/master/state_fips_master.csv', encoding='latin-1')
state_fips_data = state_fips_data[['state_abbr', 'region', 'fips']].dropna().reset_index(drop=True)
state_fips_data = state_fips_data.rename(columns={'state_abbr': 'state_abbr_fips', 'fips': 'state_fips'})

# Merge county and state fips data
fips_data = fips_data.merge(state_fips_data, how="inner", left_on="state_abbr", right_on="state_abbr_fips")

print("Columns in fips_data after merge:")
print(fips_data.columns)

def safe_split(value):
    if isinstance(value, str):
        return value.split(',')[0]
    return value

def process_state_chunk(state_name, emissions_data, fips_data):
    merged_data = pd.DataFrame()

    # Process each chunk and merge with fips_data for the specified state
    for chunk in emissions_data:
        # Clean and prepare the emissions data chunk
        chunk = chunk[chunk['state'].str.strip() == state_name]
        chunk = chunk.rename(columns=str.lower)
        chunk['county_name'] = chunk['county'].str.strip().str.split(',').str[0].str.strip()

        # Filter fips_data for the specified state
        state_fips_data = fips_data[fips_data['state_name'] == state_name]

        print(f"Number of counties in emissions data for {state_name}: {len(chunk['county_name'].unique())}")
        print(f"Number of counties in FIPS data for {state_name}: {len(state_fips_data['county'])}")

        print("Sample counties from emissions data:")
        print(chunk['county_name'].head())
        print("Sample counties from FIPS data:")
        print(state_fips_data['county'].head())

        # Merge cleaned emissions data chunk with fips_data
        merged_chunk = chunk.merge(
            state_fips_data[["fips", "county", "state_abbr", "state_name"]],
            how="left",
            left_on="county_name",
            right_on="county",
            suffixes=('', '_fips')
        )

        print(f"Number of rows after merge: {len(merged_chunk)}")

        # Calculate average indicators for each county
        for indicator in indicators:
            merged_chunk[indicator] = merged_chunk[indicator].apply(safe_split)
            merged_chunk[indicator] = pd.to_numeric(merged_chunk[indicator], errors='coerce')
            merged_chunk[f'avg_{indicator}'] = merged_chunk[indicator].mean()

        merged_data = pd.concat([merged_data, merged_chunk], axis=0, ignore_index=True)

    return merged_data

# Prompt the user for the state they want to view
state_name = input("Enter the full state name you want to view (e.g., New York, California): ")

# Reset the emissions_data iterator
emissions_data = pd.read_csv('meatpacking final.csv', chunksize=10000)

# Process the user-specified state
state_data = process_state_chunk(state_name, emissions_data, fips_data)

# Sort the data by county
try:
    state_data = state_data.sort_values(by='county_name')
except KeyError as e:
    print(f"Error sorting data: {e}")
    print("Available columns for sorting:")
    print(state_data.columns)
    # Fallback sorting if 'county_name' is not available
    state_data = state_data.sort_values(by=state_data.columns[0])

# Display summary of the processed data
print("\nProcessed Data Summary:")
print(state_data.head())  # Display first few rows
print(f"\nTotal rows: {len(state_data)}")
print("\nColumn names:")
print(state_data.columns)

# Function to display county data
def display_county_data(county_data):
    print("\nCounty Data:")
    print(county_data)
    print("\nAverage values for indicators:")
    for indicator in indicators:
        avg_col = f'avg_{indicator}'
        if avg_col in county_data.columns:
            print(f"{indicator}: {county_data[avg_col].iloc[0]:.2f}")

# Prompt user to select a county
while True:
    print("\nAvailable counties:")
    print(state_data['county_name'].unique())

    county_name = input("\nEnter the name of the county you want to view (or 'quit' to exit): ")

    if county_name.lower() == 'quit':
        break

    county_data = state_data[state_data['county_name'] == county_name]

    if len(county_data) == 0:
        print(f"No data found for {county_name}. Please try again.")
    else:
        display_county_data(county_data)

    continue_prompt = input("\nWould you like to view another county? (yes/no): ")
    if continue_prompt.lower() != 'yes':
        break

print("Thank you for using the Emissions Data Processing Script!")

# Optionally, save the data to a CSV file
# state_data.to_csv(f"{state_name.replace(' ', '_').lower()}_emissions_data.csv", index=False)

Columns in fips_data after merge:
Index(['fips', 'county', 'state_abbr', 'state_name', 'state_abbr_fips',
       'region', 'state_fips'],
      dtype='object')
Enter the full state name you want to view (e.g., New York, California): New York
Number of counties in emissions data for New York: 62
Number of counties in FIPS data for New York: 62
Sample counties from emissions data:
1829         Albany County
1830       Allegany County
1831          Bronx County
1832         Broome County
1833    Cattaraugus County
Name: county_name, dtype: object
Sample counties from FIPS data:
1827    1.0
1828    3.0
1829    5.0
1830    7.0
1831    9.0
Name: county, dtype: object
Number of rows after merge: 62

Processed Data Summary:
                         county     state  percentage w internet  matched edu  \
0       Albany County, New York  New York                   80.5         29.2   
1     Allegany County, New York  New York                   72.6         14.8   
2        Bronx County, New York

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
import io

def load_geojson_data(url):
    response = requests.get(url)
    return gpd.read_file(io.StringIO(response.text))

url = "https://raw.githubusercontent.com/CodeForCary/CountyDataUSA5m/refs/heads/master/cb_2017_us_county_5m.json"
counties = load_geojson_data(url)

# Load your emissions data (replace this with your actual data loading code)
emissions_data = pd.read_csv('meatpacking final.csv')
print(emissions_data[['matched edu']].head())
print(emissions_data['matched edu'].dtype)
print(merged_data[['county_name', 'matched edu']].head())
print(emissions_data.shape)
print(emissions_data.columns)
print(emissions_data.head())

def create_state_choropleth(state_name, data_column):
    state_counties = counties[counties['STATEFP'] == state_fips[state_name]]

   # For the state_counties DataFrame
    state_counties['county_name'] = state_counties['NAME'] + ' County, ' + state_name

    # For the emissions_data DataFrame
    emissions_data['county_name'] = emissions_data['county']

    # Then perform the merge
    merged_data = state_counties.merge(emissions_data[emissions_data['state'] == state_name],
                                   on='county_name', how='left')

    print(f"GeoJSON counties: {state_counties['county_name'].tolist()}")
    print(f"Emissions data counties: {emissions_data[emissions_data['state'] == state_name]['county_name'].tolist()}")

    merged_data = state_counties.merge(
    emissions_data[emissions_data['state'] == state_name],
    left_on=['STATEFP', 'county_name'],
    right_on=['state', 'county_name'],
    how='left'
)

    print(f"Number of counties in GeoJSON: {len(state_counties)}")
    print(f"Number of counties after merge: {len(merged_data)}")
    print(f"Number of non-null values in {data_column}: {merged_data[data_column].notnull().sum()}")

    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    merged_data.plot(column=data_column, ax=ax, legend=True,
                     legend_kwds={'label': data_column, 'orientation': 'horizontal'},
                     missing_kwds={'color': 'lightgrey'})

    ax.set_title(f'{state_name} - {data_column} by County')
    ax.axis('off')
    plt.tight_layout()
    plt.show()

# Dictionary of state names to FIPS codes
state_fips = {
    'Alabama': '01', 'Alaska': '02', 'Arizona': '04', 'Arkansas': '05', 'California': '06',
    'Colorado': '08', 'Connecticut': '09', 'Delaware': '10', 'Florida': '12', 'Georgia': '13',
    'Hawaii': '15', 'Idaho': '16', 'Illinois': '17', 'Indiana': '18', 'Iowa': '19',
    'Kansas': '20', 'Kentucky': '21', 'Louisiana': '22', 'Maine': '23', 'Maryland': '24',
    'Massachusetts': '25', 'Michigan': '26', 'Minnesota': '27', 'Mississippi': '28', 'Missouri': '29',
    'Montana': '30', 'Nebraska': '31', 'Nevada': '32', 'New Hampshire': '33', 'New Jersey': '34',
    'New Mexico': '35', 'New York': '36', 'North Carolina': '37', 'North Dakota': '38', 'Ohio': '39',
    'Oklahoma': '40', 'Oregon': '41', 'Pennsylvania': '42', 'Rhode Island': '44', 'South Carolina': '45',
    'South Dakota': '46', 'Tennessee': '47', 'Texas': '48', 'Utah': '49', 'Vermont': '50',
    'Virginia': '51', 'Washington': '53', 'West Virginia': '54', 'Wisconsin': '55', 'Wyoming': '56'
}

while True:
    print("Available states:")
    print(", ".join(state_fips.keys()))
    state_name = input("Enter the state name (or 'quit' to exit): ")

    if state_name.lower() == 'quit':
        break

    if state_name not in state_fips:
        print("Invalid state name. Please try again.")
        continue

    print("\nAvailable data columns:")
    print(", ".join(emissions_data.columns))
    data_column = input("Enter the data column to visualize: ")

    if data_column not in emissions_data.columns:
        print("Invalid data column. Please try again.")
        continue

    # Create and display the choropleth map
    create_state_choropleth(state_name, data_column)

    another = input("Would you like to create another map? (yes/no): ")
    if another.lower() != 'yes':
        break

print("Thank you for using the State County Choropleth Map Generator!")

   matched edu
0         18.2
1         21.4
2          8.2
3          8.1
4          8.9
float64
Empty DataFrame
Columns: [county_name, matched edu]
Index: []
(3143, 8)
Index(['county', 'state', 'percentage w internet', 'matched edu',
       'matched population', 'matched temp', 'matched humidity',
       'matched emissions'],
      dtype='object')
                    county    state  percentage w internet  matched edu  \
0  Autauga County, Alabama  Alabama                   74.1         18.2   
1  Baldwin County, Alabama  Alabama                   77.7         21.4   
2  Barbour County, Alabama  Alabama                   52.5          8.2   
3     Bibb County, Alabama  Alabama                   56.2          8.1   
4   Blount County, Alabama  Alabama                   66.9          8.9   

   matched population  matched temp  matched humidity  matched emissions  
0               58805          55.5              68.0                 79  
1              231767          60.5            