In [14]:
import geopandas as gpd
import pandas as pd
from rasterstats import zonal_stats

In [15]:
# Years of interest, as a flat list of ints (end of range is exclusive, so this
# is 1998..2021 inclusive). The start is 1998 to match the first year reported
# in the recorded run of the processing loop.
years = list(range(1998, 2022))

# Create an empty DataFrame to store the merged data
merged_data = pd.DataFrame()

In [17]:
# For each year, compute the mean raster value inside every polygon of the
# UCDB shapefile and collect the yearly means into one wide table keyed by
# 'fid' and 'Name' (one mean_<year> column per year).

# The polygon layer is the same for every year, so read it once up front
# rather than re-reading the shapefile on every iteration.
shapefile_path = '/Users/andrewzimmer/Documents/Montana State - Postdoc/Research/Zimmer - Urban Demography : AQ : Heat/data/UCDB/ghs_ucdb.shp'
shp = gpd.read_file(shapefile_path)

# Reset the accumulator inside this cell so that re-running it does not merge
# a second copy of every year into an already populated table (which would
# yield suffixed duplicate columns such as mean_1998_x / mean_1998_y).
merged_data = None

for year in years:
    raster_path = f'/Users/andrewzimmer/Documents/Montana State - Postdoc/Research/Zimmer - Urban Demography : AQ : Heat/data/aq/pm2.5/geotiff/Annual/V5GL03.HybridPM25.Global.{year}01-{year}12.tif'

    # One record per polygon holding its 'mean'; the records are assumed to be
    # in the same row order as shp, which is what the id assignment below relies on.
    stats = zonal_stats(shp, raster_path, stats="mean")
    stats_zonal = pd.DataFrame(stats)
    stats_zonal['fid'] = shp['fid']
    stats_zonal['Name'] = shp['UC_NM_MN']
    # Tag the value column with its year so the yearly columns do not collide.
    stats_zonal = stats_zonal.rename(columns={'mean': f"mean_{year}"})
    print(f"Completed processing data for year {year}")

    # Merge the data on 'fid' and 'Name' fields
    if merged_data is None:
        merged_data = stats_zonal
    else:
        merged_data = pd.merge(merged_data, stats_zonal, on=['fid', 'Name'], how='outer')

# Display the merged data
merged_data

Completed processing data for year 1998
Completed processing data for year 1999
Completed processing data for year 2000
Completed processing data for year 2001
Completed processing data for year 2002
Completed processing data for year 2003
Completed processing data for year 2004
Completed processing data for year 2005
Completed processing data for year 2006
Completed processing data for year 2007
Completed processing data for year 2008
Completed processing data for year 2009
Completed processing data for year 2010
Completed processing data for year 2011
Completed processing data for year 2012
Completed processing data for year 2013
Completed processing data for year 2014
Completed processing data for year 2015
Completed processing data for year 2016
Completed processing data for year 2017
Completed processing data for year 2018
Completed processing data for year 2019
Completed processing data for year 2020
Completed processing data for year 2021


Unnamed: 0,mean_1998,fid,Name,mean_1999,mean_2000,mean_2001,mean_2002,mean_2003,mean_2004,mean_2005,...,mean_2012,mean_2013,mean_2014,mean_2015,mean_2016,mean_2017,mean_2018,mean_2019,mean_2020,mean_2021
0,3.922700,1.0,Honolulu,4.116564,4.140491,4.214724,3.987730,4.231902,3.813497,4.039264,...,5.424540,4.603681,4.251534,4.539877,3.601227,3.782209,3.417791,3.187730,3.425154,3.190797
1,4.200000,2.0,Papeete,4.209375,4.746875,5.065625,4.884375,5.365625,4.840625,5.500000,...,4.431250,4.609375,5.150000,5.256250,4.462500,4.362500,4.759375,4.843750,4.962500,4.718750
2,6.960784,3.0,Santa Maria,8.398040,8.733334,8.911765,9.345098,8.143137,7.843137,8.443137,...,7.254902,8.178431,8.307843,7.668628,6.333333,8.115686,6.890196,5.421569,9.284314,7.511764
3,5.753192,4.0,Monterey,8.293617,8.108511,7.834043,8.597872,6.974468,7.148936,6.734043,...,5.408510,6.651064,5.361702,5.508511,6.040425,5.972341,6.806383,4.589361,11.076597,6.210639
4,8.495161,5.0,Santa Barbara,9.782258,10.374194,10.690323,11.354839,9.572581,9.590322,9.609677,...,7.943549,8.488709,8.064516,8.112903,7.824194,11.835484,8.345162,6.414516,9.601613,7.977420
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13130,5.441096,13131.0,Tauranga,5.260274,5.691781,6.158904,5.983561,6.208219,6.178082,6.280822,...,6.704110,6.921918,6.791781,6.646576,6.631507,6.663014,6.624658,7.530137,6.845206,6.615068
13131,5.200000,13132.0,Buin,5.200000,6.800001,7.100000,7.500000,6.800001,8.633333,8.066667,...,8.033333,10.400000,10.766666,10.333333,8.400000,9.266666,8.600000,9.366667,7.500000,6.766666
13132,5.275000,13133.0,Honiara,4.205000,6.514999,7.290000,8.105000,7.945000,9.520000,8.570000,...,9.180000,11.030000,10.659999,10.495000,9.745000,11.150000,10.220000,9.665000,9.109999,8.400000
13133,5.262500,13134.0,Nouméa,5.287500,6.045834,6.016666,5.620833,5.429167,5.283333,5.312500,...,5.850000,5.900000,5.433333,5.737500,5.612500,5.350000,5.895833,6.766666,5.800000,5.791667


In [19]:
# Reshape the wide table (one mean_<year> column per year) into long format:
# one row per (fid, year) pair with the value in 'PM25'.
melted_df = pd.melt(merged_data, id_vars=['fid', 'Name'], var_name='year', value_name='PM25')

# Extract year from the 'year' column (e.g. 'mean_1998' -> '1998').
# The pattern is a raw string so '\d' is not treated as an invalid string
# escape, and expand=False makes extract return a Series rather than a
# one-column DataFrame. The extracted year stays a string.
melted_df['year'] = melted_df['year'].str.extract(r'(\d+)', expand=False)

# Drop the 'Name' column
melted_df = melted_df.drop(columns=['Name'])

# Check it looks ok
melted_df

Unnamed: 0,fid,year,PM25
0,1.0,1998,3.922700
1,2.0,1998,4.200000
2,3.0,1998,6.960784
3,4.0,1998,5.753192
4,5.0,1998,8.495161
...,...,...,...
315235,13131.0,2021,6.615068
315236,13132.0,2021,6.766666
315237,13133.0,2021,8.400000
315238,13134.0,2021,5.791667


In [21]:
# Write the long-format table to CSV, leaving out the DataFrame index.
output_csv = '/Users/andrewzimmer/Documents/Montana State - Postdoc/Research/Zimmer - Urban Demography : AQ : Heat/data/aq/pm2.5/ucdb-pm25-extracted.csv'
melted_df.to_csv(output_csv, index=False)