## Download PM2.5 Hourly Data for Weather Stations

In [4]:
import requests
import os
import time

In [None]:
# programatically download both federally-regulated and non-federally-regulated PM2.5 information
# https://aqs.epa.gov/aqsweb/airdata/download_files.html#Raw
years = list(range(2010,2024+1))
if not os.path.exists("PM2.5"):
    os.mkdir("PM2.5")
for year in years:
    print(year)
    if not os.path.exists(f"PM2.5/hourly_88101_{year}.zip"):
        url = f"https://aqs.epa.gov/aqsweb/airdata/hourly_88101_{year}.zip"
        response = requests.get(url, stream=True)
        with open(f"PM2.5/hourly_88101_{year}.zip", mode="wb") as file:
            for chunk in response.iter_content(chunk_size=10 * 1024):
                file.write(chunk)

    if not os.path.exists(f"PM2.5/hourly_88502_{year}.zip"):
        url = f"https://aqs.epa.gov/aqsweb/airdata/hourly_88502_{year}.zip"
        response = requests.get(url, stream=True)
        with open(f"PM2.5/hourly_88502_{year}.zip", mode="wb") as file:
            for chunk in response.iter_content(chunk_size=10 * 1024):
                file.write(chunk)
        time.sleep(2)
    

In [None]:
import pandas as pd
PM2010 = pd.read_csv('PM2.5/hourly_88101_2010.zip')
PM2010

In [None]:
PM2010_88502 = pd.read_csv('PM2.5/hourly_88502_2010.zip')
PM2010_88502

## Other Weather Station Data (meteostat)

In [None]:
# download bulk meteostat data
if not os.path.exists("meteostat/bulk_data_full.json.gz"):
    url = "https://bulk.meteostat.net/v2/stations/full.json.gz"
    response = requests.get(url, stream=True)
    if not os.path.exists("meteostat"):
        os.mkdir("meteostat")
        with open("meteostat/bulk_data_full.json.gz", mode="wb") as file:
            for chunk in response.iter_content(chunk_size=10 * 1024):
                file.write(chunk)


In [None]:
import json
import gzip
# with open('meteostat/bulk_data_full.json.gz', 'r') as file:
#     full = json.load(file)

with gzip.open('meteostat/bulk_data_full.json.gz', 'r') as fin:
    full = json.loads(fin.read().decode('utf-8'))
usStations = [x for x in full if x["country"] == "US"]
usStations[0:3]

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# radius for matching
radius = 100

# Convert DataFrames to GeoDataFrames
def to_gdf(df, lat_col='latitude', lon_col='longitude'):
    geometry = [Point(xy) for xy in zip(df[lon_col], df[lat_col])]
    return gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

# add geometry column for long/lat comparisons
PM2010geo = to_gdf(PM2010, 'Latitude', 'Longitude')
PM2010_88502geo = to_gdf(PM2010_88502, 'Latitude', 'Longitude')

In [None]:
# convert json to dataframe
usStationsdf = pd.DataFrame(usStations)
usStationsdf

In [None]:
# extract lat long from table
usStationsdf['latitude'] = [x['latitude'] for x in usStationsdf['location']]
usStationsdf['longitude'] = [x['longitude'] for x in usStationsdf['location']]

In [None]:
display(usStationsdf[['latitude', 'longitude']])
display(PM2010geo.drop_duplicates(('Latitude', 'Longitude'))[['Latitude', 'Longitude']])

In [None]:
# usStationsdf['latitude'] = round(usStationsdf['latitude'], 2)
# usStationsdf['longitude'] = round(usStationsdf['longitude'], 2)

# PM2010geo['Latitude'] = round(PM2010geo['Latitude'], 2)
# PM2010geo['Longitude'] = round(PM2010geo['Longitude'], 2)

# display(usStationsdf[['latitude', 'longitude']])
# display(PM2010geo.drop_duplicates(('Latitude', 'Longitude'))[['Latitude', 'Longitude']])

In [None]:
# usStationsdf.merge(PM2010geo.drop_duplicates(('Latitude', 'Longitude')), left_on=('latitude', 'longitude'), right_on=('Latitude', 'Longitude'))

In [None]:
# add geometry column for long/lat comparisons
usStationsgeo = to_gdf(usStationsdf)

In [None]:
# project to mercator for meter comparisons
PM2010geo = PM2010geo.to_crs(epsg=3857)
PM2010_88502geo = PM2010_88502geo.to_crs(epsg=3857)
usStationsgeo = usStationsgeo.to_crs(epsg=3857)

In [None]:
display(PM2010geo.shape)
display(PM2010_88502geo.shape)
display(usStationsgeo.shape)

In [None]:
display(PM2010geo['geometry'])
display(PM2010_88502geo['geometry'])
display(usStationsgeo['geometry'])

In [None]:
m = PM2010geo.drop_duplicates(('Latitude', 'Longitude')).explore()
m = PM2010_88502geo.drop_duplicates(('Latitude', 'Longitude')).explore(m = m, color='green')
usStationsgeo.explore(m=m, color='red')

# Same, but 2024

In [5]:
import pandas as pd
PM2024 = pd.read_csv('PM2.5/hourly_88101_2024.zip')
PM2024

  PM2024 = pd.read_csv('PM2.5/hourly_88101_2024.zip')


Unnamed: 0,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Date Local,...,Units of Measure,MDL,Uncertainty,Qualifier,Method Type,Method Code,Method Name,State Name,County Name,Date of Last Change
0,1,3,10,88101,3,30.497478,-87.880258,NAD83,PM2.5 - Local Conditions,2024-01-02,...,Micrograms/cubic meter (LC),5.0,,,FEM,209,Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM...,Alabama,Baldwin,2024-07-19
1,1,3,10,88101,3,30.497478,-87.880258,NAD83,PM2.5 - Local Conditions,2024-01-02,...,Micrograms/cubic meter (LC),5.0,,,FEM,209,Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM...,Alabama,Baldwin,2024-07-19
2,1,3,10,88101,3,30.497478,-87.880258,NAD83,PM2.5 - Local Conditions,2024-01-02,...,Micrograms/cubic meter (LC),5.0,,,FEM,209,Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM...,Alabama,Baldwin,2024-07-19
3,1,3,10,88101,3,30.497478,-87.880258,NAD83,PM2.5 - Local Conditions,2024-01-02,...,Micrograms/cubic meter (LC),5.0,,,FEM,209,Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM...,Alabama,Baldwin,2024-07-19
4,1,3,10,88101,3,30.497478,-87.880258,NAD83,PM2.5 - Local Conditions,2024-01-02,...,Micrograms/cubic meter (LC),5.0,,,FEM,209,Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM...,Alabama,Baldwin,2024-07-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4759889,80,26,6,88101,1,31.291293,-110.951513,WGS84,PM2.5 - Local Conditions,2024-09-30,...,Micrograms/cubic meter (LC),0.1,,,FEM,638,Teledyne T640X at 16.67 LPM w/Network Data Ali...,Country Of Mexico,SONORA,2024-10-16
4759890,80,26,6,88101,1,31.291293,-110.951513,WGS84,PM2.5 - Local Conditions,2024-09-30,...,Micrograms/cubic meter (LC),0.1,,,FEM,638,Teledyne T640X at 16.67 LPM w/Network Data Ali...,Country Of Mexico,SONORA,2024-10-16
4759891,80,26,6,88101,1,31.291293,-110.951513,WGS84,PM2.5 - Local Conditions,2024-09-30,...,Micrograms/cubic meter (LC),0.1,,,FEM,638,Teledyne T640X at 16.67 LPM w/Network Data Ali...,Country Of Mexico,SONORA,2024-10-16
4759892,80,26,6,88101,1,31.291293,-110.951513,WGS84,PM2.5 - Local Conditions,2024-09-30,...,Micrograms/cubic meter (LC),0.1,,,FEM,638,Teledyne T640X at 16.67 LPM w/Network Data Ali...,Country Of Mexico,SONORA,2024-10-16


In [6]:
PM2024_88502 = pd.read_csv('PM2.5/hourly_88502_2024.zip')
PM2024_88502

  PM2024_88502 = pd.read_csv('PM2.5/hourly_88502_2024.zip')


Unnamed: 0,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Date Local,...,Units of Measure,MDL,Uncertainty,Qualifier,Method Type,Method Code,Method Name,State Name,County Name,Date of Last Change
0,2,110,4,88502,3,58.388497,-134.567237,WGS84,Acceptable PM2.5 AQI & Speciation Mass,2024-01-01,...,Micrograms/cubic meter (LC),0.1,,IH,FEM,238,Teledyne T640X at 16.67 LPM - Broadband spectr...,Alaska,Juneau,2024-06-18
1,2,110,4,88502,3,58.388497,-134.567237,WGS84,Acceptable PM2.5 AQI & Speciation Mass,2024-01-01,...,Micrograms/cubic meter (LC),0.1,,IH,FEM,238,Teledyne T640X at 16.67 LPM - Broadband spectr...,Alaska,Juneau,2024-06-18
2,2,110,4,88502,3,58.388497,-134.567237,WGS84,Acceptable PM2.5 AQI & Speciation Mass,2024-01-01,...,Micrograms/cubic meter (LC),0.1,,IH,FEM,238,Teledyne T640X at 16.67 LPM - Broadband spectr...,Alaska,Juneau,2024-06-18
3,2,110,4,88502,3,58.388497,-134.567237,WGS84,Acceptable PM2.5 AQI & Speciation Mass,2024-01-01,...,Micrograms/cubic meter (LC),0.1,,IH,FEM,238,Teledyne T640X at 16.67 LPM - Broadband spectr...,Alaska,Juneau,2024-06-18
4,2,110,4,88502,3,58.388497,-134.567237,WGS84,Acceptable PM2.5 AQI & Speciation Mass,2024-01-01,...,Micrograms/cubic meter (LC),0.1,,IH,FEM,238,Teledyne T640X at 16.67 LPM - Broadband spectr...,Alaska,Juneau,2024-06-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1042870,72,97,7,88502,1,18.216038,-67.144107,NAD83,Acceptable PM2.5 AQI & Speciation Mass,2024-06-30,...,Micrograms/cubic meter (LC),2.0,,V,Non-FRM,181,PM2.5 VSCC - FDMS-Gravimetric,Puerto Rico,Mayagnez,2024-08-19
1042871,72,97,7,88502,1,18.216038,-67.144107,NAD83,Acceptable PM2.5 AQI & Speciation Mass,2024-06-30,...,Micrograms/cubic meter (LC),2.0,,,Non-FRM,181,PM2.5 VSCC - FDMS-Gravimetric,Puerto Rico,Mayagnez,2024-08-19
1042872,72,97,7,88502,1,18.216038,-67.144107,NAD83,Acceptable PM2.5 AQI & Speciation Mass,2024-06-30,...,Micrograms/cubic meter (LC),2.0,,,Non-FRM,181,PM2.5 VSCC - FDMS-Gravimetric,Puerto Rico,Mayagnez,2024-08-19
1042873,72,97,7,88502,1,18.216038,-67.144107,NAD83,Acceptable PM2.5 AQI & Speciation Mass,2024-06-30,...,Micrograms/cubic meter (LC),2.0,,,Non-FRM,181,PM2.5 VSCC - FDMS-Gravimetric,Puerto Rico,Mayagnez,2024-08-19


## Other Weather Station Data (meteostat)

In [7]:
# download bulk meteostat data
if not os.path.exists("meteostat/bulk_data_full.json.gz"):
    url = "https://bulk.meteostat.net/v2/stations/full.json.gz"
    response = requests.get(url, stream=True)
    if not os.path.exists("meteostat"):
        os.mkdir("meteostat")
        with open("meteostat/bulk_data_full.json.gz", mode="wb") as file:
            for chunk in response.iter_content(chunk_size=10 * 1024):
                file.write(chunk)


In [8]:
import json
import gzip
# with open('meteostat/bulk_data_full.json.gz', 'r') as file:
#     full = json.load(file)

with gzip.open('meteostat/bulk_data_full.json.gz', 'r') as fin:
    full = json.loads(fin.read().decode('utf-8'))
usStations = [x for x in full if x["country"] == "US"]
usStations[0:3]

[{'id': '04AEH',
  'name': {'en': 'Norwich'},
  'country': 'US',
  'region': 'NY',
  'identifiers': {'national': None, 'wmo': None, 'icao': 'KOIC'},
  'location': {'latitude': 42.5665, 'longitude': -75.5242, 'elevation': 312},
  'timezone': 'America/New_York',
  'inventory': {'model': {'start': '2022-04-24', 'end': '2025-04-23'},
   'hourly': {'start': '2022-04-23', 'end': '2025-04-14'},
   'daily': {'start': '2022-04-23', 'end': '2022-04-26'},
   'monthly': {'start': None, 'end': None},
   'normals': {'start': None, 'end': None}}},
 {'id': '0MV8M',
  'name': {'en': 'Hill Air Force Base'},
  'country': 'US',
  'region': 'UT',
  'identifiers': {'national': None, 'wmo': None, 'icao': 'KHIF'},
  'location': {'latitude': 41.1111, 'longitude': -111.9623, 'elevation': 1459},
  'timezone': 'America/Denver',
  'inventory': {'model': {'start': '2022-04-24', 'end': '2025-04-23'},
   'hourly': {'start': '2022-04-23', 'end': '2025-04-14'},
   'daily': {'start': '2022-04-23', 'end': '2022-04-25'},


In [9]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# radius for matching
radius = 100

# Convert DataFrames to GeoDataFrames
def to_gdf(df, lat_col='latitude', lon_col='longitude'):
    geometry = [Point(xy) for xy in zip(df[lon_col], df[lat_col])]
    return gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

# add geometry column for long/lat comparisons
PM2024geo = to_gdf(PM2024, 'Latitude', 'Longitude')
PM2024_88502geo = to_gdf(PM2024_88502, 'Latitude', 'Longitude')

In [10]:
# convert json to dataframe
usStationsdf = pd.DataFrame(usStations)
usStationsdf

Unnamed: 0,id,name,country,region,identifiers,location,timezone,inventory
0,04AEH,{'en': 'Norwich'},US,NY,"{'national': None, 'wmo': None, 'icao': 'KOIC'}","{'latitude': 42.5665, 'longitude': -75.5242, '...",America/New_York,"{'model': {'start': '2022-04-24', 'end': '2025..."
1,0MV8M,{'en': 'Hill Air Force Base'},US,UT,"{'national': None, 'wmo': None, 'icao': 'KHIF'}","{'latitude': 41.1111, 'longitude': -111.9623, ...",America/Denver,"{'model': {'start': '2022-04-24', 'end': '2025..."
2,0NNEW,{'en': 'Effingham County Memorial Airport'},US,IL,"{'national': None, 'wmo': None, 'icao': 'K1H2'}","{'latitude': 39.0706, 'longitude': -88.5333, '...",America/Chicago,"{'model': {'start': '2022-05-07', 'end': '2025..."
3,0OBKP,{'en': 'Live Oak County Airport'},US,TX,"{'national': None, 'wmo': None, 'icao': 'K8T6'}","{'latitude': 28.3628, 'longitude': -98.1165, '...",America/Chicago,"{'model': {'start': '2022-05-06', 'end': '2025..."
4,0RJDR,{'en': 'Hotel (Gurley)'},US,NE,"{'national': None, 'wmo': None, 'icao': 'K1HW'}","{'latitude': 41.32, 'longitude': -102.83, 'ele...",America/Denver,"{'model': {'start': '2022-05-07', 'end': '2025..."
...,...,...,...,...,...,...,...,...
2930,ZGK9P,{'en': 'Victorville Airport'},US,CA,"{'national': None, 'wmo': None, 'icao': 'KVCV'}","{'latitude': 34.5972, 'longitude': -117.3828, ...",America/Los_Angeles,"{'model': {'start': '2022-04-24', 'end': '2025..."
2931,ZJ8AR,{'en': 'India - Sidney'},US,NE,"{'national': None, 'wmo': None, 'icao': 'K1IW'}","{'latitude': 41.05, 'longitude': -102.87, 'ele...",America/Denver,"{'model': {'start': '2022-05-07', 'end': '2025..."
2932,ZNWZW,{'en': 'Columbus Municipal Airport'},US,NE,"{'national': None, 'wmo': None, 'icao': 'KOLU'}","{'latitude': 41.45, 'longitude': -97.3333, 'el...",America/Chicago,"{'model': {'start': '2022-04-24', 'end': '2025..."
2933,ZUQJS,{'en': 'Ephraim-Gibraltar Airport'},US,WI,"{'national': None, 'wmo': None, 'icao': 'K3D2'}","{'latitude': 45.1357, 'longitude': -87.1881, '...",America/Menominee,"{'model': {'start': '2022-05-07', 'end': '2025..."


In [11]:
# extract lat long from table
usStationsdf['latitude'] = [x['latitude'] for x in usStationsdf['location']]
usStationsdf['longitude'] = [x['longitude'] for x in usStationsdf['location']]

In [12]:
display(usStationsdf[['latitude', 'longitude']])
display(PM2024geo.drop_duplicates(('Latitude', 'Longitude'))[['Latitude', 'Longitude']])

Unnamed: 0,latitude,longitude
0,42.5665,-75.5242
1,41.1111,-111.9623
2,39.0706,-88.5333
3,28.3628,-98.1165
4,41.3200,-102.8300
...,...,...
2930,34.5972,-117.3828
2931,41.0500,-102.8700
2932,41.4500,-97.3333
2933,45.1357,-87.1881


Unnamed: 0,Latitude,Longitude
0,30.497478,-87.880258
4114,33.284928,-85.803608
9181,34.288567,-85.969858
14048,33.988210,-85.992556
19113,34.685470,-86.588160
...,...,...
4735300,43.457760,-110.797990
4740194,44.373056,-110.830833
4745899,44.007919,-107.956965
4750123,18.420089,-66.150615


In [13]:
# usStationsdf['latitude'] = round(usStationsdf['latitude'], 2)
# usStationsdf['longitude'] = round(usStationsdf['longitude'], 2)

# PM2024geo['Latitude'] = round(PM2024geo['Latitude'], 2)
# PM2024geo['Longitude'] = round(PM2024geo['Longitude'], 2)

# display(usStationsdf[['latitude', 'longitude']])
# display(PM2024geo.drop_duplicates(('Latitude', 'Longitude'))[['Latitude', 'Longitude']])

In [14]:
# usStationsdf.merge(PM2024geo.drop_duplicates(('Latitude', 'Longitude')), left_on=('latitude', 'longitude'), right_on=('Latitude', 'Longitude'))

In [15]:
# add geometry column for long/lat comparisons
usStationsgeo = to_gdf(usStationsdf)

In [16]:
# project to mercator for meter comparisons
PM2024geo = PM2024geo.to_crs(epsg=3857)
PM2024_88502geo = PM2024_88502geo.to_crs(epsg=3857)
usStationsgeo = usStationsgeo.to_crs(epsg=3857)

In [17]:
display(PM2024geo.shape)
display(PM2024_88502geo.shape)
display(usStationsgeo.shape)

(4759894, 25)

(1042875, 25)

(2935, 11)

In [18]:
display(PM2024geo['geometry'])
display(PM2024_88502geo['geometry'])
display(usStationsgeo['geometry'])

0           POINT (-9782785.571 3567657.627)
1           POINT (-9782785.571 3567657.627)
2           POINT (-9782785.571 3567657.627)
3           POINT (-9782785.571 3567657.627)
4           POINT (-9782785.571 3567657.627)
                         ...                
4759889    POINT (-12351065.930 3670637.147)
4759890    POINT (-12351065.930 3670637.147)
4759891    POINT (-12351065.930 3670637.147)
4759892    POINT (-12351065.930 3670637.147)
4759893    POINT (-12351065.930 3670637.147)
Name: geometry, Length: 4759894, dtype: geometry

0          POINT (-14979956.300 8049375.275)
1          POINT (-14979956.300 8049375.275)
2          POINT (-14979956.300 8049375.275)
3          POINT (-14979956.300 8049375.275)
4          POINT (-14979956.300 8049375.275)
                         ...                
1042870     POINT (-7474447.801 2062850.975)
1042871     POINT (-7474447.801 2062850.975)
1042872     POINT (-7474447.801 2062850.975)
1042873     POINT (-7474447.801 2062850.975)
1042874     POINT (-7474447.801 2062850.975)
Name: geometry, Length: 1042875, dtype: geometry

0        POINT (-8407315.487 5246219.783)
1       POINT (-12463586.224 5028742.725)
2        POINT (-9855481.874 4731789.468)
3       POINT (-10922278.818 3294792.050)
4       POINT (-11446983.238 5059656.788)
                      ...                
2930    POINT (-13066993.524 4109276.073)
2931    POINT (-11451436.018 5019719.452)
2932    POINT (-10835093.393 5078944.869)
2933     POINT (-9705734.895 5642910.032)
2934     POINT (-9221183.416 5251980.286)
Name: geometry, Length: 2935, dtype: geometry

In [19]:
m = PM2024geo.drop_duplicates(('Latitude', 'Longitude')).explore()
m = PM2024_88502geo.drop_duplicates(('Latitude', 'Longitude')).explore(m = m, color='green')
usStationsgeo.explore(m=m, color='red')

In [28]:
cmaq = pd.read_csv('cmaq_grid.csv')
cmaq.describe()

Unnamed: 0,Col,Row,Lon,Lat,Conc
count,137236.0,137236.0,137236.0,137236.0,137236.0
mean,229.991657,149.994644,-94.704555,39.076362,3.845746
std,132.497259,86.310708,18.92622,9.196761,3.732572
min,1.0,1.0,-134.38901,20.66346,0.013718
25%,115.0,75.0,-110.92909,31.15122,1.421382
50%,230.0,150.0,-94.644305,39.07545,2.64377
75%,345.0,225.0,-78.50867,46.9638,4.769837
max,459.0,299.0,-54.71756,56.47393,106.902


In [38]:
list(range(0, 10000, 46))

[0,
 46,
 92,
 138,
 184,
 230,
 276,
 322,
 368,
 414,
 460,
 506,
 552,
 598,
 644,
 690,
 736,
 782,
 828,
 874,
 920,
 966,
 1012,
 1058,
 1104,
 1150,
 1196,
 1242,
 1288,
 1334,
 1380,
 1426,
 1472,
 1518,
 1564,
 1610,
 1656,
 1702,
 1748,
 1794,
 1840,
 1886,
 1932,
 1978,
 2024,
 2070,
 2116,
 2162,
 2208,
 2254,
 2300,
 2346,
 2392,
 2438,
 2484,
 2530,
 2576,
 2622,
 2668,
 2714,
 2760,
 2806,
 2852,
 2898,
 2944,
 2990,
 3036,
 3082,
 3128,
 3174,
 3220,
 3266,
 3312,
 3358,
 3404,
 3450,
 3496,
 3542,
 3588,
 3634,
 3680,
 3726,
 3772,
 3818,
 3864,
 3910,
 3956,
 4002,
 4048,
 4094,
 4140,
 4186,
 4232,
 4278,
 4324,
 4370,
 4416,
 4462,
 4508,
 4554,
 4600,
 4646,
 4692,
 4738,
 4784,
 4830,
 4876,
 4922,
 4968,
 5014,
 5060,
 5106,
 5152,
 5198,
 5244,
 5290,
 5336,
 5382,
 5428,
 5474,
 5520,
 5566,
 5612,
 5658,
 5704,
 5750,
 5796,
 5842,
 5888,
 5934,
 5980,
 6026,
 6072,
 6118,
 6164,
 6210,
 6256,
 6302,
 6348,
 6394,
 6440,
 6486,
 6532,
 6578,
 6624,
 6670,
 671

In [39]:
to_gdf(cmaq, 'Lat', 'Lon').iloc[list(range(0, 10000, 46)),].explore()

In [None]:
import matplotlib.pyplot as plt

usStationsgeo.plot()
plt.xlim(-2e7, 0)
plt.ylim(0, 1e7)

In [None]:
# add Buffer df1 points by 100m
usStationsgeo['geometry'] = usStationsgeo.buffer(1000)

In [None]:
base = usStationsgeo.plot()
PM2010geonoDup.plot(ax=base)

In [None]:
# Match all stations within 100m
matches = gpd.sjoin(usStationsgeo, PM2010geonoDup, how='inner', predicate='intersects')
matches