# Explores Weather Information in the US

Clean Data

In [35]:
with open("artifacts/ghcnd-stations.txt") as bad_stations, open("artifacts/ghcnd-stations-fixed.txt", 'w+') as good_stations:
    for b in bad_stations:
        good_stations.write("[" + '[,['.join(b[:74].split(None, 4)) + "[")
        good_stations.write("\n")


In [36]:
import pandas as pd
import numpy as np

# https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf
with open("artifacts/ghcnd-stations-fixed.txt") as reader:
    ghcnd_stations = pd.read_csv(
        reader,
        header=None,
        quotechar='[',
        sep=',',
        names=["station_id", "latitude", "longitude", "elevation_above_sea_level", "station_name"]
    )

ghcnd_stations

Unnamed: 0,station_id,latitude,longitude,elevation_above_sea_level,station_name
0,ACW00011604,17.1167,-61.7833,10.1,ST JOHNS COOLIDGE FLD
1,ACW00011647,17.1333,-61.7833,19.2,ST JOHNS
2,AE000041196,25.3330,55.5170,34.0,SHARJAH INTER. AIRP GS
3,AEM00041194,25.2550,55.3640,10.4,DUBAI INTL
4,AEM00041217,24.4330,54.6510,26.8,ABU DHABI INTL
...,...,...,...,...,...
118487,ZI000067969,-21.0500,29.3670,861.0,WEST NICHOLSON
118488,ZI000067975,-20.0670,30.8670,1095.0,MASVINGO
118489,ZI000067977,-21.0170,31.5830,430.0,BUFFALO RANGE
118490,ZI000067983,-20.2000,32.6160,1132.0,CHIPINGE GS


Filter only US weather stations.

In [37]:
ghcnd_stations.query("station_id == 'US1WAKG0196'")

Unnamed: 0,station_id,latitude,longitude,elevation_above_sea_level,station_name
89111,US1WAKG0196,47.3539,-122.3181,94.8,WA DES MOINES 2.9 S


In [2]:
us_stations = ghcnd_stations[ghcnd_stations['station_id'].str.startswith('US')]
us_stations

Unnamed: 0,station_id,latitude,longitude,elevation_above_sea_level,station_name,measurement_flag,quality_flag,source_flag
52532,US009052008,43.7333,-96.6333,482.0,SIOUX FALLS (ENVIRON. CAN,DA),,
52533,US10RMHS145,40.5268,105.1113,1569.1,RMHS 1.6 SSW,,,
52534,US10adam001,40.5680,-98.5069,598.0,JUNIATA 1.5 S,,,
52535,US10adam002,40.5093,-98.5493,601.1,JUNIATA 6.0 SSW,,,
52536,US10adam003,40.4663,-98.6537,615.1,HOLSTEIN 0.1 NW,,,
...,...,...,...,...,...,...,...,...
117697,USW00096405,60.4731,145.3542,25.3,CORDOVA 14 ESE,,,
117698,USW00096406,64.5014,154.1297,78.9,RUBY 44 ESE,,,70224.0
117699,USW00096407,66.5620,159.0036,6.7,SELAWIK 28 E,,,70170.0
117700,USW00096408,63.4519,150.8747,678.2,DENALI 27 N,,,70244.0


Reusable functions.

In [3]:
def coordinate_msg(latitude: float, longitude: float):
    if latitude > 0:
        latitude_msg = f"{latitude}°N"
    elif latitude < 0:
        latitude_msg = f"{abs(latitude)}°S"
    else:
        latitude_msg = "0°"

    if longitude > 0:
        longitude_msg = f"{longitude}°E"
    elif longitude < 0:
        longitude_msg = f"{abs(longitude)}°W"
    else:
        longitude_msg = "0°"


    return ','.join([latitude_msg, longitude_msg])

For each of the weather stations, pick the closest zip code.

In [4]:
from uszipcode import SearchEngine


def find_closest_zip_code(latitude: float, longitude: float):
    search = SearchEngine()
    result = search.by_coordinates(lat=latitude, lng=longitude, radius=5)

    if len(result) == 0:
        return pd.NA
    else:
        return result[0].zipcode
    

us_stations['zip_code'] =  np.vectorize(find_closest_zip_code)(us_stations['latitude'], us_stations['longitude'])
us_stations

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_stations['zip_code'] =  np.vectorize(find_closest_zip_code)(us_stations['latitude'], us_stations['longitude'])


Unnamed: 0,station_id,latitude,longitude,elevation_above_sea_level,station_name,measurement_flag,quality_flag,source_flag,zip_code
52532,US009052008,43.7333,-96.6333,482.0,SIOUX FALLS (ENVIRON. CAN,DA),,,
52533,US10RMHS145,40.5268,105.1113,1569.1,RMHS 1.6 SSW,,,,
52534,US10adam001,40.5680,-98.5069,598.0,JUNIATA 1.5 S,,,,68955
52535,US10adam002,40.5093,-98.5493,601.1,JUNIATA 6.0 SSW,,,,68973
52536,US10adam003,40.4663,-98.6537,615.1,HOLSTEIN 0.1 NW,,,,68950
...,...,...,...,...,...,...,...,...,...
117697,USW00096405,60.4731,145.3542,25.3,CORDOVA 14 ESE,,,,
117698,USW00096406,64.5014,154.1297,78.9,RUBY 44 ESE,,,70224.0,
117699,USW00096407,66.5620,159.0036,6.7,SELAWIK 28 E,,,70170.0,
117700,USW00096408,63.4519,150.8747,678.2,DENALI 27 N,,,70244.0,


In [7]:
dsm_stations = us_stations[ghcnd_stations['station_name'].str.find('DES MOINES') != -1]
dsm_stations

  dsm_stations = us_stations[ghcnd_stations['station_name'].str.find('DES MOINES') != -1]


Unnamed: 0,station_id,latitude,longitude,elevation_above_sea_level,station_name,measurement_flag,quality_flag,source_flag,zip_code
63855,US1IAPK0003,41.6066,-93.6956,260.9,DES MOINES 4.5 WNW,,,,50324.0
63856,US1IAPK0008,41.6161,-93.6708,257.9,DES MOINES 3.9 NW,,,,50310.0
63860,US1IAPK0017,41.5482,-93.6036,280.1,DES MOINES 2.1 SSE,,,,50307.0
63862,US1IAPK0022,41.548,-93.594,287.1,DES MOINES 2.3 SSE,,,,50307.0
63863,US1IAPK0023,41.5816,-93.7571,273.1,WEST DES MOINES 0.8 NNW,,,,50265.0
63868,US1IAPK0030,41.5316,-93.6091,273.1,DES MOINES 3.1 S,,,,50320.0
63869,US1IAPK0033,41.567,-93.7798,278.0,WEST DES MOINES 1.4 W,,,,50266.0
63870,US1IAPK0034,41.6097,-93.6035,256.0,DES MOINES 2.4 NNE,,,,50316.0
63873,US1IAPK0042,41.5303,-93.6312,288.0,DES MOINES 3.3 SSW,,,,50321.0
63878,US1IAPK0063,41.6484,-93.6906,261.5,DES MOINES 5.4 NW,,,,50310.0


In [5]:
import folium
m = folium.Map(
    [39.833333,-98.583333],
    zoom_start=4.6
)
m

In [6]:
import folium
import json
m = folium.Map(
    [39.833333,-98.583333],
    zoom_start=4.6
)

with open("artifacts/geojson/ia_iowa_zip_codes_geo.min.json") as iowa_zip_geojson_r:
    geo_json_data = json.load(iowa_zip_geojson_r)

for index, dsm_station in dsm_stations.iterrows():
    folium.Marker(
        location=[dsm_station.latitude, dsm_station.longitude],
        tooltip=dsm_station.station_id,
        popup=f"{dsm_station.station_name}\n{coordinate_msg(dsm_station.latitude, dsm_station.longitude)}",
        icon=folium.Icon(icon="red"),
    ).add_to(m)

folium.GeoJson(
    geo_json_data,
).add_to(m)

m

NameError: name 'dsm_stations' is not defined

In [None]:
from uszipcode import SearchEngine
search = SearchEngine()
result = search.by_coordinates(41.6843, -93.6292, radius=5)
result