# Sources

https://open-meteo.com/ 

Documentation:
https://open-meteo.com/en/docs/historical-weather-api/

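Variables = Temperature, Precipitation, Wind Speed (the hourly request below also pulls relative humidity, rain, wind direction, wind gusts, and a day/night flag)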

Coordinates = one point per NYC borough (five latitude/longitude pairs; see the API request parameters below)

Range = July 2023 to December 2023


In [9]:
#importing libraries
import os

import openmeteo_requests
import pandas as pd
import requests
import requests_cache
from retry_requests import retry
from tqdm import tqdm

In [13]:
# One representative street address per borough: weather can differ between
# boroughs (it may rain in Manhattan but not in Queens), so each borough gets
# its own lookup point.

# Column labels, in the same order as the fields of every row below
columns = ["street", "city", "state", "zip"]

# One row per borough: street, city, state, zip (zip kept as text)
boroughs_data = [
    ["881 7th Ave",      "New York",      "New York", "10019"],
    ["2344 Arthur Ave",  "Bronx",         "New York", "10458"],
    ["294 Utica Ave",    "Brooklyn",      "New York", "11213"],
    ["2286 Richmond Rd", "Staten Island", "New York", "10306"],
    ["98-11 44th Ave",   "Queens",        "New York", "11368"],
]

# Assemble the address table and show it
boroughs_df = pd.DataFrame(data=boroughs_data, columns=columns)
print(boroughs_df)


             street           city     state    zip
0       881 7th Ave       New York  New York  10019
1   2344 Arthur Ave          Bronx  New York  10458
2     294 Utica Ave       Brooklyn  New York  11213
3  2286 Richmond Rd  Staten Island  New York  10306
4    98-11 44th Ave         Queens  New York  11368


In [14]:
#preparing to geocode here using the Census geocoder API
#setting parameters and arguments
def geocode_address(address):
    """Geocode one street address with the U.S. Census Bureau geocoder.

    Parameters
    ----------
    address : mapping
        Must provide the keys 'street', 'city', 'state' and 'zip'.

    Returns
    -------
    tuple or None
        ``(latitude, longitude, geoid, tract_code)`` taken from the first
        address match. ``geoid`` / ``tract_code`` are ``None`` when the match
        carries no 'Census Blocks' / 'Census Tracts' entry.
        ``None`` is returned (after printing a message) when the request
        fails, the HTTP status is not 200, the body is not JSON, or the
        service reports no match.
    """
    base_url = "https://geocoding.geo.census.gov/geocoder/geographies/address"

    # Prepare the request parameters
    params = {
        'street': address['street'],
        'city': address['city'],
        'state': address['state'],
        'zip': address['zip'],
        'benchmark': 'Public_AR_Census2020',
        'vintage': 'Census2020_Census2020',
        'format': 'json',
        'layers': 'all',
    }

    # Credentials must not live in the notebook: read the key from the
    # CENSUS_API_KEY environment variable and send it only when it is set.
    # NOTE(review): a key used to be hard-coded here and should be treated as
    # exposed (rotate it). The geocoder is presumably usable without a key --
    # confirm against the Census geocoder documentation.
    api_key = os.environ.get("CENSUS_API_KEY")
    if api_key:
        params['key'] = api_key

    # Make the API request; the timeout keeps a stalled connection from
    # hanging the notebook, and network errors become a None result so one
    # bad address does not abort a whole batch.
    try:
        response = requests.get(base_url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: request failed for the address: {address} ({exc})")
        return None

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON (e.g. an HTML error page)
        print("Error: response body was not valid JSON")
        print(response.text)
        return None

    # Check if there is at least one match for the address
    result = data.get('result') if isinstance(data, dict) else None
    matches = (result or {}).get('addressMatches') or []
    if not matches:
        print(f"No match found for the address: {address}")
        return None

    # Extract latitude, longitude, GEOID and TRACT from the first match
    first_match = matches[0]
    coordinates = first_match['coordinates']
    latitude = coordinates['y']
    longitude = coordinates['x']

    # Geography layers may be absent from a match; fall back to None instead
    # of raising so the coordinates are still returned.
    geographies = first_match.get('geographies') or {}
    blocks = geographies.get('Census Blocks') or [{}]
    tracts = geographies.get('Census Tracts') or [{}]
    geoid = blocks[0].get('GEOID')
    tract_code = tracts[0].get('TRACT')

    return latitude, longitude, geoid, tract_code

In [15]:
# Accumulators for the geocoding results: latitude, longitude, GEOID and
# tract code, one entry per address
latitudes, longitudes, geoids, tract_codes = [], [], [], []

In [20]:
%%time
#execution of geocoding 
from tqdm import tqdm

# Use tqdm to create a progress bar for the iteration
for index, row in tqdm(boroughs_df.iterrows(), total=len(boroughs_df), desc="Geocoding Progress"):
    address = {
        'street': row['street'],
        'city': row['city'],
        'state': row['state'],
        'zip': row['zip'],
    }

    result = geocode_address(address)

    if result:
        latitudes.append(result[0])
        longitudes.append(result[1])
        geoids.append(result[2])
        tract_codes.append(result[3])
    else:
        latitudes.append(None)
        longitudes.append(None)
        geoids.append(None)
        tract_codes.append(None)

# Add the latitude, longitude, GEOID, TRACT CODE columns to the existing DataFrame
boroughs_df['latitude'] = latitudes
boroughs_df['longitude'] = longitudes
boroughs_df['geoid'] = geoids
boroughs_df['tract_code'] = tract_codes


Geocoding Progress: 100%|█████████████████████████| 5/5 [00:16<00:00,  3.37s/it]

             street           city     state    zip   latitude  longitude  \
0       881 7th Ave       New York  New York  10019  40.764866 -73.980427   
1   2344 Arthur Ave          Bronx  New York  10458  40.854237 -73.888620   
2     294 Utica Ave       Brooklyn  New York  11213  40.668028 -73.931270   
3  2286 Richmond Rd  Staten Island  New York  10306  40.578309 -74.116376   
4    98-11 44th Ave         Queens  New York  11368  40.745606 -73.865856   

             geoid tract_code  
0  360610137002001     013700  
1  360050391004002     039100  
2  360470351021001     035102  
3  360850122003001     012200  
4  360810409012002     040901  
CPU times: user 340 ms, sys: 45.8 ms, total: 386 ms
Wall time: 16.9 s





In [21]:
# Preview the first five rows of the geocoded borough table (nothing is saved here)
boroughs_df.head(5)

Unnamed: 0,street,city,state,zip,latitude,longitude,geoid,tract_code
0,881 7th Ave,New York,New York,10019,40.764866,-73.980427,360610137002001,13700
1,2344 Arthur Ave,Bronx,New York,10458,40.854237,-73.88862,360050391004002,39100
2,294 Utica Ave,Brooklyn,New York,11213,40.668028,-73.93127,360470351021001,35102
3,2286 Richmond Rd,Staten Island,New York,10306,40.578309,-74.116376,360850122003001,12200
4,98-11 44th Ave,Queens,New York,11368,40.745606,-73.865856,360810409012002,40901


In [22]:
# Open-Meteo API client built in three layers:
#   1. an HTTP session that caches responses under '.cache'
#      (expire_after=-1 is requests_cache's "never expire" setting),
#   2. a retry wrapper around it (5 retries, backoff factor 0.2),
#   3. the Open-Meteo client that sends its requests through that session.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=-1,
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [23]:

# Make sure all required weather variables are listed here.
# HOURLY_VARIABLES is the single source of truth for the variable order: the
# same sequence is sent in the request, used to index hourly.Variables(i), and
# used as the DataFrame column names, so the three cannot drift apart.
HOURLY_VARIABLES = (
    "temperature_2m",
    "relative_humidity_2m",
    "precipitation",
    "rain",
    "wind_speed_10m",
    "wind_speed_100m",
    "wind_direction_10m",
    "wind_direction_100m",
    "wind_gusts_10m",
    "is_day",
)

url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": [40.6501, 40.8499, 40.6815, 40.5623, 40.7834],
    "longitude": [-73.9496, -73.8664, -73.8365, -74.1399, -73.9663],
    "start_date": "2023-07-01",
    "end_date": "2023-12-31",
    "hourly": list(HOURLY_VARIABLES),
    "timezone": "America/New_York"
}
responses = openmeteo.weather_api(url, params=params)


def hourly_response_to_data(location_response, variable_names=HOURLY_VARIABLES):
    """Turn one location's hourly payload into a dict of column name -> values.

    Parameters
    ----------
    location_response
        One element of the list returned by ``openmeteo.weather_api``.
    variable_names : sequence of str
        Variable names in the exact order they were requested; position ``i``
        is read from ``Hourly().Variables(i)``.

    Returns
    -------
    dict
        A 'date' entry (hourly timestamps built with ``utc=True``, so they are
        UTC even though the request asks for America/New_York) followed by
        one array per name in ``variable_names``.
    """
    hourly_block = location_response.Hourly()
    columns_by_name = {"date": pd.date_range(
        start = pd.to_datetime(hourly_block.Time(), unit = "s", utc = True),
        end = pd.to_datetime(hourly_block.TimeEnd(), unit = "s", utc = True),
        freq = pd.Timedelta(seconds = hourly_block.Interval()),
        inclusive = "left"
    )}
    for position, variable_name in enumerate(variable_names):
        columns_by_name[variable_name] = hourly_block.Variables(position).ValuesAsNumpy()
    return columns_by_name


# Process every requested location instead of only the first one.
# NOTE(review): assumes responses come back in the same order as the
# latitude/longitude lists in params -- confirm against the client docs.
hourly_data_by_location = [hourly_response_to_data(loc) for loc in responses]
hourly_dataframes = [pd.DataFrame(data = columns) for columns in hourly_data_by_location]

# Summary of the first location (kept for continuity with later cells)
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# First-location objects under their original names
hourly = response.Hourly()
hourly_data = hourly_data_by_location[0]
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_precipitation,
    hourly_rain,
    hourly_wind_speed_10m,
    hourly_wind_speed_100m,
    hourly_wind_direction_10m,
    hourly_wind_direction_100m,
    hourly_wind_gusts_10m,
    hourly_is_day,
) = (hourly_data[variable_name] for variable_name in HOURLY_VARIABLES)

hourly_dataframe = hourly_dataframes[0]
print(hourly_dataframe)


Coordinates 40.6678352355957°N -73.93768310546875°E
Elevation 19.0 m asl
Timezone b'America/New_York' b'EDT'
Timezone difference to GMT+0 -14400 s
                          date  temperature_2m  relative_humidity_2m  \
0    2023-07-01 04:00:00+00:00       19.459000             83.071091   
1    2023-07-01 05:00:00+00:00       19.209000             83.306427   
2    2023-07-01 06:00:00+00:00       19.209000             80.948868   
3    2023-07-01 07:00:00+00:00       17.609001             88.066505   
4    2023-07-01 08:00:00+00:00       18.409000             83.479706   
...                        ...             ...                   ...   
4411 2023-12-31 23:00:00+00:00        4.559000             59.783077   
4412 2024-01-01 00:00:00+00:00        3.559000             65.567383   
4413 2024-01-01 01:00:00+00:00        4.109000             57.272648   
4414 2024-01-01 02:00:00+00:00        3.209000             63.335499   
4415 2024-01-01 03:00:00+00:00        2.309000             70