# Sources

https://open-meteo.com/ 

Documentation:
https://open-meteo.com/en/docs/historical-weather-api/

Variables (hourly) = Temperature, Relative Humidity, Precipitation, Rain, Wind Speed, Wind Direction, Wind Gusts, Day/Night flag

Coordinates = one point per New York City borough (Brooklyn, Bronx, Queens, Staten Island, Manhattan)

Range = July 1, 2023 to December 31, 2023


In [None]:
#importing libraries
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import pandas as pd
import requests

In [22]:
# Build the Open-Meteo API client on top of an HTTP session that caches
# responses in '.cache' and retries failed requests (up to 5 retries, backoff factor 0.2).
# NOTE(review): expire_after=-1 is presumably "never expire" in requests_cache — confirm.
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [23]:

# Retrieve hourly historical weather for every location listed in `params`.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": [40.6501, 40.8499, 40.6815, 40.5623, 40.7834],
    "longitude": [-73.9496, -73.8664, -73.8365, -74.1399, -73.9663],
    "start_date": "2023-07-01",
    "end_date": "2023-12-31",
    "hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "rain", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m", "is_day"],
    "timezone": "America/New_York"
}

# Make one API request per location.
# Each request gets its own copy of `params` with scalar coordinates, so the
# shared `params` dict still lists every location once the loop has finished
# (the loop variables `lat` and `long` keep the last location, as before).
all_hourly_data = []
for lat, long in zip(params["latitude"], params["longitude"]):
    request_params = {**params, "latitude": lat, "longitude": long}
    # A single coordinate pair is sent, so only the first response is used.
    response = openmeteo.weather_api(url, params=request_params)[0]

    hourly = response.Hourly()

    # Timestamps are rebuilt from the response's start, end and interval (in
    # seconds) and are expressed in UTC; the end point itself is excluded.
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "latitude": lat,
        "longitude": long,
    }

    # Variable i of the response is stored under the i-th requested name, so the
    # column names are taken from the request list instead of being repeated by
    # hand (which could silently drift out of step with `params["hourly"]`).
    for position, variable_name in enumerate(params["hourly"]):
        hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()

    hourly_dataframe = pd.DataFrame(data=hourly_data)
    all_hourly_data.append(hourly_dataframe)

# Concatenate the per-location frames into one table with a fresh 0..n-1 index
final_dataframe = pd.concat(all_hourly_data, ignore_index=True)
print(final_dataframe)


Coordinates 40.6678352355957°N -73.93768310546875°E
Elevation 19.0 m asl
Timezone b'America/New_York' b'EDT'
Timezone difference to GMT+0 -14400 s
                          date  temperature_2m  relative_humidity_2m  \
0    2023-07-01 04:00:00+00:00       19.459000             83.071091   
1    2023-07-01 05:00:00+00:00       19.209000             83.306427   
2    2023-07-01 06:00:00+00:00       19.209000             80.948868   
3    2023-07-01 07:00:00+00:00       17.609001             88.066505   
4    2023-07-01 08:00:00+00:00       18.409000             83.479706   
...                        ...             ...                   ...   
4411 2023-12-31 23:00:00+00:00        4.559000             59.783077   
4412 2024-01-01 00:00:00+00:00        3.559000             65.567383   
4413 2024-01-01 01:00:00+00:00        4.109000             57.272648   
4414 2024-01-01 02:00:00+00:00        3.209000             63.335499   
4415 2024-01-01 03:00:00+00:00        2.309000             70

In [None]:
# Define the missing timestamp (local New York time).
# NOTE(review): the main download appears to stop one hour short of the end of
# 2023-12-31 local time (its last printed row is 2024-01-01 03:00 UTC) — confirm.
missing_timestamp = pd.Timestamp('2023-12-31 23:00:00', tz='America/New_York')

# Variables to request; values are read back in this same order.
missing_variables = ["temperature_2m", "relative_humidity_2m", "precipitation", "rain", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m", "is_day"]

# The hour is back-filled for every location already present in the table,
# not only for the location left over in the previous cell's loop variables.
missing_locations = list(
    final_dataframe[["latitude", "longitude"]]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

new_rows = []
for lat, long in missing_locations:
    lat, long = float(lat), float(long)

    # Define the parameters for the day that contains the missing timestamp
    missing_params = {
        "latitude": lat,
        "longitude": long,
        "start_date": missing_timestamp.strftime('%Y-%m-%d'),
        "end_date": missing_timestamp.strftime('%Y-%m-%d'),
        "hourly": missing_variables,
        "timezone": "America/New_York"
    }

    # Make API request for that day (one coordinate pair, so one response)
    missing_response = openmeteo.weather_api(url, params=missing_params)[0]
    missing_hourly = missing_response.Hourly()

    # The response holds the whole day. Locate the requested hour from the
    # response's own start time and interval instead of taking element 0,
    # which would be the first hour of the day rather than 23:00.
    hour_index = int((missing_timestamp.timestamp() - missing_hourly.Time()) // missing_hourly.Interval())

    # Create a new row with the missing timestamp and retrieved values.
    # The timestamp is stored in UTC like the rest of the 'date' column.
    new_row = {
        'date': missing_timestamp.tz_convert('UTC'),
        'latitude': lat,
        'longitude': long,
    }
    for position, variable_name in enumerate(missing_variables):
        values = missing_hourly.Variables(position).ValuesAsNumpy()
        if not 0 <= hour_index < len(values):
            raise ValueError(
                f"{missing_timestamp} is outside the range returned for ({lat}, {long})"
            )
        new_row[variable_name] = values[hour_index]

    new_rows.append(new_row)

# Append the new rows to the DataFrame.
# DataFrame.append no longer exists in pandas 2.x, so pd.concat is used.
final_dataframe = pd.concat([final_dataframe, pd.DataFrame(new_rows)], ignore_index=True)

# Re-running this cell must not add the same hour twice.
final_dataframe = final_dataframe.drop_duplicates(
    subset=['date', 'latitude', 'longitude'], ignore_index=True
)

print(final_dataframe)


In [None]:
import pytz

def utc_to_est(utc_timestamp):
    """Return `utc_timestamp` expressed in the 'America/New_York' time zone.

    Parameters
    ----------
    utc_timestamp
        Object exposing ``astimezone(tz)``, e.g. a timezone-aware timestamp.

    Returns
    -------
    Whatever ``utc_timestamp.astimezone`` returns for the New York zone, so the
    UTC offset of the result depends on the date (see the EDT/EST note below).
    """
    new_york = pytz.timezone('America/New_York')
    return utc_timestamp.astimezone(new_york)

When the offset is "-04:00", the timestamp is in Eastern Daylight Time (EDT), which is observed while daylight saving time is in effect (from the second Sunday of March to the first Sunday of November). EDT is 4 hours behind UTC (UTC-4).

When the offset is "-05:00", the timestamp is in Eastern Standard Time (EST), which is observed during the rest of the year, when daylight saving time is not in effect. EST is 5 hours behind UTC (UTC-5).

In [None]:
# Convert every value of the 'date' column to New York time, one element at a
# time, and store the result back in the same column.
dates_new_york = final_dataframe['date'].apply(utc_to_est)
final_dataframe['date'] = dates_new_york
print(final_dataframe)

In [None]:
# Define a function to map latitude values to borough names
def map_latitude_to_borough(latitude, tolerance=1e-4):
    """Return the borough whose reference latitude matches `latitude`.

    Parameters
    ----------
    latitude
        Latitude in decimal degrees. Anything `float()` accepts is allowed.
    tolerance : float, optional
        Largest absolute difference (in degrees) still treated as a match.
        Comparing with a tolerance instead of `==` keeps the lookup working
        when the value has lost precision, for example after being stored as
        float32. The reference latitudes are far more than `tolerance` apart,
        so at most one borough can match with the default.

    Returns
    -------
    str or None
        The borough name, or None for unknown, missing (NaN/None) or
        non-numeric latitude values.
    """
    reference_latitudes = (
        (40.6501, 'Brooklyn'),
        (40.8499, 'Bronx'),
        (40.6815, 'Queens'),
        (40.5623, 'Staten Island'),
        (40.7834, 'Manhattan'),
    )

    try:
        latitude_value = float(latitude)
    except (TypeError, ValueError):
        return None  # Not a number at all, so it cannot be a known latitude

    for reference, borough in reference_latitudes:
        # A NaN latitude makes this comparison False for every reference.
        if abs(latitude_value - reference) <= tolerance:
            return borough

    return None  # Return None for unknown latitude values

# Look up the borough for each row's latitude and store it as the 'borough' column
borough_column = final_dataframe['latitude'].apply(map_latitude_to_borough)
final_dataframe['borough'] = borough_column

print(final_dataframe)


In [None]:
# Flag the rows whose 'borough' value is missing
borough_is_missing = final_dataframe['borough'].isna()
null_boroughs = final_dataframe[borough_is_missing]

# Show those rows
print(null_boroughs)