In [1]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry
import openmeteo_requests

import requests_cache
from retry_requests import retry

from shapely import wkt
from shapely.geometry import Point
import geopandas as gpd

import warnings

warnings.filterwarnings('ignore')

from pyproj import Proj, transform
import datetime as dt

import requests_cache
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from openmeteo_requests import Client
import numpy as np

## Calculate centroids for each meshblock to get coordinates

Calculate the centroid of each aggregate the centroids of the meshblocks.

By calculating the centroid, we are essentially representing the entire meshblock by a single point, which is the geometric center of the meshblock.

If we need to retrieve weather data for the entire area of the meshblock, using the centroid as a representative point is a common and practical approach I feel. 

If the area is small or the weather conditions are expected to be relatively uniform across the meshblock, this approach can be quite effective. 

In [2]:
file_path = 'meshblocks-auckland-higher.csv'
meshblock_data = pd.read_csv(file_path)

# Parse the WKT column to extract geometries
meshblock_data['geometry'] = meshblock_data['WKT'].apply(wkt.loads)

# Convert to a GeoDataFrame
gdf = gpd.GeoDataFrame(meshblock_data, geometry='geometry')

# Set the initial CRS to EPSG:2193 (modify this if different)
gdf.set_crs(epsg=2193, inplace=True)

# Convert to WGS 84 (latitude and longitude)
gdf = gdf.to_crs(epsg=4326)

# Calculate centroids for each geometry
gdf['centroid'] = gdf['geometry'].centroid

# Extract latitude and longitude from centroids
gdf['latitude'] = gdf['centroid'].apply(lambda x: x.y)
gdf['longitude'] = gdf['centroid'].apply(lambda x: x.x)

# Display the data with centroids
meshblock_centroids = gdf[['MB2024_V1_00', 'latitude', 'longitude', 'SA12023_V1_00', 'SA22023_V1_00', 'SA32023_V1_00']]
meshblock_centroids.head()


Unnamed: 0,MB2024_V1_00,latitude,longitude,SA12023_V1_00,SA22023_V1_00,SA32023_V1_00
0,4019145,-36.899286,174.716277,7037220,135500,51440
1,4001328,-36.852361,174.777485,7037219,136400,51200
2,827902,-37.218339,174.943047,7032417,169901,52450
3,4014920,-37.100411,174.946908,7032175,164302,52250
4,4004236,-37.093404,174.951154,7010152,164302,52250


Remove duplicate SA2 values 

In [3]:
# Remove duplicate rows based on 'SA22023_V1_00'
SA2_SA3_unique = meshblock_centroids.drop_duplicates(subset=['SA22023_V1_00'])

Run weather API for all SA2, SA3 values 

In [32]:
# Set up the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_strategy = Retry(total=5, backoff_factor=0.2, status_forcelist=[429, 500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retry_strategy)
cache_session.mount("https://", adapter)
cache_session.mount("http://", adapter)
openmeteo = Client(session=cache_session)

# Define a function to get weather data for a given latitude and longitude
def get_weather_data(latitude, longitude):
    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "daily": ["weather_code", "temperature_2m_min", "wind_speed_10m_max"],
        "timezone": "Pacific/Auckland",
        "forecast_days": 14
    }
    response = openmeteo.weather_api(url, params=params)[0]
    
    daily = response.Daily()
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "weather_code": daily.Variables(0).ValuesAsNumpy(),
        "temperature_2m_min": daily.Variables(1).ValuesAsNumpy(),
        "wind_speed_10m_max": daily.Variables(2).ValuesAsNumpy()
    }
    return pd.DataFrame(data=daily_data)

# Initialize an empty list to store the weather data for all locations
all_weather_data = []

# Loop through each unique SA22023_V1_00 value and get the weather data
for index, row in SA2_SA3_unique.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    sa22023_v1_00 = row['SA22023_V1_00']
    sa32023_v1_00 = row['SA32023_V1_00']
    
    # Fetch weather data
    weather_data = get_weather_data(latitude, longitude)
    
    # Add the SA22023_V1_00 value to the weather data
    weather_data['SA22023_V1_00'] = sa22023_v1_00
    weather_data['SA32023_V1_00'] = sa32023_v1_00
    
    # Append to the list
    all_weather_data.append(weather_data)

# Combine all the weather data into a single DataFrame
combined_weather_data = pd.concat(all_weather_data, ignore_index=True)

# Display the combined weather data
#import ace_tools as tools; tools.display_dataframe_to_user(name="Combined Weather Data", dataframe=combined_weather_data)




In [38]:
combined_weather_data['SA22023_V1_00'] = combined_weather_data['SA22023_V1_00'].astype(int)
combined_weather_data['SA32023_V1_00'] = combined_weather_data['SA32023_V1_00'].astype(int)

Mapping weather codes to CAS weatherA and weatehrB

In [41]:
# Define mapping functions for weatherA, based on weather_code from open-meteo and filtered date
def map_to_weatherA(row):
    if row['weather_code'] in [71, 73, 75, 77, 85, 86]:
        return 'Snow'
    elif row['weather_code'] in [63, 65, 81, 82, 95]:
        return 'Heavy rain'
    elif row['weather_code'] in [51, 53, 55, 56, 57, 61, 80]:
        return 'Light rain'
    elif row['weather_code'] in [66, 67, 96, 99]:
        return 'Hail or Sleet'
    elif row['weather_code'] in [45, 48]:
        return 'Mist or Fog'
    else:
        return 'Fine'

#for weatherB, based on wind speed and temperature. 
def map_to_weatherB(row):
    if row['wind_speed_10m_max'] > 30:
        return 'Strong wind'
    elif row['temperature_2m_min'] <= 0:
        return 'Frost'
    else:
        return 'None'
    
for index, row in combined_weather_data.iterrows():

# Apply the mapping functions
    combined_weather_data['weatherA'] = combined_weather_data.apply(map_to_weatherA, axis=1)
    combined_weather_data['weatherB'] = combined_weather_data.apply(map_to_weatherB, axis=1)
    combined_weather_data['light'] = 'day'
# extract date only
    combined_weather_data['date'] = pd.to_datetime(combined_weather_data['date'])
    combined_weather_data['date'] = combined_weather_data['date'].dt.date


Generate light condition for 'night' and combine two datasets

In [51]:
result_day = combined_weather_data[['date', 'SA22023_V1_00','SA32023_V1_00','weatherA','weatherB', 'light']]
result_night = combined_weather_data.copy()

for index, row in result_night.iterrows():

    result_night['light'] = 'night'
    result_night = result_night[['date', 'SA22023_V1_00','SA32023_V1_00','weatherA', 'weatherB', 'light']]
#print(result_day)
#print(result_night)
combined_result = pd.concat([result_day, result_night], ignore_index=True)

# Sort the DataFrame by 'date' and then by 'light' to ensure 'Day' comes before 'Night'
combined_result.sort_values(by=['date'], ascending=[True], inplace=True)

# Reset index after sorting
combined_result.reset_index(drop=True, inplace=True)

#combined_result.to_csv('weather_forecaset_sa3.csv', index=False)
combined_result


            date  SA22023_V1_00  SA32023_V1_00    weatherA     weatherB  light
0     2024-05-28         135500          51440  Heavy rain  Strong wind  night
1     2024-05-29         135500          51440        Fine  Strong wind  night
2     2024-05-30         135500          51440        Fine         None  night
3     2024-05-31         135500          51440        Fine         None  night
4     2024-06-01         135500          51440        Fine         None  night
...          ...            ...            ...         ...          ...    ...
8717  2024-06-06         112901          50480  Light rain         None  night
8718  2024-06-07         112901          50480        Fine         None  night
8719  2024-06-08         112901          50480        Fine         None  night
8720  2024-06-09         112901          50480        Fine         None  night
8721  2024-06-10         112901          50480        Fine         None  night

[8722 rows x 6 columns]


In [65]:
combined_result.to_csv('weather_forecaset_all.csv', index=False)
