In [114]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry
import openmeteo_requests

import requests_cache
from retry_requests import retry

from shapely import wkt
from shapely.geometry import Point
import geopandas as gpd

import warnings

warnings.filterwarnings('ignore')

from pyproj import Proj, transform
import datetime as dt


## Calculate centroids for each meshblock to get coordinates

Calculate the centroid of each aggregate the centroids of the meshblocks.

By calculating the centroid, we are essentially representing the entire meshblock by a single point, which is the geometric center of the meshblock.

If we need to retrieve weather data for the entire area of the meshblock, using the centroid as a representative point is a common and practical approach I feel. 

If the area is small or the weather conditions are expected to be relatively uniform across the meshblock, this approach can be quite effective. 

I think this should be good enough to get weather information on each Meshblock. 


In [39]:
file_path = 'meshblocks-auckland.csv'
meshblock_data = pd.read_csv(file_path)

# Parse the WKT column to extract geometries
meshblock_data['geometry'] = meshblock_data['WKT'].apply(wkt.loads)

# Convert to a GeoDataFrame
gdf = gpd.GeoDataFrame(meshblock_data, geometry='geometry')

# Set the initial CRS to EPSG:2193 (modify this if different)
gdf.set_crs(epsg=2193, inplace=True)

# Convert to WGS 84 (latitude and longitude)
gdf = gdf.to_crs(epsg=4326)

# Calculate centroids for each geometry
gdf['centroid'] = gdf['geometry'].centroid

# Extract latitude and longitude from centroids
gdf['latitude'] = gdf['centroid'].apply(lambda x: x.y)
gdf['longitude'] = gdf['centroid'].apply(lambda x: x.x)

# Display the data with centroids
meshblock_centroids = gdf[['MB2022_V1_00', 'latitude', 'longitude', 'SA12022_V1_00', 'SA22022_V1_00']]
meshblock_centroids.head()


Unnamed: 0,MB2022_V1_00,latitude,longitude,SA12022_V1_00,SA22022_V1_00
0,834501,-37.177258,174.756727,7010308,162400
1,4006106,-36.571222,174.446321,7001366,111600
2,150104,-36.744939,174.749479,7003017,120800
3,165300,-36.701332,174.742409,7002042,117500
4,146300,-36.528864,174.4514,7001186,110800


## Aggregate centroids by MB2022_V1_00, SA12022 and SA22022


In [40]:
# Aggregate centroids by Mesh MB2022_V1_00
mesh_centroid = meshblock_centroids.groupby('MB2022_V1_00').agg({
    'latitude': 'mean',
    'longitude': 'mean'
}).reset_index()
mesh_centroid.head()

# Aggregate centroids by SA12022
sa1_centroids = meshblock_centroids.groupby('SA12022_V1_00').agg({
    'latitude': 'mean',
    'longitude': 'mean'
}).reset_index()

# Aggregate centroids by SA22022
sa2_centroids = meshblock_centroids.groupby('SA22022_V1_00').agg({
    'latitude': 'mean',
    'longitude': 'mean'
}).reset_index()

# Display aggregated centroids
mesh_centroid.head(),sa1_centroids.head(), sa2_centroids.head()


(   MB2022_V1_00   latitude   longitude
 0        146005 -36.447005  174.637796
 1        146104 -36.448109  174.671632
 2        146300 -36.528864  174.451400
 3        146400 -36.519188  174.492346
 4        146500 -36.502879  174.534495,
    SA12022_V1_00   latitude   longitude
 0        7001184 -36.464253  174.440685
 1        7001186 -36.528864  174.451400
 2        7001188 -36.483557  174.475554
 3        7001189 -36.556407  174.488362
 4        7001190 -36.519188  174.492346,
    SA22022_V1_00   latitude   longitude
 0         110700 -36.685651  174.414821
 1         110800 -36.515263  174.510876
 2         111200 -36.508434  174.652716
 3         111600 -36.610850  174.493627
 4         112100 -36.567242  174.648233)

## Get centroid co-ordinates from Meshblock, SA1, SA2 number


It depends on which level we want to use Mesh Block, SA1 or SA2

Example: find by Mesh, let say we want to find mesh 834501

In [41]:
mesh_code = 834501
mesh_coordinates = mesh_centroids[mesh_centroids['MB2022_V1_00'] == mesh_code][['latitude', 'longitude']]
mesh_coordinates 

Unnamed: 0,latitude,longitude
9494,-37.177258,174.756727


Find by SA1, Let's assume we want SA1 = 7001186

In [135]:
sa1_code = 7001186
SA1_coordinates = sa1_centroids[sa1_centroids['SA12022_V1_00'] == sa1_code][['latitude', 'longitude']]

SA1_coordinates

Unnamed: 0,latitude,longitude
1,-36.528864,174.4514


## Pass coordinates to weather code

7 days prediction

In [140]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": SA1_coordinates['latitude'], #pass SA1 latitude value here
	"longitude": SA1_coordinates['longitude'],#pass SA1 longitude value here
	"current": ["temperature_2m", "relative_humidity_2m", "is_day", "rain", "showers", "snowfall", "wind_speed_10m"],
	"hourly": ["temperature_2m", "precipitation", "rain", "showers", "snowfall", "snow_depth"],
	"daily": ["rain_sum", "showers_sum"],
	"timezone": "Pacific/Auckland"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Current values. The order of variables needs to be the same as requested.
current = response.Current()
current_temperature_2m = current.Variables(0).Value()
current_relative_humidity_2m = current.Variables(1).Value()
current_is_day = current.Variables(2).Value()
current_rain = current.Variables(3).Value()
current_showers = current.Variables(4).Value()
current_snowfall = current.Variables(5).Value()
current_wind_speed_10m = current.Variables(6).Value()

print(f"Current time {current.Time()}")
print(f"Current temperature_2m {current_temperature_2m}")
print(f"Current relative_humidity_2m {current_relative_humidity_2m}")
print(f"Current is_day {current_is_day}")
print(f"Current rain {current_rain}")
print(f"Current showers {current_showers}")
print(f"Current snowfall {current_snowfall}")
print(f"Current wind_speed_10m {current_wind_speed_10m}")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(1).ValuesAsNumpy()
hourly_rain = hourly.Variables(2).ValuesAsNumpy()
hourly_showers = hourly.Variables(3).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(4).ValuesAsNumpy()
hourly_snow_depth = hourly.Variables(5).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["rain"] = hourly_rain
hourly_data["showers"] = hourly_showers
hourly_data["snowfall"] = hourly_snowfall
hourly_data["snow_depth"] = hourly_snow_depth

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_rain_sum = daily.Variables(0).ValuesAsNumpy()
daily_showers_sum = daily.Variables(1).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["rain_sum"] = daily_rain_sum
daily_data["showers_sum"] = daily_showers_sum

daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)

Coordinates -36.5°N 174.5°E
Elevation 30.0 m asl
Timezone b'Pacific/Auckland' b'NZST'
Timezone difference to GMT+0 43200 s
Current time 1716540300
Current temperature_2m 12.199999809265137
Current relative_humidity_2m 92.0
Current is_day 0.0
Current rain 0.0
Current showers 0.0
Current snowfall 0.0
Current wind_speed_10m 8.08999252319336
                         date  temperature_2m  precipitation  rain  showers  \
0   2024-05-23 12:00:00+00:00         11.9555            0.0   0.0      0.0   
1   2024-05-23 13:00:00+00:00         11.8555            0.0   0.0      0.0   
2   2024-05-23 14:00:00+00:00         11.5555            0.0   0.0      0.0   
3   2024-05-23 15:00:00+00:00         11.6055            0.0   0.0      0.0   
4   2024-05-23 16:00:00+00:00         11.7555            0.0   0.0      0.0   
..                        ...             ...            ...   ...      ...   
163 2024-05-30 07:00:00+00:00         14.5055            0.0   0.0      0.0   
164 2024-05-30 08:00:00+00:0

In [46]:
#cas_df = pd.read_csv('Crash_Analysis_System_(CAS)_data.csv')

In [47]:
#cas_df

Unnamed: 0,X,Y,OBJECTID,advisorySpeed,areaUnitID,bicycle,bridge,bus,carStationWagon,cliffBank,...,train,tree,truck,unknownVehicleType,urban,vanOrUtility,vehicle,waterRiver,weatherA,weatherB
0,1.756461e+06,5.936053e+06,3,,507000,0,,0,0,,...,,,0,0,Urban,1,,,Fine,Null
1,1.772256e+06,5.896393e+06,8,,525420,0,,0,1,,...,,,1,0,Urban,0,,,Fine,Null
2,1.766897e+06,5.907471e+06,10,,523601,0,0.0,0,2,0.0,...,0.0,0.0,0,0,Urban,0,1.0,0.0,Fine,Null
3,1.754057e+06,5.936391e+06,11,,506902,0,,0,2,,...,,,0,0,Urban,0,,,Fine,Null
4,1.772574e+06,5.901427e+06,14,,523813,0,,0,2,,...,,,0,0,Open,0,,,Light rain,Null
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285341,1.743947e+06,5.924009e+06,1318956,,513511,0,,0,2,,...,,,0,0,Urban,0,,,Fine,Null
285342,1.768891e+06,5.904246e+06,1318957,,523820,0,0.0,0,2,0.0,...,0.0,0.0,0,0,Urban,0,0.0,0.0,Fine,Null
285343,1.757918e+06,5.914599e+06,1318959,,518600,0,,0,2,,...,,,0,0,Urban,0,,,Fine,Null
285344,1.758255e+06,5.918060e+06,1318961,,517400,0,0.0,0,1,0.0,...,0.0,0.0,0,0,Urban,0,0.0,0.0,Fine,Null


In [None]:
''' disable
# Define the NZTM2000 and WGS84 projections
nztm2000 = Proj('epsg:2193')
wgs84 = Proj('epsg:4326')

# Function to convert NZTM2000 to WGS84
def convert_to_wgs84(row):
    x, y = row['X'], row['Y']
    lon, lat = transform(nztm2000, wgs84, x, y)
    return pd.Series({'longitude': lon, 'latitude': lat})

cas_df_converted = pd.DataFrame(cas_df)

# Apply the conversion function to each row in the DataFrame
cas_df[['longitude', 'latitude']] = cas_df_converted.apply(convert_to_wgs84, axis=1)

# Display the updated DataFrame
print(cas_df)
'''

In [141]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": SA1_coordinates['latitude'],
	"longitude": SA1_coordinates['longitude'],
	"hourly": ["temperature_2m", "weather_code", "cloud_cover", "wind_speed_10m"],
	"daily": ["weather_code", "temperature_2m_min", "sunrise", "sunset", "daylight_duration", "sunshine_duration", "wind_speed_10m_max"],
	"timezone": "Pacific/Auckland",
	"past_days": 92
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_weather_code = hourly.Variables(1).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(2).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(3).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["weather_code"] = hourly_weather_code
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_weather_code = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
daily_sunrise = daily.Variables(2).ValuesAsNumpy()
daily_sunset = daily.Variables(3).ValuesAsNumpy()
daily_daylight_duration = daily.Variables(4).ValuesAsNumpy()
daily_sunshine_duration = daily.Variables(5).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(6).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["weather_code"] = daily_weather_code
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["sunrise"] = daily_sunrise
daily_data["sunset"] = daily_sunset
daily_data["daylight_duration"] = daily_daylight_duration
daily_data["sunshine_duration"] = daily_sunshine_duration
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max

daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)

Coordinates -36.5°N 174.5°E
Elevation 30.0 m asl
Timezone b'Pacific/Auckland' b'NZST'
Timezone difference to GMT+0 43200 s
                          date  temperature_2m  weather_code  cloud_cover  \
0    2024-02-21 12:00:00+00:00         13.4055           1.0         56.0   
1    2024-02-21 13:00:00+00:00         13.0055           0.0          5.0   
2    2024-02-21 14:00:00+00:00         12.6555           0.0          0.0   
3    2024-02-21 15:00:00+00:00         12.5055           0.0          3.0   
4    2024-02-21 16:00:00+00:00         12.3555           1.0         28.0   
...                        ...             ...           ...          ...   
2371 2024-05-30 07:00:00+00:00         14.5055           2.0         67.0   
2372 2024-05-30 08:00:00+00:00         14.4555           2.0         68.0   
2373 2024-05-30 09:00:00+00:00         14.4055           2.0         70.0   
2374 2024-05-30 10:00:00+00:00         14.3055           2.0         68.0   
2375 2024-05-30 11:00:00+00:00

## Weather Condition Mapping

Defined mappings to categorize weather conditions on hourly basis <br>
For weatherA - 'Fine', 'Light rain', 'Mist or Fog', 'Heavy rain', 'Snow', 'Hail or Sleet' based on weather codes<br> 
for weatherB - 'Strong wind' , 'Frost', based on wind speed and temperature. <br>
Used apply method with custom functions to map the weather codes to our defined categories.


## Light Condition Mapping

To determine light conditions ('Bright Sun', 'Overcast', 'Twilight', 'Dark', 'Unknown') based on level of cloud cover percentage and time of day. 



In [142]:
def map_to_light_conditions(row):
    cloudcover = row['cloud_cover']
    hour = row['date'].hour
    
    if hour >= 6 and hour < 17:
        # Daytime
        if cloudcover >= 70 or row['weather_code'] == 3: #more than 80% cloudover or weather_code=overcast
            return 'Overcast'
        else:
            return 'Bright Sun'
    elif (hour >= 5 and hour < 6) or (hour >= 17 and hour < 18):
        # Twilight
        return 'Twilight'
    else:
    #elif hour >= 18 or hour < 5:
        # Nighttime
        return 'Dark'


# Define mapping functions for weatherA, based on weather_code from open-meteo
def map_to_weatherA(row):
    if row['weather_code'] in [71, 73, 75, 77, 85, 86]:
        return 'Snow'
    elif row['weather_code'] in [63, 65, 81, 82]:
        return 'Heavy rain'
    elif row['weather_code'] in [51, 53, 55, 56, 57, 61, 80]:
        return 'Light rain'
    elif row['weather_code'] in [66, 67, 96, 99]:
        return 'Hail or Sleet'
    elif row['weather_code'] in [45, 48]:
        return 'Mist or Fog'
    else:
        return 'Fine'

#for weatherB, based on wind speed and temperature. 
def map_to_weatherB(row):
    if row['wind_speed_10m'] > 30:
        return 'Strong wind'
    elif row['temperature_2m'] <= 0:
        return 'Frost'
    else:
        return 'None'

# Apply the mapping functions
hourly_dataframe['weatherA'] = hourly_dataframe.apply(map_to_weatherA, axis=1)
hourly_dataframe['weatherB'] = hourly_dataframe.apply(map_to_weatherB, axis=1)
hourly_dataframe['light'] = hourly_dataframe.apply(map_to_light_conditions, axis=1)


hourly_dataframe

Unnamed: 0,date,temperature_2m,weather_code,cloud_cover,wind_speed_10m,weatherA,weatherB,light
0,2024-02-21 12:00:00+00:00,13.4055,1.0,56.0,1.800000,Fine,,Bright Sun
1,2024-02-21 13:00:00+00:00,13.0055,0.0,5.0,1.297998,Fine,,Bright Sun
2,2024-02-21 14:00:00+00:00,12.6555,0.0,0.0,1.080000,Fine,,Bright Sun
3,2024-02-21 15:00:00+00:00,12.5055,0.0,3.0,1.138420,Fine,,Bright Sun
4,2024-02-21 16:00:00+00:00,12.3555,1.0,28.0,2.189795,Fine,,Bright Sun
...,...,...,...,...,...,...,...,...
2371,2024-05-30 07:00:00+00:00,14.5055,2.0,67.0,13.830749,Fine,,Bright Sun
2372,2024-05-30 08:00:00+00:00,14.4555,2.0,68.0,12.819235,Fine,,Bright Sun
2373,2024-05-30 09:00:00+00:00,14.4055,2.0,70.0,11.808878,Fine,,Overcast
2374,2024-05-30 10:00:00+00:00,14.3055,2.0,68.0,10.799999,Fine,,Bright Sun


In [143]:
filtered_df = hourly_dataframe[hourly_dataframe['date'].dt.date == pd.Timestamp('2024-05-25').date()]

result = filtered_df[['date', 'weatherA', 'weatherB', 'light']]

In [144]:
result

Unnamed: 0,date,weatherA,weatherB,light
2244,2024-05-25 00:00:00+00:00,Fine,,Dark
2245,2024-05-25 01:00:00+00:00,Fine,,Dark
2246,2024-05-25 02:00:00+00:00,Fine,,Dark
2247,2024-05-25 03:00:00+00:00,Fine,,Dark
2248,2024-05-25 04:00:00+00:00,Fine,,Dark
2249,2024-05-25 05:00:00+00:00,Fine,,Twilight
2250,2024-05-25 06:00:00+00:00,Fine,,Overcast
2251,2024-05-25 07:00:00+00:00,Fine,,Bright Sun
2252,2024-05-25 08:00:00+00:00,Fine,,Bright Sun
2253,2024-05-25 09:00:00+00:00,Fine,,Overcast
