# Create Fine Area Stations

In [6]:
import pandas as pd

## Determine max/min longitude-latitude coordinates

In [8]:
# Load the Kelowna weather-station table; the 'Longitude' and 'Latitude'
# columns of this frame drive the bounding-box and grid computations below.
df = pd.read_csv('../data/processing/weather-stations/kelowna/kelowna_stations.csv')

In [9]:
# Show the stations ordered south-to-north (ascending order is the default).
df.sort_values('Latitude')

Unnamed: 0,Network ID,Network Name,Native ID,Station ID,Station Name,History ID,Province,Longitude,Latitude,Elevation (m),Record Start,Record End,Obs Freq,Variables
78,1,EC,1131410,1112,CARMI,1515,BC,-119.083333,49.500000,1245.0,1924-01-01,1969-03-31,Daily,Precipitation Amount|Rainfall Amount|Snowfall ...
7,1,EC,1126160,243,PENTICTON SEWAGE PLANT,646,BC,-119.600000,49.500000,344.0,1954-10-01,1969-11-30,Daily,Precipitation Amount|Rainfall Amount|Snowfall ...
285,17,ARDA,112254,4998,ROGER RCH DS,6678,BC,-119.777500,49.513333,832.0,1980-10-08,1981-07-22,Daily,Precipitation Amount|Surface Snow Depth (Point)
145,12,FLNRO-WMB,328,1876,PENTICTON RS,2279,BC,-119.553300,49.518300,427.0,1988-08-16,2023-10-08,Hourly,Dew Point Temperature|Precipitation Amount|Pre...
165,12,FLNRO-WMB,393,1936,NICOLL,2339,BC,-118.360300,49.526700,866.0,1988-10-28,2024-01-14,Hourly,Dew Point Temperature|Precipitation Amount|Pre...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,1,EC,1164830,1287,MALAKWA EAGLE R HATCHERY,1690,BC,-118.650000,51.000000,404.0,1924-01-01,2001-01-01,Daily,Precipitation Amount|Precipitation Climatology...
369,17,ARDA,116181,5660,UP ROSS CK KAM,7342,BC,-119.223611,51.000000,858.0,1971-05-12,1973-10-23,Daily,Precipitation (Cumulative)|Surface Snow Depth ...
131,1,EC,1176750,1362,REVELSTOKE,1765,BC,-118.200000,51.000000,456.0,1898-05-01,1969-11-30,Daily,Precipitation Amount|Rainfall Amount|Snowfall ...
167,12,FLNRO-WMB,696,2147,ZZ FOWLER LAKE,2550,BC,-120.000000,51.000000,13.0,1989-05-16,1990-10-05,Daily,Precipitation Amount|Relative Humidity (Mean)|...


In [10]:
# Bounding box of all stations: extreme values of each coordinate column.
longitudes, latitudes = df['Longitude'], df['Latitude']
longitude_max, longitude_min = longitudes.max(), longitudes.min()
latitude_max, latitude_min = latitudes.max(), latitudes.min()

In [11]:
# Printed order: longitude max, longitude min, latitude max, latitude min.
print(longitude_max, longitude_min, latitude_max, latitude_min)

-118.193333333333 -120.330555555556 51.0 49.5


In [12]:
# Width and height of the station bounding box, in the same units as the
# coordinate columns.
longitude_distance, latitude_distance = (
    longitude_max - longitude_min,
    latitude_max - latitude_min,
)
print(longitude_distance, latitude_distance)

2.137222222223002 1.5


## Divide into fine areas

In [30]:
def _axis_mask(values, lower, upper, include_upper):
    """Boolean mask for ``lower <= values < upper`` (or ``<= upper`` when *include_upper*)."""
    if include_upper:
        return (values >= lower) & (values <= upper)
    return (values >= lower) & (values < upper)


def spatially_split(df, num_long_splits, num_lat_splits, burn_data=None, total_burn=None,
                    output_dir="../data/processing/weather-stations/kelowna",
                    burn_threshold=0.005):
    """Split stations into a regular longitude/latitude grid and save one CSV per cell.

    The grid spans the bounding box of ``df`` (min/max of its 'Longitude' and
    'Latitude' columns). Cells are numbered row by row starting at 1: the
    latitude index is the outer loop and the longitude index the inner loop, so
    cell ``i * num_long_splits + j + 1`` is written to
    ``{output_dir}/fine_area_<n>_stations.csv``.

    Cells are half-open ``[lower, upper)`` except the last cell along each axis,
    which also includes its upper edge. Without that, stations lying exactly on
    the maximum latitude or longitude would not be assigned to any cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Station table with 'Longitude' and 'Latitude' columns.
    num_long_splits, num_lat_splits : int
        Number of grid columns / rows; both must be >= 1.
    burn_data : pandas.DataFrame, optional
        Table with 'LONGITUDE', 'LATITUDE' and 'FIRE_SIZE_HA' columns
        (presumably fire size in hectares -- confirm against the data source).
        When omitted, the notebook-level variable ``burn_data`` is used.
    total_burn : list, optional
        List that receives one summed 'FIRE_SIZE_HA' value per cell, appended
        in cell order. When omitted, the notebook-level ``total_burn`` is used.
    output_dir : str or path-like, optional
        Directory the per-cell CSV files are written to; it must already exist.
    burn_threshold : float, optional
        A cell is counted as "above" when its summed burn is >= this value.

    Returns
    -------
    None
        Results are reported via ``print``, the CSV files, and ``total_burn``.

    Raises
    ------
    ValueError
        If either split count is smaller than 1.
    NameError
        If ``burn_data`` / ``total_burn`` is neither passed nor defined globally.
    """
    if num_long_splits < 1 or num_lat_splits < 1:
        raise ValueError("num_long_splits and num_lat_splits must both be at least 1")

    def _notebook_global(name):
        # Backward compatibility: earlier callers relied on these names being
        # defined in the notebook namespace instead of passing them in.
        try:
            return globals()[name]
        except KeyError:
            raise NameError(
                f"name '{name}' is not defined; pass it as a keyword argument"
            ) from None

    if burn_data is None:
        burn_data = _notebook_global("burn_data")
    if total_burn is None:
        total_burn = _notebook_global("total_burn")

    # Bounding box of the stations and the size of one grid cell along each axis
    longitude_max = df['Longitude'].max()
    longitude_min = df['Longitude'].min()
    latitude_max = df['Latitude'].max()
    latitude_min = df['Latitude'].min()
    long_split_distance = (longitude_max - longitude_min) / num_long_splits
    lat_split_distance = (latitude_max - latitude_min) / num_lat_splits

    # Print the geographical bounds and split distances for debugging purposes
    print(f"min_long = {longitude_min}, min_lat = {latitude_min}, max_long = {longitude_max}, max_lat = {latitude_max}, long_split = {long_split_distance}, lat_split = {lat_split_distance}")

    # Shared edge lists: neighbouring cells use the very same float for their
    # common boundary (no rounding gaps/overlaps), and the final edge is the
    # exact maximum rather than min + n * step.
    long_edges = [longitude_min + j * long_split_distance for j in range(num_long_splits)] + [longitude_max]
    lat_edges = [latitude_min + i * lat_split_distance for i in range(num_lat_splits)] + [latitude_max]

    # Longitude masks do not depend on the latitude row, so build them once.
    longitude_masks = []
    for j in range(num_long_splits):
        is_last_column = j == num_long_splits - 1
        longitude_masks.append((
            _axis_mask(df['Longitude'], long_edges[j], long_edges[j + 1], is_last_column),
            _axis_mask(burn_data['LONGITUDE'], long_edges[j], long_edges[j + 1], is_last_column),
        ))

    # Counters for cells without any station and cells whose burn reaches the threshold
    zero_count = 0
    num_above = 0

    for i in range(num_lat_splits):
        is_last_row = i == num_lat_splits - 1
        latitude_mask_station = _axis_mask(df['Latitude'], lat_edges[i], lat_edges[i + 1], is_last_row)
        latitude_mask_burn = _axis_mask(burn_data['LATITUDE'], lat_edges[i], lat_edges[i + 1], is_last_row)

        for j, (longitude_mask_station, longitude_mask_burn) in enumerate(longitude_masks):
            # Stations located in the current cell
            station_df = df[latitude_mask_station & longitude_mask_station]

            # Total fire size recorded in the current cell
            single_burn = burn_data.loc[latitude_mask_burn & longitude_mask_burn, 'FIRE_SIZE_HA'].sum()
            total_burn.append(single_burn)
            print(f"total burn is {single_burn}")

            if single_burn >= burn_threshold:
                num_above += 1
            if len(station_df) == 0:
                zero_count += 1

            # Cells are numbered from 1, row by row
            station_df.to_csv(f"{output_dir}/fine_area_{i*num_long_splits+j+1}_stations.csv")
            print(f"num of stations to aggregate is {len(station_df)}")

    # Summary: empty cells and cells whose burn reaches the threshold
    print(f"zero count is {zero_count}")
    print(f"num burn above {burn_threshold} is {num_above}")

In [31]:
# 6 longitude splits x 6 latitude splits -> 36 fine areas, one CSV written per area.
spatially_split(df, 6, 6)

min_long = -120.330555555556, min_lat = 49.5, max_long = -118.193333333333, max_lat = 51.0, long_split = 0.3562037037038337, lat_split = 0.25
total burn is 0.0
num of stations to aggregate is 15
total burn is 0.0
num of stations to aggregate is 87
total burn is 0.0
num of stations to aggregate is 14
total burn is 0.0
num of stations to aggregate is 11
total burn is 0.0
num of stations to aggregate is 6
total burn is 0.0
num of stations to aggregate is 1
total burn is 0.0
num of stations to aggregate is 6
total burn is 0.0
num of stations to aggregate is 18
total burn is 0.0
num of stations to aggregate is 55
total burn is 0.0
num of stations to aggregate is 10
total burn is 0.0
num of stations to aggregate is 8
total burn is 0.0
num of stations to aggregate is 5
total burn is 0.0
num of stations to aggregate is 8
total burn is 0.0
num of stations to aggregate is 5
total burn is 0.0
num of stations to aggregate is 19
total burn is 0.0
num of stations to aggregate is 14
total burn is 0.0