#### Extraction code logic for all Shapefile Files

In [17]:
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import json
from scipy import sparse
import random
import os
from sys import getsizeof

coords = json.load(open('../Extract/coords.json'))
mpl.rcParams['figure.dpi'] = 120
# mpl.rcParams['savefig.pad_inches'] = 0

random.seed(10) # SO WE GET THE SAME DATASET EVERY TIME

# This code is processed 
# I have manually processed every file using this format

directory = r"D:\Users\xubil\OneDrive\Documents\Wildfires Data NPZ\Training"
data = {}

In [18]:
target_limit =  "limits_4326" #"limits_testing_9_chunks"
xmin, xmax, ymin, ymax = (coords[target_limit]["xmin"]), (coords[target_limit]["xmax"]), (coords[target_limit]["ymin"]), (coords[target_limit]["ymax"])

'''
where new_image is a figure.canvas.buffer_rgba() turned into a np matrix: 

nonzero_rows, nonzero_cols = np.nonzero(new_image) # Get all nonzero rows & collumns 

min_row, max_row = np.min(nonzero_rows), np.max(nonzero_rows)
min_col, max_col = np.min(nonzero_cols), np.max(nonzero_cols)
# After a series of test, (0, 575, 3, 764) was the exact fit of the canvas when the ration between width:height = 2:1

'''

min_row, max_row, min_col, max_col = 0, 575, 3, 764 
xyratio = 2/1

resx = 0.2
resy = resx/xyratio
n_chunkx = int(round((xmax-xmin)/resx, 1)) # MAKE SURE YOU CAN MATH: because we convert to int if you get 0.1232131 sketch 
n_chunky = int(round((ymax-ymin)/resy, 1)) # We're using round to not get like 2.9999999999999999997 make sure to get 0.3

# For Final Extraction: 
# x: 19.8/99 = 0.2 per chunk for 99 chunks
# y: 8/80 = 0.1 per chunk for 80 chunks

# For this file more specifically, we want to process an area around the point. Let us use the size of a chunk: 0.2 for x and 0.1 for y
mat_l, mat_w = 0.1, 0.2

In [19]:
gdf = gpd.read_file(r"D:\Users\xubil\OneDrive\Documents\Wildfires Data\Feux_pt_ori_SHP\FEUX_PT_ORI_1972_2022.shp") # Path to the shapefile 

In [20]:
causes = ["Humaine", "Foudre"]
gdf = gdf.to_crs(4326)

In [21]:
gdf

Unnamed: 0,OBJECTID,CLE,ANNEE,NOFEU,SECTION,CAUSE,DATE_DEBUT,DATE_RAPPO,DATE_ETEIN,SUP_HA,LATITUDE,LONGITUDE,geometry
0,1,20221080001,2022,1,Intensive,Humaine,2022-04-06,2022-04-06,2022-04-06,0.4,45.1289,-72.1135,POINT (-72.11350 45.12890)
1,2,20221080002,2022,2,Intensive,Humaine,2022-04-12,2022-04-12,2022-04-12,0.5,45.6624,-74.3352,POINT (-74.33520 45.66240)
2,3,20221080003,2022,3,Intensive,Humaine,2022-04-12,2022-04-12,2022-04-12,0.0,45.6734,-74.3641,POINT (-74.36410 45.67340)
3,4,20221080004,2022,4,Intensive,Humaine,2022-04-12,2022-04-12,2022-04-12,0.6,45.9701,-77.1305,POINT (-77.13050 45.97010)
4,5,20221080005,2022,5,Intensive,Humaine,2022-04-18,2022-04-18,2022-04-18,0.4,45.7227,-74.3818,POINT (-74.38180 45.72270)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43235,0,20211080621,2021,621,Intensive,Humaine,2021-10-28,2021-10-28,2021-10-28,0.0,45.4606,-72.1554,POINT (-72.15540 45.46060)
43236,0,20211080622,2021,622,Intensive,Humaine,2021-11-09,2021-11-09,2021-11-11,2.0,45.9512,-75.8547,POINT (-75.85470 45.95120)
43237,0,20211080623,2021,623,Intensive,Humaine,2021-11-11,2021-11-11,2021-11-11,0.0,45.9812,-74.2104,POINT (-74.21040 45.98120)
43238,0,20211080624,2021,624,Intensive,Humaine,2021-11-10,2021-11-16,2021-11-16,0.0,45.3931,-72.1209,POINT (-72.12090 45.39310)


In [22]:
gdf[gdf["CAUSE"] == "Foudre"].shape

(11908, 13)

### Process Training Data

In [23]:
count = 0

random.seed(10) # MAKE SURE WE GET THE SAME BOUNDS EVERY TIME :))

for cause in causes:
    print(cause)
    trainingInputCoords = []

    for point in gdf[gdf["CAUSE"] == cause].iterrows():
        rand = random.random() # Set seed makes the dataset generated always the same <3
        # rand 0-1 number determines the where exactly the grid cropped (only if possible if we are at bounds it will auto-adjust to fit bounds)

        # # # # # # # # #^
        #               #✓ The length of height and width offset is the same!!! 
        #    P          #
        #               #     
        #               #
        #               #
        # # # # # # # # #
        #<->

        # print(point)
        # print(count)

        rxcoord, rycoord = point[1]["geometry"].bounds[0], point[1]["geometry"].bounds[1]

        # print(rxcoord, rycoord)

        if not (rxcoord >= xmin and rxcoord <= xmax and rycoord >= ymin and rycoord <= ymax):
            continue

        # print(xcoord, ycoord)

        min_x, min_y = round(rxcoord-int(rand*mat_l), 3), round(rycoord-int(rand*mat_l), 3)
        max_x, max_y = min_x+mat_l, min_y+mat_l

        # print(min_x, max_x, min_y, max_y)

        # Shift the square if it is out of boundsss 
        if (min_x < xmin):
            min_x = xmin
            max_x = xmin+mat_l
        if (min_y < ymin):
            min_y = ymin
            max_y = ymin+mat_l
        if (max_x > xmax):
            max_x = xmax
            min_x = xmax-mat_l
        if (max_y >= ymax):
            max_y = ymax-1
            min_y = ymax-mat_l
        
        trainingInputCoords.append((min_x, max_x, min_y, max_y))

        print(min_x, max_x, min_y, max_y)

        count+=1

        # if (count > 100):
        #     break
        # count+=1
    # break
    np.save('../data-training/'+cause, trainingInputCoords)
    del trainingInputCoords

    # break

Humaine
-72.113 -72.013 45.129 45.229
-74.335 -74.235 45.662 45.762
-74.364 -74.26400000000001 45.673 45.773
-77.13 -77.03 45.97 46.07
-74.382 -74.28200000000001 45.723 45.823
-74.41 -74.31 45.668 45.768
-72.791 -72.691 45.319 45.419000000000004
-73.1 -73.0 46.003 46.103
-77.89 -77.79 48.23 48.33
-74.467 -74.367 45.928 46.028
-74.485 -74.385 45.628 45.728
-72.69 -72.59 45.708 45.808
-72.288 -72.188 45.173 45.273
-73.561 -73.46100000000001 46.328 46.428000000000004
-72.273 -72.173 46.129 46.229
-72.034 -71.93400000000001 46.913 47.013
-72.713 -72.613 46.466 46.566
-73.422 -73.322 45.887 45.987
-72.288 -72.188 46.523 46.623000000000005
-76.433 -76.33300000000001 45.875 45.975
-76.113 -76.013 46.0 46.1
-72.744 -72.644 46.319 46.419000000000004
-79.095 -78.995 46.717 46.817
-72.937 -72.837 47.038 47.138
-79.52 -79.42 47.369 47.469
-74.12 -74.02000000000001 45.799 45.899
-74.876 -74.77600000000001 45.893 45.993
-72.721 -72.62100000000001 46.323 46.423
-76.001 -75.90100000000001 46.279 46.37

### Similarly, we can create a code to generate training data where no fire events occured

In [27]:
def pointInRect(point,rect):
    x1, y1, x2, y2 = rect
    # x2, y2 = x1+w, y1+h
    x, y = point
    if (x1 < x and x < x2):
        if (y1 < y and y < y2):
            return True
    return False

In [28]:
count = 0

random.seed(11) # MAKE SURE WE GET THE SAME BOUNDS EVERY TIME :))
trainingInputCoords = []

for i in range(42000): # About the same number of entries as Humaine and Foudre combined

    movingOn = False
    
    while(not movingOn):
        rand = random.random() 

        rxcoord, rycoord = (random.random()*(xmax-xmin)+xmin), (random.random()*(ymax-ymin)+ymin) # Generate two points between the bounds

        min_x, min_y = round(rxcoord-int(rand*mat_l), 3), round(rycoord-int(rand*mat_l), 3)
        max_x, max_y = min_x+mat_l, min_y+mat_l


        # Shift the square if it is out of boundsss 
        if (min_x < xmin):
            min_x = xmin
            max_x = xmin+mat_l
        if (min_y < ymin):
            min_y = ymin
            max_y = ymin+mat_l
        if (max_x > xmax):
            max_x = xmax
            min_x = xmax-mat_l
        if (max_y >= ymax):
            max_y = ymax-1
            min_y = ymax-mat_l
        
        for point in gdf.iterrows():
            checkx, checky = point[1]["geometry"].bounds[0], point[1]["geometry"].bounds[1]
            if not pointInRect((checkx, checky), (min_x, max_x, min_y, max_y)):
                movingOn = True
            
    trainingInputCoords.append((min_x, max_x, min_y, max_y))

    print(min_x, max_x, min_y, max_y)

    count+=1

np.save('../data-training/Sans-Feu', trainingInputCoords)
del trainingInputCoords

-69.317 -69.217 51.994 52.094
-70.345 -70.245 49.299 49.399
-70.264 -70.164 49.639 49.739000000000004
-78.536 -78.436 47.027 47.127
-64.369 -64.269 50.148 50.248000000000005
-60.953 -60.853 52.318 52.418
-68.212 -68.11200000000001 45.86 45.96
-69.938 -69.83800000000001 45.076 45.176
-75.61 -75.51 44.841 44.941
-71.677 -71.57700000000001 51.339 51.439
-67.722 -67.622 48.598 48.698
-71.345 -71.245 46.825 46.925000000000004


KeyboardInterrupt: 