#### Extraction code logic for all Shapefile Files

In [1]:
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import json
from scipy import sparse
import random
import os
from sys import getsizeof
from shapely.geometry import Polygon
from shapely import Point
from shapely.geometry import MultiPoint
from shapely import ops

coords = json.load(open('../Extract/coords.json'))
mpl.rcParams['figure.dpi'] = 120
# mpl.rcParams['savefig.pad_inches'] = 0

random.seed(10) # SO WE GET THE SAME DATASET EVERY TIME

# This code is processed 
# I have manually processed every file using this format

directory = r"D:\Users\xubil\OneDrive\Documents\Wildfires Data NPZ\Training"
data = {}

In [2]:
target_limit =  "limits_4326" #"limits_testing_9_chunks"
xmin, xmax, ymin, ymax = (coords[target_limit]["xmin"]), (coords[target_limit]["xmax"]), (coords[target_limit]["ymin"]), (coords[target_limit]["ymax"])

'''
where new_image is a figure.canvas.buffer_rgba() turned into a np matrix: 

nonzero_rows, nonzero_cols = np.nonzero(new_image) # Get all nonzero rows & collumns 

min_row, max_row = np.min(nonzero_rows), np.max(nonzero_rows)
min_col, max_col = np.min(nonzero_cols), np.max(nonzero_cols)
# After a series of test, (0, 575, 3, 764) was the exact fit of the canvas when the ration between width:height = 2:1

'''

min_row, max_row, min_col, max_col = 0, 575, 3, 764 
xyratio = 2/1

resx = 0.2
resy = resx/xyratio
n_chunkx = int(round((xmax-xmin)/resx, 1)) # MAKE SURE YOU CAN MATH: because we convert to int if you get 0.1232131 sketch 
n_chunky = int(round((ymax-ymin)/resy, 1)) # We're using round to not get like 2.9999999999999999997 make sure to get 0.3

# For Final Extraction: 
# x: 19.8/99 = 0.2 per chunk for 99 chunks
# y: 8/80 = 0.1 per chunk for 80 chunks

# For this file more specifically, we want to process an area around the point. Let us use the size of a chunk: 0.05 by 0.05
mat_h, mat_w = 0.008, 0.012

In [3]:
gdf = gpd.read_file(r"D:\Users\xubil\OneDrive\Documents\Wildfires Data\Feux_pt_ori_SHP\FEUX_PT_ORI_1972_2022.shp") # Path to the shapefile 

In [4]:
causes = ["Humaine", "Foudre"]
gdf = gdf.to_crs(4326)

In [5]:
gdf

Unnamed: 0,OBJECTID,CLE,ANNEE,NOFEU,SECTION,CAUSE,DATE_DEBUT,DATE_RAPPO,DATE_ETEIN,SUP_HA,LATITUDE,LONGITUDE,geometry
0,1,20221080001,2022,1,Intensive,Humaine,2022-04-06,2022-04-06,2022-04-06,0.4,45.1289,-72.1135,POINT (-72.11350 45.12890)
1,2,20221080002,2022,2,Intensive,Humaine,2022-04-12,2022-04-12,2022-04-12,0.5,45.6624,-74.3352,POINT (-74.33520 45.66240)
2,3,20221080003,2022,3,Intensive,Humaine,2022-04-12,2022-04-12,2022-04-12,0.0,45.6734,-74.3641,POINT (-74.36410 45.67340)
3,4,20221080004,2022,4,Intensive,Humaine,2022-04-12,2022-04-12,2022-04-12,0.6,45.9701,-77.1305,POINT (-77.13050 45.97010)
4,5,20221080005,2022,5,Intensive,Humaine,2022-04-18,2022-04-18,2022-04-18,0.4,45.7227,-74.3818,POINT (-74.38180 45.72270)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43235,0,20211080621,2021,621,Intensive,Humaine,2021-10-28,2021-10-28,2021-10-28,0.0,45.4606,-72.1554,POINT (-72.15540 45.46060)
43236,0,20211080622,2021,622,Intensive,Humaine,2021-11-09,2021-11-09,2021-11-11,2.0,45.9512,-75.8547,POINT (-75.85470 45.95120)
43237,0,20211080623,2021,623,Intensive,Humaine,2021-11-11,2021-11-11,2021-11-11,0.0,45.9812,-74.2104,POINT (-74.21040 45.98120)
43238,0,20211080624,2021,624,Intensive,Humaine,2021-11-10,2021-11-16,2021-11-16,0.0,45.3931,-72.1209,POINT (-72.12090 45.39310)


In [6]:
gdf[gdf["CAUSE"] == "Foudre"].shape

(11908, 13)

### Process Training Data

In [20]:
count = 0

random.seed(10) # MAKE SURE WE GET THE SAME BOUNDS EVERY TIME :))

for cause in causes:
    print(cause)
    trainingInputCoords = []
    trainingInputDates = []

    for point in gdf[gdf["CAUSE"] == cause].iterrows():
        rand = random.random() # Set seed makes the dataset generated always the same <3
        # rand 0-1 number determines the where exactly the grid cropped (only if possible if we are at bounds it will auto-adjust to fit bounds)

        # # # # # # # # #^
        #               #✓ The length of height and width offset is the same!!! 
        #    P          #
        #               #     
        #               #
        #               #
        # # # # # # # # #
        #<->

        # print(point)
        # print(count)

        rxcoord, rycoord = point[1]["geometry"].bounds[0], point[1]["geometry"].bounds[1]

        # print(rxcoord, rycoord)

        if not (rxcoord >= xmin and rxcoord <= xmax and rycoord >= ymin and rycoord <= ymax):
            continue

        # print(xcoord, ycoord)

        min_x, min_y = round(rxcoord-int(rand*mat_w), 3), round(rycoord-int(rand*mat_h), 3)
        max_x, max_y = min_x+mat_w, min_y+mat_h

        # print(min_x, max_x, min_y, max_y)

        # Shift the square if it is out of boundsss 
        if (min_x < xmin):
            min_x = xmin
            max_x = xmin+mat_w
        if (min_y < ymin):
            min_y = ymin
            max_y = ymin+mat_w
        if (max_x > xmax):
            max_x = xmax
            min_x = xmax-mat_h
        if (max_y >= ymax):
            max_y = ymax-1
            min_y = ymax-mat_h
        
        trainingInputCoords.append((min_x, max_x, min_y, max_y))
        trainingInputDates.append(point[1]["DATE_DEBUT"])

        print(min_x, max_x, min_y, max_y)

        count+=1

        # if (count > 100):
        #     break
        # count+=1
    # break
    np.save('../data-training/'+cause, trainingInputCoords)
    np.save('../data-training/'+cause+"-dates", trainingInputDates)
    del trainingInputDates
    del trainingInputCoords

    # break

Humaine
-72.113 -72.101 45.129 45.137
-74.335 -74.323 45.662 45.67
-74.364 -74.352 45.673 45.681000000000004
-77.13 -77.118 45.97 45.978
-74.382 -74.37 45.723 45.731
-74.41 -74.398 45.668 45.676
-72.791 -72.779 45.319 45.327000000000005
-73.1 -73.088 46.003 46.011
-77.89 -77.878 48.23 48.238
-74.467 -74.455 45.928 45.936
-74.485 -74.473 45.628 45.636
-72.69 -72.678 45.708 45.716
-72.288 -72.276 45.173 45.181000000000004
-73.561 -73.549 46.328 46.336000000000006
-72.273 -72.261 46.129 46.137
-72.034 -72.022 46.913 46.921
-72.713 -72.701 46.466 46.474000000000004
-73.422 -73.41 45.887 45.895
-72.288 -72.276 46.523 46.531000000000006
-76.433 -76.421 45.875 45.883
-76.113 -76.101 46.0 46.008
-72.744 -72.732 46.319 46.327000000000005
-79.095 -79.083 46.717 46.725
-72.937 -72.925 47.038 47.046
-79.52 -79.508 47.369 47.377
-74.12 -74.108 45.799 45.807
-74.876 -74.864 45.893 45.901
-72.721 -72.709 46.323 46.331
-76.001 -75.989 46.279 46.287000000000006
-76.343 -76.331 45.677 45.685
-76.187 -76

### Similarly, we can create a code to generate training data where no fire events occured

In [8]:
def pointInRect(point,rect):
    x1, y1, x2, y2 = rect
    # x2, y2 = x1+w, y1+h
    x, y = point
    if (x1 < x and x < x2):
        if (y1 < y and y < y2):
            return True
    return False

In [9]:
points = []

for point in gdf.iterrows():
    checkx, checky = point[1]["geometry"].bounds[0], point[1]["geometry"].bounds[1]
    points.append([checkx, checky])

In [12]:
ob = MultiPoint(points)

In [13]:
count = 0

random.seed(11) # MAKE SURE WE GET THE SAME BOUNDS EVERY TIME :))
trainingInputCoords = []

for i in range(42000): # About the same number of entries as Humaine and Foudre combined

    movingOn = False
    
    while(not movingOn):
        rand = random.random() 

        rxcoord, rycoord = (random.random()*(xmax-xmin)+xmin), (random.random()*(ymax-ymin)+ymin) # Generate two points between the bounds

        min_x, min_y = round(rxcoord-int(rand*mat_w), 3), round(rycoord-int(rand*mat_h), 3)
        max_x, max_y = min_x+mat_w, min_y+mat_h

        # print(min_x, max_x, min_y, max_y)

        # Shift the square if it is out of boundsss 
        if (min_x < xmin):
            min_x = xmin
            max_x = xmin+mat_w
        if (min_y < ymin):
            min_y = ymin
            max_y = ymin+mat_w
        if (max_x > xmax):
            max_x = xmax
            min_x = xmax-mat_h
        if (max_y >= ymax):
            max_y = ymax-1
            min_y = ymax-mat_h

        poly = Polygon([[min_x, min_y], [min_x, max_y], [max_x, max_y], [max_x, min_y]])

        # print(gdf.bounds)


        if (poly.intersects(ob)):
            print("Contains Point!")
        else:
            # print("Doesn't Contain!")
            movingOn = True
        
        # for point in gdf.iterrows():
        #     checkx, checky = point[1]["geometry"].bounds[0], point[1]["geometry"].bounds[1]
        #     if not pointInRect((checkx, checky), (min_x, max_x, min_y, max_y)):
        #         movingOn = True
            
    trainingInputCoords.append((min_x, max_x, min_y, max_y))

    # print(min_x, max_x, min_y, max_y)

    # if count == 0:
    #     break

    count+=1
    print(count)

np.save('../data-training/Sans-Feu', trainingInputCoords)
del trainingInputCoords

Contains Point!
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
Contains Point!
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
Contains Point!
222
223
224
Contains Point!
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
Contains Point!
250
251
252
253
254
255
256
257


### And one more time for Data with Temporal Data

In [14]:
count = 0

random.seed(11) # MAKE SURE WE GET THE SAME BOUNDS EVERY TIME :))
trainingInputCoords = []

for i in range(42000): # About the same number of entries as Humaine and Foudre combined

    movingOn = False
    
    rand = random.random() 

    rxcoord, rycoord = (random.random()*(xmax-xmin)+xmin), (random.random()*(ymax-ymin)+ymin) # Generate two points between the bounds

    min_x, min_y = round(rxcoord-int(rand*mat_w), 3), round(rycoord-int(rand*mat_h), 3)
    max_x, max_y = min_x+mat_w, min_y+mat_h

        # print(min_x, max_x, min_y, max_y)

        # Shift the square if it is out of boundsss 
    if (min_x < xmin):
        min_x = xmin
        max_x = xmin+mat_w
    if (min_y < ymin):
        min_y = ymin
        max_y = ymin+mat_w
    if (max_x > xmax):
        max_x = xmax
        min_x = xmax-mat_h
    if (max_y >= ymax):
        max_y = ymax-1
        min_y = ymax-mat_h
            
    trainingInputCoords.append((min_x, max_x, min_y, max_y))

    print(min_x, max_x, min_y, max_y)

    count+=1

np.save('../data-training/Sans-Feu-Temp', trainingInputCoords)
del trainingInputCoords

-69.317 -69.30499999999999 51.994 52.002
-70.345 -70.333 49.299 49.307
-70.264 -70.252 49.639 49.647000000000006
-78.536 -78.524 47.027 47.035000000000004
-64.369 -64.357 50.148 50.156000000000006
-60.953 -60.941 52.318 52.326
-68.212 -68.2 45.86 45.868
-69.938 -69.926 45.076 45.084
-75.61 -75.598 44.841 44.849000000000004
-71.677 -71.665 51.339 51.347
-67.722 -67.71 48.598 48.606
-71.345 -71.333 46.825 46.833000000000006
-60.685 -60.673 51.322 51.330000000000005
-74.158 -74.146 46.437 46.445
-79.01 -78.998 50.73 50.738
-63.638 -63.626 47.692 47.7
-63.623 -63.611 44.604 44.612
-62.377 -62.365 48.36 48.368
-72.531 -72.519 45.184 45.192
-64.985 -64.973 46.758 46.766000000000005
-73.815 -73.803 52.313 52.321000000000005
-78.064 -78.05199999999999 46.571 46.579
-79.214 -79.202 50.976 50.984
-69.326 -69.314 48.179 48.187000000000005
-65.908 -65.896 45.648 45.656000000000006
-78.093 -78.081 47.966 47.974000000000004
-75.058 -75.046 52.367 52.375
-74.378 -74.366 51.679 51.687000000000005
-72.