In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from scipy import sparse
from sys import getsizeof
import cv2
import numpy as np
import os
import datetime as dt
import json
import sys

# Scratch containers; nothing in this cell populates them.
data = dict()
processed = dict()

# Named bounding boxes, each given as x/y minima and maxima.
# NOTE(review): the key name suggests EPSG:4326 (lon/lat degrees) - confirm.
coords = {
    "limits_4326": {"xmin": -80.4, "xmax": -60.6, "ymin": 44.6, "ymax": 52.6},
}

# Bounding box in use for the whole notebook (alternative: "limits_testing_9_chunks").
target_limit = "limits_4326"
xmin, xmax, ymin, ymax = (
    coords[target_limit][edge] for edge in ("xmin", "xmax", "ymin", "ymax")
)

# Directories holding the per-chunk .npy files read by the collection loops.
chunksDirectory = "drive/MyDrive/Wildfire Research Project/Wildfires Data/Chunks Yesterday"
chunksDirectory2 = "drive/MyDrive/Wildfire Research Project/Wildfires Data/Chunks"

In [None]:
# Exclusive upper chunk number of each data type, in ascending order.
DataOrder = {
    4011: "Thunder",
    13433: "Human",
    28433: "No-Fire",
    43433: "No-Fire-Data",
}

# Inclusive (first, last) chunk numbers per data type, derived from the
# boundaries above: each type starts where the previous one stopped.
#   Thunder 0-4010, Human 4011-13432, No-Fire 13433-28432, No-Fire-Data 28433-43432
DataIntervals = {
    DataOrder[stop]: (start, stop - 1)
    for start, stop in zip([0] + list(DataOrder)[:-1], DataOrder)
}

# Label attached to every sample of a data type: 1 for the two fire types,
# 0 for the two no-fire types.
DataTypeToOutput = {
    **dict.fromkeys(("Thunder", "Human"), 1),
    **dict.fromkeys(("No-Fire", "No-Fire-Data"), 0),
}

In [None]:
# One array per data type, loaded from Drive. getTrainingData indexes these
# by (chunk number - first chunk number of the type) to obtain a chunk's bounds.
Training = {
    label: np.load(path)
    for label, path in (
        ("Thunder", "drive/MyDrive/Wildfire Research Project/Wildfires Data/Foudre.npy"),
        ("Human", "drive/MyDrive/Wildfire Research Project/Wildfires Data/Humaine.npy"),
        ("No-Fire", "drive/MyDrive/Wildfire Research Project/Wildfires Data/Sans-Feu.npy"),
        ("No-Fire-Data", "drive/MyDrive/Wildfire Research Project/Wildfires Data/Sans-Feu-Temp.npy"),
    )
}
# Both fire types stacked together, thunder rows first.
Training["Fire"] = np.concatenate([Training["Thunder"], Training["Human"]])

In [None]:
shapefile_datasets = []
directory = "drive/MyDrive/Wildfire Research Project/Wildfires Data/Training"

# os.listdir() returns entries in arbitrary order, but the position of a
# dataset in shapefile_datasets decides where its 12x12 patch ends up in every
# feature vector. Load the known layers in a fixed order (hydrography, roads,
# transmission lines) and any additional file after them, alphabetically, so
# the feature layout is the same on every run.
layerOrder = ["Hydrographie.npz", "Route.npz", "Lignes.npz"]
orderedFilenames = sorted(
    os.listdir(directory),
    key=lambda name: (layerOrder.index(name) if name in layerOrder else len(layerOrder), name),
)

for filename in orderedFilenames:
    # Entries that are not sparse-matrix archives.
    if (filename == "fires" or filename == "Ignore"):
        continue

    print("Reading from ",filename)
    filedir = os.path.join(directory, filename)
    # CSC format, as used by the row/column slicing done per chunk later on.
    shapefile_datasets.append(sparse.load_npz(filedir).tocsc())

Reading from  Hydrographie.npz
Reading from  Route.npz
Reading from  Lignes.npz


In [None]:
# Names of chunk files rejected by getTrainingData / getTrainingData2.
ignoredBadFiles = list()

### Utils

In [None]:
def boundsToMat(bounds, datashape, limits=None):
  """Map coordinate bounds onto index offsets of a matrix spanning ``limits``.

  Each coordinate is converted linearly: the lower limit maps to index 0 and
  the upper limit maps to the matrix width (x) or height (y). Results are
  truncated toward zero by ``int``. The y axis is not flipped: the lower y
  limit corresponds to row 0.

  Args:
    bounds: Indexable with four numbers. Items 0 and 1 are scaled against the
      x limits, items 2 and 3 against the y limits.
      NOTE(review): presumably ordered (xmin, xmax, ymin, ymax) - confirm
      against how the bounds arrays were produced.
    datashape: Shape of the target matrix as (height, width, ...).
    limits: Optional ``(xmin, xmax, ymin, ymax)`` extent covered by the
      matrix. Defaults to the module-level ``xmin, xmax, ymin, ymax``.

  Returns:
    Tuple ``(col0, col1, row0, row1)`` of ints, in the same order as the
    items of ``bounds``.
  """
  if limits is None:
    limits = (xmin, xmax, ymin, ymax)
  left, right, bottom, top = limits
  spanX = right - left
  spanY = top - bottom
  height, width = datashape[0], datashape[1]

  def toCol(x):
    return int(((x - left) / spanX) * width)

  def toRow(y):
    return int(((y - bottom) / spanY) * height)

  return (toCol(bounds[0]), toCol(bounds[1]), toRow(bounds[2]), toRow(bounds[3]))

"Some of the weather files have shape (7, 6), while others have (6, 6) or (6, 7).
The other files vary as well: some have shape (12, 12), others (11, 12), and others (12, 13).
Therefore, to maintain consistency, do I just use (6, 6) for the weather files and (11, 11) for the others?"

## Preprocessing Training Data

In [None]:
# Names of the six layers, in the order used for itemRatios below.
# NOTE(review): assumed to match the layer order inside each chunk file - confirm.
fileOrder = ["prcp", "tmax", "tmin"] + ["EVI", "NDVI", "Lai"]
# Names for the three sparse datasets appended after the chunk layers.
otherFilesOrder = ["hydrography", "roads", "transmission"]
# (rows, cols) kept from each chunk layer, by layer index: the first three
# layers are cropped to 6x6 and the last three to 11x11.
itemRatios = [(6,6)] * 3 + [(11,11)] * 3

In [None]:
def getTrainingData(filename, chunkN, chunksDir=None, verbose=False):
  """Build the feature vector and label for one chunk file.

  The feature vector is the concatenation, in this order, of:
    * every layer of the chunk file (first axis), cropped to the matching
      ``itemRatios`` entry and flattened;
    * one 12x12 patch per entry of ``shapefile_datasets``, cut out at the
      chunk's bounds and resized with cubic interpolation, or 144 zeros when
      the cut-out is empty.

  Args:
    filename: Name of the chunk ``.npy`` file inside ``chunksDir``.
    chunkN: Chunk number. It selects the data type through ``DataIntervals``
      and the row of ``Training[dataType]`` that holds the chunk's bounds.
    chunksDir: Directory containing the chunk file. Defaults to the
      module-level ``chunksDirectory``.
    verbose: When True, print per-chunk debugging details (layer count,
      layer indices and the sizes of the results).

  Returns:
    ``(trainInput, trainOutput, dataType)``: the 1-D feature array, a
    one-element list with the label from ``DataTypeToOutput``, and the data
    type name. ``(None, None, None)`` when the chunk is skipped, either
    because ``chunkN`` lies outside every interval or because the file does
    not hold one layer per ``itemRatios`` entry; the filename is then
    appended to ``ignoredBadFiles``.
  """
  if chunksDir is None:
    chunksDir = chunksDirectory

  # Resolve the data type explicitly: a chunk number outside every interval
  # must not fall through to an arbitrary type and index the wrong bounds.
  dataType = None
  for candidate, (first, last) in DataIntervals.items():
    if (first <= chunkN <= last):
      dataType = candidate
      break
  if (dataType is None):
    print(filename, "does not belong to any known chunk interval")
    ignoredBadFiles.append(filename)
    return None, None, None
  trainOutput = [DataTypeToOutput[dataType]]

  data = np.load(os.path.join(chunksDir, filename))
  if (data.shape[0] != len(itemRatios)):
    print(filename, "is actually bad")
    ignoredBadFiles.append(filename)
    return None, None, None

  if verbose:
    print(data.shape[0])

  # Collect the pieces and concatenate once at the end. The empty float64
  # seed keeps the dtype promotion identical to growing an empty array.
  parts = [np.array([])]
  for i, (rows, cols) in enumerate(itemRatios):
    if verbose:
      print(i)
    # Layers come in slightly different shapes; crop to the common size.
    parts.append(data[i, :rows, :cols].ravel())

  bounds = Training[dataType][chunkN-DataIntervals[dataType][0]]
  for dataset in shapefile_datasets:
    minx, maxx, miny, maxy = boundsToMat(bounds, dataset.shape)
    data_raw = dataset[miny:maxy, minx:maxx].todense()
    if (data_raw.size == 0):
      print("Empty List")
      parts.append(np.zeros(12*12)) # We'll see about this maybe discard the data too
      continue
    parts.append(cv2.resize(data_raw, dsize=(12, 12), interpolation=cv2.INTER_CUBIC).ravel())

  trainInput = np.concatenate(parts)

  if verbose:
    print("OK")
    print(sys.getsizeof(trainInput))
    print(sys.getsizeof(trainOutput))
  return trainInput, trainOutput, dataType

In [None]:
def getTrainingData2(filename, chunkN, chunksDir=None, verbose=False):
  """Build the feature vector and label for one chunk file of ``chunksDirectory2``.

  Same feature layout as ``getTrainingData``; only the default directory
  differs. The feature vector is the concatenation, in this order, of:
    * every layer of the chunk file (first axis), cropped to the matching
      ``itemRatios`` entry and flattened;
    * one 12x12 patch per entry of ``shapefile_datasets``, cut out at the
      chunk's bounds and resized with cubic interpolation, or 144 zeros when
      the cut-out is empty.

  Args:
    filename: Name of the chunk ``.npy`` file inside ``chunksDir``.
    chunkN: Chunk number. It selects the data type through ``DataIntervals``
      and the row of ``Training[dataType]`` that holds the chunk's bounds.
    chunksDir: Directory containing the chunk file. Defaults to the
      module-level ``chunksDirectory2``.
    verbose: When True, print per-chunk debugging details (layer count,
      layer indices and the sizes of the results).

  Returns:
    ``(trainInput, trainOutput, dataType)``: the 1-D feature array, a
    one-element list with the label from ``DataTypeToOutput``, and the data
    type name. ``(None, None, None)`` when the chunk is skipped, either
    because ``chunkN`` lies outside every interval or because the file does
    not hold one layer per ``itemRatios`` entry; the filename is then
    appended to ``ignoredBadFiles``.
  """
  if chunksDir is None:
    chunksDir = chunksDirectory2

  # Resolve the data type explicitly: a chunk number outside every interval
  # must not fall through to an arbitrary type and index the wrong bounds.
  dataType = None
  for candidate, (first, last) in DataIntervals.items():
    if (first <= chunkN <= last):
      dataType = candidate
      break
  if (dataType is None):
    print(filename, "does not belong to any known chunk interval")
    ignoredBadFiles.append(filename)
    return None, None, None
  trainOutput = [DataTypeToOutput[dataType]]

  data = np.load(os.path.join(chunksDir, filename))
  if (data.shape[0] != len(itemRatios)):
    print(filename, "is actually bad")
    ignoredBadFiles.append(filename)
    return None, None, None

  if verbose:
    print(data.shape[0])

  # Collect the pieces and concatenate once at the end. The empty float64
  # seed keeps the dtype promotion identical to growing an empty array.
  parts = [np.array([])]
  for i, (rows, cols) in enumerate(itemRatios):
    if verbose:
      print(i)
    # Layers come in slightly different shapes; crop to the common size.
    parts.append(data[i, :rows, :cols].ravel())

  bounds = Training[dataType][chunkN-DataIntervals[dataType][0]]
  for dataset in shapefile_datasets:
    minx, maxx, miny, maxy = boundsToMat(bounds, dataset.shape)
    data_raw = dataset[miny:maxy, minx:maxx].todense()
    if (data_raw.size == 0):
      print("Empty List")
      parts.append(np.zeros(12*12)) # We'll see about this maybe discard the data too
      continue
    parts.append(cv2.resize(data_raw, dsize=(12, 12), interpolation=cv2.INTER_CUBIC).ravel())

  trainInput = np.concatenate(parts)

  if verbose:
    print("OK")
    print(sys.getsizeof(trainInput))
    print(sys.getsizeof(trainOutput))
  return trainInput, trainOutput, dataType

In [None]:
# Reset cell: re-run to forget which chunk numbers were already processed
# and which files were rejected.
visited, ignoredBadFiles = dict(), list()

In [None]:
# Shallow snapshot of the processed-chunk map, independent of later updates.
savedVisited = dict(visited)

In [None]:
# Display the processed-chunk map and the list of rejected files.
visited, ignoredBadFiles

({}, [])

In [None]:
# Accumulators filled by the collection loops: one feature vector and one
# one-element label list per accepted chunk, at matching positions.
trainX, trainY = list(), list()

In [None]:
# Collect samples from chunksDirectory: every chunk not yet in `visited`,
# except those whose number falls in the "No-Fire" interval.
for filename in os.listdir(chunksDirectory):
  print(filename)

  if (filename == "Chunks" or "bad" in filename):
    continue
  try:
    chunkN = int(filename.split(".")[0])
  except ValueError:
    # An entry whose name does not start with a chunk number (for example a
    # hidden file or a sub-folder) cannot be mapped to a data type.
    print("Skipping", filename, "(no chunk number in its name)")
    continue
  if (chunkN in visited):
    continue
  if (DataIntervals["No-Fire"][0] <= chunkN <= DataIntervals["No-Fire"][1]):
    print("Out of bounds for this one lol")
    continue
  # Marked before processing, so rejected files are not retried on a re-run.
  visited[chunkN] = True

  trainInput, trainOutput, dataType = getTrainingData(filename, chunkN)
  if (trainInput is None):
    continue

  trainX.append(trainInput)
  trainY.append(trainOutput)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
6
0
1
2
3
4
5
OK
7336
64
1339.npy
6
0
1
2
3
4
5
OK
7336
64
935.npy
6
0
1
2
3
4
5
OK
7336
64
1031.npy
6
0
1
2
3
4
5
OK
7336
64
1089.npy
6
0
1
2
3
4
5
OK
7336
64
1146.npy
6
0
1
2
3
4
5
OK
7336
64
1205.npy
6
0
1
2
3
4
5
OK
7336
64
728.npy
6
0
1
2
3
4
5
OK
7336
64
1595.npy
6
0
1
2
3
4
5
OK
7336
64
790.npy
6
0
1
2
3
4
5
OK
7336
64
1567.npy
6
0
1
2
3
4
5
OK
7336
64
1638.npy
6
0
1
2
3
4
5
OK
7336
64
1166.npy
6
0
1
2
3
4
5
OK
7336
64
789.npy
6
0
1
2
3
4
5
OK
7336
64
1581.npy
6
0
1
2
3
4
5
OK
7336
64
1585.npy
6
0
1
2
3
4
5
OK
7336
64
920.npy
6
0
1
2
3
4
5
OK
7336
64
1522.npy
6
0
1
2
3
4
5
OK
7336
64
1135.npy
6
0
1
2
3
4
5
OK
7336
64
1548.npy
6
0
1
2
3
4
5
OK
7336
64
15.npy
6
0
1
2
3
4
5
OK
7336
64
929.npy
6
0
1
2
3
4
5
OK
7336
64
1058.npy
6
0
1
2
3
4
5
OK
7336
64
872.npy
6
0
1
2
3
4
5
OK
7336
64
1566.npy
6
0
1
2
3
4
5
OK
7336
64
799.npy
6
0
1
2
3
4
5
OK
7336
64
108.npy
6
0
1
2
3
4
5
OK
7336
64
308.npy
6
0
1
2
3
4
5
OK
7336
64
721.

In [None]:
# Collect samples from chunksDirectory2: only chunks numbered above the
# "No-Fire" interval that are not yet in `visited`, up to a fixed cap.
# Which files make it under the cap depends on the order os.listdir()
# returns them in, which is arbitrary.
maxSamples = 1670
index = 0  # number of samples appended by this cell so far

for filename in os.listdir(chunksDirectory2):
  print(filename)

  if (filename == "Chunks" or "bad" in filename):
    continue
  try:
    chunkN = int(filename.split(".")[0])
  except ValueError:
    # An entry whose name does not start with a chunk number (for example a
    # hidden file or a sub-folder) cannot be mapped to a data type.
    print("Skipping", filename, "(no chunk number in its name)")
    continue
  if (chunkN in visited):
    continue
  if (chunkN <= DataIntervals["No-Fire"][1]): # If it's not a non-fire occurence
    continue
  # Marked before processing, so rejected files are not retried on a re-run.
  visited[chunkN] = True

  trainInput, trainOutput, dataType = getTrainingData2(filename, chunkN)
  if (trainInput is None):
    continue

  trainX.append(trainInput)
  trainY.append(trainOutput)

  # Count the sample first, then test the cap, so exactly maxSamples
  # samples are added.
  index += 1
  if (index >= maxSamples):
    break

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
34036.npy is actually bad
33958.npy
6
0
1
2
3
4
5
OK
7336
64
41989.npy
6
0
1
2
3
4
5
OK
7336
64
41833.npy
6
0
1
2
3
4
5
OK
7336
64
42068.npy
6
0
1
2
3
4
5
OK
7336
64
40215.npy
6
0
1
2
3
4
5
OK
7336
64
41697.npy
6
0
1
2
3
4
5
OK
7336
64
34204.npy
6
0
1
2
3
4
5
OK
7336
64
38140.npy
6
0
1
2
3
4
5
OK
7336
64
41699.npy
6
0
1
2
3
4
5
OK
7336
64
41452.npy
6
0
1
2
3
4
5
OK
7336
64
41402.npy
41402.npy is actually bad
34189.npy
6
0
1
2
3
4
5
OK
7336
64
40090.npy
6
0
1
2
3
4
5
OK
7336
64
40278.npy
6
0
1
2
3
4
5
OK
7336
64
34244.npy
6
0
1
2
3
4
5
OK
7336
64
39913.npy
6
0
1
2
3
4
5
OK
7336
64
41362.npy
41362.npy is actually bad
35873.npy
6
0
1
2
3
4
5
OK
7336
64
36323.npy
6
0
1
2
3
4
5
OK
7336
64
35909.npy
6
0
1
2
3
4
5
OK
7336
64
36287.npy
6
0
1
2
3
4
5
OK
7336
64
41872.npy
6
0
1
2
3
4
5
OK
7336
64
36233.npy
6
0
1
2
3
4
5
OK
7336
64
42312.npy
42312.npy is actually bad
40040.npy
6
0
1
2
3
4
5
OK
7336
64
41408.npy
6
0
1
2
3
4
5
OK
7336

In [None]:
# Number of samples collected by the two loops above.
len(trainX)

3341

In [None]:
# Build the arrays explicitly before saving so a malformed dataset is caught
# here instead of being written to disk.
trainXArray = np.asarray(trainX)
trainYArray = np.asarray(trainY)
if (trainXArray.dtype == object):
  # Feature vectors of different lengths cannot form a rectangular matrix.
  raise ValueError("trainX rows do not all have the same length")
if (len(trainXArray) != len(trainYArray)):
  raise ValueError("trainX and trainY hold a different number of samples")

# np.save appends the ".npy" extension to these paths.
np.save("drive/MyDrive/Wildfire Research Project/Wildfires Data/trainX_yesterday", trainXArray)
np.save( "drive/MyDrive/Wildfire Research Project/Wildfires Data/trainY_yesterday", trainYArray)



---

