In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from mpl_toolkits import mplot3d

from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA

import scipy
from scipy.stats import norm
import pickle
import gc 

from IPython.display import display

In [2]:
# Locations of the pickled fire images and their fire masks (left blank here;
# fill in before running).
fire_path = ""
mask_path = ""

# Use context managers so the file handles are closed as soon as loading is
# done instead of lingering until garbage collection.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(fire_path, "rb") as fire_file:
    fires = pickle.load(fire_file)
with open(mask_path, "rb") as mask_file:
    masks = pickle.load(mask_file)

In [3]:
def return_idxs(data):
    """Return the indices of the samples whose minimum value is non-zero.

    The first axis of ``data`` is treated as the sample axis; the minimum is
    taken over every remaining axis, so the function works for any image
    size / band count rather than only 128 x 128 x 10 samples.

    Parameters
    ----------
    data : numpy.ndarray
        Array of shape ``(n_samples, ...)``.

    Returns
    -------
    numpy.ndarray
        1-D integer array with the positions (along axis 0) of the samples
        whose smallest element differs from zero.
    """
    # Reduce over all non-sample axes at once; this avoids the hard-coded
    # reshape and also works when there are zero samples.
    per_sample_min = np.min(data, axis=tuple(range(1, data.ndim)))
    return np.flatnonzero(per_sample_min != 0)

In [4]:
# Keep only the fire images selected by return_idxs (minimum value != 0) and
# apply the same selection to the masks.
# NOTE(review): this assumes masks[i] belongs to fires[i] - confirm.
idxs_fire = return_idxs(fires)
fires, masks = fires[idxs_fire], masks[idxs_fire]

In [5]:
# Location of the pickled cirrus data (left blank here; fill in before running).
cirrus_path = ""

# Context manager closes the file handle right after loading.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(cirrus_path, "rb") as cirrus_file:
    cirrus = pickle.load(cirrus_file)

In [6]:
def cirrus_idxs(data, cirrus_band=7, min_range=500):
    """Return indices of images whose cirrus band has a wide value range.

    An image is selected when, in its cirrus band, ``max - min`` is at least
    ``min_range`` and no previously selected image had exactly the same
    cirrus-band content (so duplicates are reported only once, keeping the
    first occurrence).

    Parameters
    ----------
    data : numpy.ndarray
        Array indexed as ``data[image, row, col, band]``.
    cirrus_band : int, optional
        Index of the cirrus band along the last axis (default 7).
    min_range : number, optional
        Minimum ``max - min`` of the cirrus band for an image to be selected.
        The default of 500 was decided experimentally: images that actually
        have cirrus contamination show a much bigger range of pixel values.

    Returns
    -------
    list of int
        Indices of the selected images, in increasing order.
    """
    selected = []
    # Byte fingerprints of the cirrus bands already selected; set membership
    # is O(1) instead of comparing against every stored pixel list.
    seen_bands = set()
    for i in range(data.shape[0]):
        band = data[i, :, :, cirrus_band]
        # Written as "not (>=)" so a NaN range is rejected, as a plain ">="
        # test would do.
        if not ((np.max(band) - np.min(band)) >= min_range):
            continue
        fingerprint = band.tobytes()
        if fingerprint in seen_bands:
            continue
        seen_bands.add(fingerprint)
        selected.append(i)
    return selected

In [7]:
# Indices (into the filtered fires array) of the images selected by cirrus_idxs.
cirrus_idxs_fires = cirrus_idxs(data=fires)  # length: 2152

In [8]:
# Sum each mask over its last two axes, then keep the entries for the images
# selected by cirrus_idxs.
# NOTE(review): presumably this is the number of fire pixels per image - confirm.
fire_pixels_per_image = masks.sum(axis=-1).sum(axis=-1)
num_fires = fire_pixels_per_image[cirrus_idxs_fires]

In [9]:
# Per-image sum of each channel of `cirrus` (0 -> no_cirrus, 1 -> scattered_cirrus,
# 2 -> dense_cirrus), truncated to the first len(cirrus_idxs_fires) images.
# NOTE(review): num_fires is selected with the index list `cirrus_idxs_fires`,
# whereas these arrays take a leading slice of the same length, and the
# non-fire arrays later in this notebook start at len(idxs_fire). Confirm how
# the rows of `cirrus` are laid out; if they are not the selected fire images
# in order, x and y in the plots below are misaligned.
n_selected = len(cirrus_idxs_fires)
no_cirrus, scattered_cirrus, dense_cirrus = (
    cirrus[:, :, :, category].sum(axis=1).sum(axis=1)[:n_selected]
    for category in (0, 1, 2)
)

In [10]:
# Split the selected images into two groups by their num_fires value.
less_fire_idxs = np.argwhere(num_fires < 20).reshape(-1,)  # fewer than 20 pixels with fire
y_less_scatter = num_fires[less_fire_idxs]
# Complement of the group above. The original repeated `< 20` here, which made
# the "more" group identical to the "less" group. `>= 20` keeps images with
# exactly 20 fire pixels from being dropped from both groups.
more_fire_idxs = np.argwhere(num_fires >= 20).reshape(-1,)  # 20 or more pixels with fire
y_more_scatter = num_fires[more_fire_idxs]

In [11]:
# Cirrus-category sums restricted to the "less fire" group ...
X_less_scatter_no_cirrus, X_less_scatter_dense_cirrus, X_less_scatter_scattered_cirrus = (
    counts[less_fire_idxs] for counts in (no_cirrus, dense_cirrus, scattered_cirrus)
)
# ... and to the "more fire" group.
X_more_scatter_no_cirrus, X_more_scatter_dense_cirrus, X_more_scatter_scattered_cirrus = (
    counts[more_fire_idxs] for counts in (no_cirrus, dense_cirrus, scattered_cirrus)
)

In [12]:
# Scatter plots of each cirrus-category sum against num_fires for the
# "less fire" group.
titles = ["No Cirrus <20 Fires", "Scattered Cirrus <20 Fires", "Dense Cirrus <20 Fires"]
# The data series are listed in the same order as the titles. The original
# iterated (no, dense, scattered) against titles (No, Scattered, Dense), so the
# scattered and dense plots carried each other's title.
x_groups = [
    X_less_scatter_no_cirrus,
    X_less_scatter_scattered_cirrus,
    X_less_scatter_dense_cirrus,
]
for title, x_counts in zip(titles, x_groups):
    plt.scatter(x_counts, y_less_scatter)
    plt.title(title)
    plt.xlabel("# of pixels in the cirrus contamination category")
    plt.ylabel("# of fires")
    plt.show()

In [13]:
# Scatter plots of each cirrus-category sum against num_fires for the
# "more fire" group.
titles = ["No Cirrus >20 Fires", "Scattered Cirrus >20 Fires", "Dense Cirrus >20 Fires"]
# The data series are listed in the same order as the titles. The original
# iterated (no, dense, scattered) against titles (No, Scattered, Dense), so the
# scattered and dense plots carried each other's title.
x_groups = [
    X_more_scatter_no_cirrus,
    X_more_scatter_scattered_cirrus,
    X_more_scatter_dense_cirrus,
]
for title, x_counts in zip(titles, x_groups):
    plt.scatter(x_counts, y_more_scatter)
    plt.title(title)
    plt.xlabel("# of pixels in the cirrus contamination category")
    plt.ylabel("# of fires")
    plt.show()

In [14]:
# Scatter plots of each cirrus-category sum against log10(num_fires) for all
# selected images.
titles = ["No Cirrus", "Scattered Cirrus", "Dense Cirrus"]
# NOTE(review): log10 yields -inf (with a RuntimeWarning) for any image whose
# num_fires is 0 - confirm that no such image is present.
log_num_fires = np.log10(num_fires)
for title, x_counts in zip(titles, [no_cirrus, scattered_cirrus, dense_cirrus]):
    plt.scatter(x_counts, log_num_fires)
    plt.title(title)
    plt.xlabel("# of pixels in the cirrus contamination category")
    # The y values are log-transformed, so the axis label says so.
    plt.ylabel("log10(# of fires)")
    plt.show()

In [15]:
# Two-column arrays for the regressions: column 0 is the cirrus-category sum,
# column 1 is log10(num_fires).
log_num_fires = np.log10(num_fires)
X_no_cirrus = np.stack((no_cirrus, log_num_fires), axis=1)
X_scattered_cirrus = np.stack((scattered_cirrus, log_num_fires), axis=1)
X_dense_cirrus = np.stack((dense_cirrus, log_num_fires), axis=1)

In [16]:
# For each cirrus category: fit a linear regression of column 1 on column 0,
# plot the data with the fitted line, and print the slope.
titles = ["No Cirrus", "Scattered Cirrus", "Dense Cirrus"]
lr = LinearRegression()
for title, X in zip(titles, [X_no_cirrus, X_scattered_cirrus, X_dense_cirrus]):
    feature = X[:, 0]
    target = X[:, 1]
    lr.fit(feature.reshape(-1, 1), target)
    # Evenly spaced x values spanning the observed range, used to draw the line.
    X_pred = np.linspace(feature.min(), feature.max()).reshape(-1, 1)
    plt.scatter(feature, target)
    plt.plot(X_pred, lr.predict(X_pred), c="r")
    plt.title(title)
    plt.show()
    print(f"Slope: {lr.coef_}")

In [17]:
# Per-image sum of each channel of `cirrus` (0 -> no_cirrus, 1 -> scattered_cirrus,
# 2 -> dense_cirrus) for every row from index len(idxs_fire) onwards.
# NOTE(review): the fire-side arrays earlier in this notebook take the first
# len(cirrus_idxs_fires) rows, while this offset is len(idxs_fire). Confirm
# which of the two matches the row layout of `cirrus`.
n_fire_images = len(idxs_fire)
no_cirrus_nonfire, scattered_cirrus_nonfire, dense_cirrus_nonfire = (
    cirrus[:, :, :, category].sum(axis=1).sum(axis=1)[n_fire_images:]
    for category in (0, 1, 2)
)

In [18]:
# Mean and standard deviation of each cirrus-category sum
# (printed in the order dense, scattered, none).
for counts in (dense_cirrus, scattered_cirrus, no_cirrus):
    print(np.mean(counts), np.std(counts))

In [19]:
# One histogram per cirrus-category sum; the dense category uses 50 bins,
# the other two use 100.
for counts, n_bins in ((dense_cirrus, 50), (scattered_cirrus, 100), (no_cirrus, 100)):
    plt.hist(counts, bins=n_bins)
    plt.show()