In [1]:
from preprocess import *
from outlier_detection import *


# Build the shared Preprocess helper; the later cell reads the fire data
# (get_fire_data) and the province-id -> name mapping (province_mapping) from it.
# NOTE(review): override=True presumably forces preprocessing to be redone
# instead of reusing cached output — confirm against preprocess.Preprocess.
preprocessor = Preprocess(override=True)

In [6]:
import pickle
from preprocess import *
from outlier_detection import *
from sklearn.svm import OneClassSVM
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
from geopandas import GeoDataFrame
import warnings
import numpy as np

warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


def outlier_detection(preprocessor: Preprocess):
    """Sweep DBSCAN hyper-parameters per province and save one anomaly map per run.

    For every feature subset, province, ``eps`` and ``min_samples`` combination,
    DBSCAN is fitted on that province's rows, the points it labels ``-1``
    (noise) are flagged as anomalies, and a map of the points coloured by that
    flag is written to
    ``./images/dbscan/<subset>/eps<eps>/min<min_samples>/<province>.png``.

    Args:
        preprocessor: Object exposing ``get_fire_data()`` (expected to return a
            DataFrame containing the columns named in the subsets below) and
            ``province_mapping`` (indexed by ``ProvinceID`` to obtain the name
            used in log lines, plot titles and file names).

    Returns:
        None. The results are the PNG files written to disk and one progress
        line printed per combination.
    """
    # Imported locally: the original only had `os` in scope by accident,
    # through one of the notebook's star imports.
    import os

    # Load the fire data
    df_true = preprocessor.get_fire_data()

    # Each subset is (output folder name, feature columns). ProvinceID is kept
    # so rows can be filtered per province; it is constant within a province,
    # so it adds nothing to the distances DBSCAN computes.
    burn = ("burn", ['HectaresBurnt', 'FireLatitude', 'FireLongitude', 'ProvinceID'])
    # Fixed: this entry used to be a verbatim copy of `burn` (same "burn" label
    # and columns), so the burn sweep ran twice and overwrote its own images
    # while temperature was never analysed on its own.
    temp = ("temp", ['AverageTemperature', 'FireLatitude', 'FireLongitude', 'ProvinceID'])
    overall = ("overall", ['HectaresBurnt', 'AverageTemperature', 'FireLatitude', 'FireLongitude', 'ProvinceID'])

    eps_values = [0.1, 0.3, 0.5, 1, 2]
    min_samples_values = [5, 10, 15, 20, 25, 50, 100, 200]

    # The base map is identical for every plot, so read it once instead of
    # once per hyper-parameter combination.
    # NOTE(review): gpd.datasets is deprecated in recent geopandas releases;
    # a locally stored Natural Earth file will be needed when upgrading.
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

    for subset_name, columns in [burn, temp, overall]:
        df = df_true[columns]
        for province in df['ProvinceID'].unique():
            province_name = preprocessor.province_mapping[province]

            # Filter once per province; the eps/min_samples sweep reuses it.
            # NOTE(review): the features are passed to DBSCAN unscaled, so eps
            # mixes the units of every column unless get_fire_data() already
            # normalises them — confirm.
            features = df[df['ProvinceID'] == province]
            lon_min, lon_max = features['FireLongitude'].min(), features['FireLongitude'].max()
            lat_min, lat_max = features['FireLatitude'].min(), features['FireLatitude'].max()

            for eps in eps_values:
                for min_samples in min_samples_values:
                    directory = f"./images/dbscan/{subset_name}/eps{eps}/min{min_samples}/"
                    os.makedirs(directory, exist_ok=True)
                    print(f"Processing {province_name} using {subset_name} data with eps={eps} and min_samples={min_samples}")

                    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(features)
                    # DBSCAN labels noise points -1; those are the anomalies.
                    # assign() builds a new frame, so nothing is written into a
                    # slice of `df` (the original triggered chained assignment).
                    fire_data = features.assign(anomaly=labels == -1)

                    geometry = gpd.points_from_xy(fire_data['FireLongitude'], fire_data['FireLatitude'])
                    gdf = GeoDataFrame(fire_data, geometry=geometry)

                    fig, ax = plt.subplots(figsize=(10, 6))
                    world.plot(ax=ax)
                    # Zoom to the province's fires with a one-degree margin.
                    ax.set_xlim([lon_min - 1, lon_max + 1])
                    ax.set_ylim([lat_min - 1, lat_max + 1])
                    gdf.plot(ax=ax, marker='o', markersize=2, column='anomaly', cmap='coolwarm', legend=True)
                    ax.set_title(f"DBSCAN Clustering for {province_name} using {subset_name} data with eps={eps} and min_samples={min_samples}")
                    fig.tight_layout()

                    fig.savefig(os.path.join(directory, f"{province_name}.png"))
                    # Close explicitly: hundreds of figures are created in this sweep.
                    plt.close(fig)


# Run the full sweep: prints one progress line per (subset, province, eps,
# min_samples) combination and writes the resulting maps under ./images/dbscan/.
outlier_detection(preprocessor)

Processing ON using burn data with eps=0.1 and min_samples=5
Processing ON using burn data with eps=0.1 and min_samples=10
Processing ON using burn data with eps=0.1 and min_samples=15
Processing ON using burn data with eps=0.1 and min_samples=20
Processing ON using burn data with eps=0.1 and min_samples=25
Processing ON using burn data with eps=0.1 and min_samples=50
Processing ON using burn data with eps=0.1 and min_samples=100
Processing ON using burn data with eps=0.1 and min_samples=200
Processing ON using burn data with eps=0.3 and min_samples=5
Processing ON using burn data with eps=0.3 and min_samples=10
Processing ON using burn data with eps=0.3 and min_samples=15
Processing ON using burn data with eps=0.3 and min_samples=20
Processing ON using burn data with eps=0.3 and min_samples=25
Processing ON using burn data with eps=0.3 and min_samples=50
Processing ON using burn data with eps=0.3 and min_samples=100
Processing ON using burn data with eps=0.3 and min_samples=200
Proces