## Imports

In [22]:
import csv
import re
import os
import pandas as pd
from pathlib import Path
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import Choropleth
from folium.plugins import HeatMap
from geopy.geocoders import Nominatim
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.manifold import TSNE
from scipy.cluster.hierarchy import dendrogram
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.metrics import silhouette_score, silhouette_samples, mean_squared_error, r2_score, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras.regularizers import L2
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.ensemble import GradientBoostingClassifier
import random
from sklearn.utils import resample


# Seed the global random number generators so stochastic steps are repeatable
# across "Restart & Run All".
# `random.seed` only affects Python's built-in generator; NumPy keeps its own
# global state, which scikit-learn falls back on when `random_state` is None.
# NOTE(review): TensorFlow/Keras keep a separate generator that is not seeded
# here — seed it as well if the neural-network results must be reproducible.
SEED = 69
random.seed(SEED)
np.random.seed(SEED)

In [23]:
# Resolve the input folders relative to the current working directory:
# both "data" and "shape" sit two levels above it.
p = Path.cwd()
data_folder, shape_folder = (p.parent.parent / name for name in ("data", "shape"))

## Leitura do ficheiro

In [24]:
# Read the parquet file from the data folder into `QuebrasFinal`
# and show it as the cell output.
file = data_folder.joinpath("AllDaysEventos.parquet")
QuebrasFinal = pd.read_parquet(file)
QuebrasFinal

Unnamed: 0,Data do incidente,Concelho,Distrito,Nível de Tensão,Número de Eventos,Dia,Mês,Ano,Densidade Populacional,População Residente,...,Indice de Envelhecimento,Rural,T_MED,T_MAX,T_MIN,DD_MED,DD_FFX,FF_MED,FF_MAX,PR_QTD
0,2014-01-01,Almada,Setúbal,Sem Eventos Excecionais,0,1,1,2014,2486.8,174592.0,...,142.4,0.000000,,,,,,,,
1,2014-01-01,Sertã,Castelo Branco,Sem Eventos Excecionais,0,1,1,2014,34.0,15188.0,...,219.9,1.000000,10.6,12.2,8.1,222.0,248.0,4.0,12.6,13.8
2,2014-01-01,Felgueiras,Porto,Sem Eventos Excecionais,0,1,1,2014,493.1,57074.0,...,90.1,0.800000,,,,,,,,
3,2014-01-01,Torres Vedras,Lisboa,Sem Eventos Excecionais,0,1,1,2014,197.2,80291.0,...,138.6,0.923077,14.4,15.2,12.0,232.0,236.0,5.4,13.4,3.8
4,2014-01-01,Nisa,Portalegre,Sem Eventos Excecionais,0,1,1,2014,12.0,6929.0,...,419.1,1.000000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008001,2023-12-31,Campo Maior,Portalegre,Sem Eventos Excecionais,0,31,12,2023,32.3,7993.0,...,163.9,1.000000,8.9,10.6,7.8,293.0,328.0,2.6,8.1,1.6
1008002,2023-12-31,Castelo De Paiva,Aveiro,Sem Eventos Excecionais,0,31,12,2023,134.1,15420.0,...,188.0,1.000000,,,,337.0,339.0,2.6,10.1,5.8
1008003,2023-12-31,Vila Flor,Bragança,Sem Eventos Excecionais,0,31,12,2023,22.7,6042.0,...,398.0,1.000000,6.9,9.9,4.9,264.0,208.0,2.9,12.7,0.8
1008004,2023-12-31,Vimioso,Bragança,Sem Eventos Excecionais,0,31,12,2023,8.6,4143.0,...,573.7,1.000000,6.9,9.9,4.9,264.0,208.0,2.9,12.7,0.8


In [31]:
# Column labels: lower-case them and turn spaces into underscores.
QuebrasFinal.columns = (
    QuebrasFinal.columns
    .str.lower()
    .str.replace(' ', '_')
)

# Municipality names are compared in upper case further down.
QuebrasFinal["concelho"] = QuebrasFinal["concelho"].str.upper()

def remover_pontuacao(text):
    """Return ``text`` with diacritics removed (e.g. ``'BRAGANÇA'`` -> ``'BRAGANCA'``).

    Despite its name, the function does not touch punctuation: it only maps
    accented letters to their unaccented base letter.

    Every accented letter that Unicode can decompose is handled, in either
    case and in either composed or decomposed form, rather than a fixed list
    of upper-case characters.

    Parameters
    ----------
    text : str
        Value to clean. Non-string values (``None``, ``NaN``) are returned
        unchanged so the function is safe to use with ``Series.apply`` on a
        column that contains missing values.

    Returns
    -------
    str
        The input without combining marks, or the input itself when it is
        not a string.
    """
    import unicodedata  # local import keeps the cell self-contained

    if not isinstance(text, str):
        return text

    # NFD splits each accented letter into base letter + combining mark(s);
    # dropping the marks leaves the plain base letter.
    decomposed = unicodedata.normalize('NFD', text)
    stripped = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose so characters that decompose without combining marks are
    # returned in their usual composed form.
    return unicodedata.normalize('NFC', stripped)
    
# Clean the municipality names in one pass: run `remover_pontuacao` on every
# value, then replace hyphens with spaces.
QuebrasFinal["concelho"] = (
    QuebrasFinal["concelho"]
    .apply(remover_pontuacao)
    .str.replace('-', ' ')
)

In [32]:
# Display the column labels of QuebrasFinal as the cell output.
QuebrasFinal.columns

Index(['data_do_incidente', 'concelho', 'distrito', 'nível_de_tensão',
       'número_de_eventos', 'dia', 'mês', 'ano', 'densidade_populacional',
       'população_residente', 'setor_primário', 'setor_secundário',
       'setor_terciário', 'indice_de_envelhecimento', 'rural', 't_med',
       't_max', 't_min', 'dd_med', 'dd_ffx', 'ff_med', 'ff_max', 'pr_qtd'],
      dtype='object')

## Juntar Zonas

In [33]:
# Read the feather file from the data folder into `Bruh`
# and show it as the cell output.
file_path = data_folder.joinpath("EventosFinalCompleto.feather")
Bruh = pd.read_feather(file_path)
Bruh

Unnamed: 0,codigo_do_relatorio,concelho,data_do_incidente,nivel_de_tensao,causa_do_incidente,duracao_incidente_min_,n_º_clientes_afetados,tiepi_mt_min_,saifi_mt_#_,saidi_mt_min_,...,t_med,t_max,t_min,dd_med,dd_ffx,ff_med,ff_max,pr_qtd,MaxHeight,MinHeight
0,EDPD_2018_JAN_T_1,ALMADA,2018-01-02,MT,Escavações,55,828,0.002935,0.000044,0.002456,...,10.70,17.40,2.600,278.25,289.50,2.0,7.800,0.000000,125.0,0.0
1,EDPD_2018_JAN_T_2,SERTA,2018-01-02,MT,Abate De Árvores,163,549,0.002933,0.000177,0.016501,...,10.60,13.80,6.200,249.00,270.00,2.8,8.100,0.000000,1084.0,125.0
2,EDPD_2018_JAN_T_3,FELGUEIRAS,2018-01-04,MT,Deslizamento De Terras,596,3459,0.019228,0.000266,0.035905,...,14.00,14.80,12.900,197.00,180.00,3.9,14.400,4.000000,575.0,145.0
3,EDPD_2018_JAN_T_4,TORRES VEDRAS,2018-01-05,BT,Veículos,178,102,0.000000,0.000000,0.000000,...,12.40,15.10,9.100,288.00,225.00,3.6,13.900,13.200000,394.0,0.0
4,EDPD_2018_JAN_T_5,NISA,2018-01-09,MT,Aves,119,1731,0.004422,0.000155,0.010038,...,4.90,7.80,2.200,137.00,118.00,4.1,15.400,15.900000,463.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2917,EDPD_2020_DEZ_T_21,POMBAL,2020-12-24,MT,Abate De Árvores,139,2517,0.005941,0.000461,0.017245,...,7.90,12.20,2.100,7.00,351.00,1.7,7.600,0.000000,560.0,0.0
2918,EDPD_2020_DEZ_T_22,VILA NOVA DE GAIA,2020-12-30,MT,Aves,81,3641,0.011914,0.000290,0.011131,...,4.45,9.05,0.275,239.00,186.25,1.5,4.825,0.787500,262.0,0.0
2919,EDPD_2020_DEZ_T_23,ALBUFEIRA,2020-12-28,MT,Escavações,81,3175,0.005974,0.000086,0.005940,...,13.20,15.30,7.700,285.00,293.00,5.8,14.100,0.100000,227.0,0.0
2920,EDPD_2020_DEZ_T_24,CASTRO MARIM,2020-12-28,MT,Veículos,239,591,0.006928,0.000054,0.011528,...,13.20,15.30,7.700,285.00,293.00,5.8,14.100,0.100000,276.0,0.0


In [38]:
# Build the lookup of zone counts: one row per distinct
# (distrito, concelho, count_zonaa, count_zonab, count_zonac) combination.
# This is a plain de-duplication. Rows with a missing key are dropped and the
# result is sorted by the key columns, matching what a groupby over all five
# columns produces, without aggregating an empty set of value columns.
# NOTE(review): assumes none of these columns is categorical — TODO confirm.
zona_cols = ['distrito', 'concelho', 'count_zonaa', 'count_zonab', 'count_zonac']
zonas = (
    Bruh[zona_cols]
    .dropna()
    .drop_duplicates()
    .sort_values(zona_cols)
    .reset_index(drop=True)
)
zonas

Unnamed: 0,distrito,concelho,count_zonaa,count_zonab,count_zonac
0,AVEIRO,AGUEDA,0,7,20
1,AVEIRO,ALBERGARIA A VELHA,0,12,7
2,AVEIRO,ANADIA,0,7,14
3,AVEIRO,AROUCA,0,0,20
4,AVEIRO,AVEIRO,112,0,19
...,...,...,...,...,...
251,VISEU,TABUACO,0,0,8
252,VISEU,TONDELA,0,7,31
253,VISEU,VILA NOVA DE PAIVA,0,0,5
254,VISEU,VISEU,72,9,60


In [40]:
# Attach the zone counts to every row of QuebrasFinal with a left join on
# `concelho`. Rows whose concelho is absent from `zonas` keep NaN in the zone
# columns. Both inputs carry a `distrito` column, so the result contains
# `distrito_x` (from QuebrasFinal) and `distrito_y` (from zonas).
# `validate='many_to_one'` makes the join fail loudly if a concelho appears
# more than once in `zonas`, instead of silently multiplying rows.
merged_df = pd.merge(
    QuebrasFinal,
    zonas,
    on=['concelho'],
    how='left',
    validate='many_to_one',
)
merged_df

Unnamed: 0,data_do_incidente,concelho,distrito_x,nível_de_tensão,número_de_eventos,dia,mês,ano,densidade_populacional,população_residente,...,t_min,dd_med,dd_ffx,ff_med,ff_max,pr_qtd,distrito_y,count_zonaa,count_zonab,count_zonac
0,2014-01-01,ALMADA,Setúbal,Sem Eventos Excecionais,0,1,1,2014,2486.8,174592.0,...,,,,,,,SETUBAL,102.0,63.0,14.0
1,2014-01-01,SERTA,Castelo Branco,Sem Eventos Excecionais,0,1,1,2014,34.0,15188.0,...,8.1,222.0,248.0,4.0,12.6,13.8,CASTELO-BRANCO,0.0,8.0,29.0
2,2014-01-01,FELGUEIRAS,Porto,Sem Eventos Excecionais,0,1,1,2014,493.1,57074.0,...,,,,,,,PORTO,0.0,32.0,13.0
3,2014-01-01,TORRES VEDRAS,Lisboa,Sem Eventos Excecionais,0,1,1,2014,197.2,80291.0,...,12.0,232.0,236.0,5.4,13.4,3.8,LISBOA,0.0,27.0,40.0
4,2014-01-01,NISA,Portalegre,Sem Eventos Excecionais,0,1,1,2014,12.0,6929.0,...,,,,,,,PORTALEGRE,0.0,5.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008001,2023-12-31,CAMPO MAIOR,Portalegre,Sem Eventos Excecionais,0,31,12,2023,32.3,7993.0,...,7.8,293.0,328.0,2.6,8.1,1.6,,,,
1008002,2023-12-31,CASTELO DE PAIVA,Aveiro,Sem Eventos Excecionais,0,31,12,2023,134.1,15420.0,...,,337.0,339.0,2.6,10.1,5.8,,,,
1008003,2023-12-31,VILA FLOR,Bragança,Sem Eventos Excecionais,0,31,12,2023,22.7,6042.0,...,4.9,264.0,208.0,2.9,12.7,0.8,,,,
1008004,2023-12-31,VIMIOSO,Bragança,Sem Eventos Excecionais,0,31,12,2023,8.6,4143.0,...,4.9,264.0,208.0,2.9,12.7,0.8,,,,


In [37]:
# Display the column labels of QuebrasFinal as the cell output.
# NOTE(review): this cell's execution count (37) is lower than that of the
# cells above it (38, 40), so the notebook was run out of order — confirm it
# still works under Restart Kernel -> Run All.
QuebrasFinal.columns

Index(['data_do_incidente', 'concelho', 'distrito', 'nível_de_tensão',
       'número_de_eventos', 'dia', 'mês', 'ano', 'densidade_populacional',
       'população_residente', 'setor_primário', 'setor_secundário',
       'setor_terciário', 'indice_de_envelhecimento', 'rural', 't_med',
       't_max', 't_min', 'dd_med', 'dd_ffx', 'ff_med', 'ff_max', 'pr_qtd'],
      dtype='object')

## Time Series