In [1]:
import geopandas as gpd
import pandas as pd
import folium
import re
from folium.plugins import MarkerCluster
import os
from folium.plugins import TimestampedGeoJson

In [2]:
def load_data():
    # DETER alerts_df_df_df (GEODF)
    alerts_df = gpd.read_file('../data/deter-amz-public-2024out08/deter-amz-deter-public.shp', encoding='utf-8')
    alerts_df.loc[alerts_df['CLASSNAME'] == 'DEGRDACAO', 'CLASSNAME'] = 'DEGRADACAO'
    alerts_df = alerts_df[~(alerts_df['CLASSNAME'] == 'CORTE_SELETIVO')]
    alerts_df['VIEW_DATE'] = pd.to_datetime(alerts_df['VIEW_DATE'])
    alerts_df['ANO'] = alerts_df['VIEW_DATE'].dt.year
    alerts_df['MES'] = alerts_df['VIEW_DATE'].dt.month
    alerts_df['MES/ANO'] = alerts_df['VIEW_DATE'].dt.strftime('%Y-%m')

    # DETER ALERTS (CSV)
    df_deter = pd.DataFrame(alerts_df)
    df_deter = df_deter.drop(columns=['FID', 'QUADRANT', 'PATH_ROW', 'SENSOR', 'SATELLITE', 'geometry'])

    
    #IBGE DATA
    legal_amazon = gpd.read_file('../data/brazilian_legal_amazon/brazilian_legal_amazon.shp',encoding='utf-8')
    states = gpd.read_file('../data/states_legal_amazon/states_legal_amazon.shp',encoding='utf-8')
    
    ac = gpd.read_file('../data/malhas_regionais_ibge/AC_Municipios_2022/AC_Municipios_2022.shp', encoding='utf-8')
    am = gpd.read_file('../data/malhas_regionais_ibge/AM_Municipios_2022/AM_Municipios_2022.shp', encoding='utf-8')
    ap = gpd.read_file('../data/malhas_regionais_ibge/AP_Municipios_2022/AP_Municipios_2022.shp', encoding='utf-8')
    ma = gpd.read_file('../data/malhas_regionais_ibge/MA_Municipios_2022/MA_Municipios_2022.shp', encoding='utf-8')
    mt = gpd.read_file('../data/malhas_regionais_ibge/MT_Municipios_2022/MT_Municipios_2022.shp', encoding='utf-8')
    pa = gpd.read_file('../data/malhas_regionais_ibge/PA_Municipios_2022/PA_Municipios_2022.shp', encoding='utf-8')
    ro = gpd.read_file('../data/malhas_regionais_ibge/RO_Municipios_2022/RO_Municipios_2022.shp', encoding='utf-8')
    rr = gpd.read_file('../data/malhas_regionais_ibge/RR_Municipios_2022/RR_Municipios_2022.shp', encoding='utf-8')
    to = gpd.read_file('../data/malhas_regionais_ibge/TO_Municipios_2022/TO_Municipios_2022.shp', encoding='utf-8')

    df_states = pd.concat([ac, am, ap, ma, mt, pa, ro, rr, to])

    df_states.rename(columns={'CD_MUN':'GEOCODIBGE'}, inplace=True)

    c_units = gpd.read_file('../data/conservation_units_legal_amazon/conservation_units_legal_amazon.shp',encoding='utf-8')
    c_units.rename(columns={'nome':'UC'},inplace=True)
    
    return alerts_df, df_deter, legal_amazon, states, df_states, c_units

alerts_df, df_deter, legal_amazon, states, df_states, c_units = load_data()
folder_path = "../Visualizations/DETER/Maps"
for file in os.listdir(folder_path):
    os.remove(os.path.join(folder_path,file))



In [3]:
df_deter["UF"].unique()

array(['PA', 'MT', 'AM', 'RO', 'MA', 'TO', 'RR', 'AP', 'AC'], dtype=object)

In [4]:
dfs_deter = {uf: df_deter[df_deter['UF'] == uf] for uf in df_deter['UF'].unique()}

df_deter_PA = dfs_deter['PA']
df_deter_MT = dfs_deter['MT']
df_deter_AM = dfs_deter['AM']
df_deter_RO = dfs_deter['RO']
df_deter_MA = dfs_deter['MA']
df_deter_TO = dfs_deter['TO']
df_deter_RR = dfs_deter['RR']
df_deter_AP = dfs_deter['AP']
df_deter_AC = dfs_deter['AC']

deter_states = ['PA', 'MT', 'AM', 'RO', 'MA', 'TO', 'RR', 'AP', 'AC']
for s in deter_states:
    var_name = f'df_deter_{s}'  # Dynamically create the variable name
    grouped = globals()[var_name].groupby('VIEW_DATE', as_index=False)['AREAMUNKM'].sum()
    globals()[var_name] = pd.merge(globals()[var_name], grouped, on='VIEW_DATE', suffixes=('', '_SUM'))
    globals()[var_name] = globals()[var_name].drop_duplicates(subset=["VIEW_DATE"], keep="first")
    
print(df_deter_MA.count())
print(df_deter_MA["VIEW_DATE"].unique())



CLASSNAME        519
VIEW_DATE        519
AREAUCKM         519
UC                26
AREAMUNKM        519
MUNICIPALI       519
GEOCODIBGE       519
UF               519
ANO              519
MES              519
MES/ANO          519
AREAMUNKM_SUM    519
dtype: int64
<DatetimeArray>
['2018-01-21 00:00:00', '2018-02-16 00:00:00', '2018-01-30 00:00:00',
 '2018-02-19 00:00:00', '2018-02-22 00:00:00', '2018-02-27 00:00:00',
 '2016-11-27 00:00:00', '2018-02-28 00:00:00', '2018-03-03 00:00:00',
 '2018-05-23 00:00:00',
 ...
 '2018-05-13 00:00:00', '2018-05-14 00:00:00', '2024-09-27 00:00:00',
 '2018-01-04 00:00:00', '2018-05-28 00:00:00', '2018-04-06 00:00:00',
 '2018-02-11 00:00:00', '2018-04-15 00:00:00', '2018-01-09 00:00:00',
 '2018-01-01 00:00:00']
Length: 519, dtype: datetime64[ms]


In [6]:
print(alerts_df)

                FID             CLASSNAME QUADRANT PATH_ROW  VIEW_DATE SENSOR  \
0       100002_hist  CICATRIZ_DE_QUEIMADA     None   170105 2018-01-11   AWFI   
1       100003_hist  CICATRIZ_DE_QUEIMADA     None   169105 2018-01-14   AWFI   
2       100005_curr  CICATRIZ_DE_QUEIMADA     None   037017 2024-09-26    WFI   
3       100005_hist      DESMATAMENTO_VEG     None   169105 2018-01-14   AWFI   
4       100006_hist  CICATRIZ_DE_QUEIMADA     None   169105 2018-01-14   AWFI   
...             ...                   ...      ...      ...        ...    ...   
392804    9999_curr       DESMATAMENTO_CR     None   038016 2023-09-29    WFI   
392805     999_curr  CICATRIZ_DE_QUEIMADA     None   036016 2023-08-16    WFI   
392806      99_hist            DEGRADACAO        D   321074 2016-08-02  AWIFS   
392807       9_curr       DESMATAMENTO_CR     None   036016 2023-08-01    WFI   
392808       9_hist  CICATRIZ_DE_QUEIMADA        B   324078 2016-08-17  AWIFS   

            SATELLITE  AREA

In [7]:
print("DF Deter", df_deter)
print("\nLegal Amazon", legal_amazon)
print("\nStates", states)

DF Deter                    CLASSNAME  VIEW_DATE  AREAUCKM    UC  AREAMUNKM  \
0       CICATRIZ_DE_QUEIMADA 2018-01-11       0.0  None   0.459839   
1       CICATRIZ_DE_QUEIMADA 2018-01-14       0.0  None   0.340975   
2       CICATRIZ_DE_QUEIMADA 2024-09-26       0.0  None   1.373554   
3           DESMATAMENTO_VEG 2018-01-14       0.0  None   0.070781   
4       CICATRIZ_DE_QUEIMADA 2018-01-14       0.0  None   0.149432   
...                      ...        ...       ...   ...        ...   
392804       DESMATAMENTO_CR 2023-09-29       0.0  None   0.206886   
392805  CICATRIZ_DE_QUEIMADA 2023-08-16       0.0  None   0.425538   
392806            DEGRADACAO 2016-08-02       0.0  None  22.043149   
392807       DESMATAMENTO_CR 2023-08-01       0.0  None   0.139690   
392808  CICATRIZ_DE_QUEIMADA 2016-08-17       0.0  None   7.582060   

                MUNICIPALI GEOCODIBGE  UF   ANO  MES  MES/ANO  
0             Monte Alegre    1504802  PA  2018    1  2018-01  
1                 Itai

In [8]:
print("DF Municipalities", df_states)
print("\nC Units", c_units)

DF Municipalities     GEOCODIBGE          NM_MUN SIGLA_UF  AREA_KM2  \
0      1200013      Acrelândia       AC  1811.613   
1      1200054    Assis Brasil       AC  4979.073   
2      1200104       Brasiléia       AC  3928.174   
3      1200138          Bujari       AC  3034.869   
4      1200179        Capixaba       AC  1705.824   
..         ...             ...      ...       ...   
134    1721208  Tocantinópolis       TO  1083.600   
135    1721257        Tupirama       TO   706.883   
136    1721307      Tupiratins       TO   889.126   
137    1722081    Wanderlândia       TO  1365.431   
138    1722107         Xambioá       TO  1190.489   

                                              geometry  
0    POLYGON ((-67.07612 -10.08798, -67.07659 -10.0...  
1    POLYGON ((-69.55253 -10.87353, -69.52086 -10.8...  
2    POLYGON ((-68.75712 -11.01097, -68.75752 -11.0...  
3    POLYGON ((-67.92167 -9.69355, -67.91736 -9.693...  
4    POLYGON ((-67.73403 -10.71177, -67.73414 -10.7...  
.. 

In [None]:
# LOADING TEXTS (ENGLISH AND PORTUGUESE)

df_texts = pd.read_csv('../texts/texts_deter.csv', sep='§', engine='python')
english = {list(df_texts['Key'])[i]: list(df_texts['English'])[i] for i in range(len(list(df_texts['Key'])))}
# portuguese = {list(df_texts['Key'])[i]: list(df_texts['Portuguese'])[i] for i in range(len(list(df_texts['Key'])))}

classes_deter_en = {'CICATRIZ_DE_QUEIMADA': 'Forest Fire Scar',
          'DESMATAMENTO_CR': 'Deforestation with Exposed Soil',
          'DESMATAMENTO_VEG': 'Deforestation with Vegetation',
          'MINERACAO': 'Mining',
          'DEGRADACAO': 'Degradation',
          'CS_DESORDENADO': 'Selective Logging Type 1 (Disordered)',
          'CS_GEOMETRICO': 'Selective Logging Type 2 (Geometric)',
}

states_dict = {
    "MT": "Mato Grosso",
    "PA": "Pará",
    "AM": "Amazonas",
    "RO": "Rondônia",
    "MA": "Maranhão",
    "RR": "Roraima",
    "AC": "Acre",
    "TO": "Tocantins",
    "AP": "Amapá"
}

def get_texts():
    return classes_deter_en, english



dict_classes, texts = get_texts()

In [None]:
def get_centroids(geo_df, mode=2, crs='EPSG:31982'):

    if mode==0:
        centroids = geo_df.copy()
        centroids["centroid"] = centroids.geometry.centroid
        centroids["latitude"] = centroids.centroid.y
        centroids["longitude"] = centroids.centroid.x
        return centroids

    if mode==1:
        geo_df_proj = geo_df.copy()
        geo_df_proj.to_crs(crs)
        
        geo_df_proj['centroid'] = geo_df_proj.geometry.centroid
        geo_df_proj['latitude'] = geo_df_proj.centroid.y
        geo_df_proj['longitude'] = geo_df_proj.centroid.x
        
        centroids = gpd.GeoDataFrame(geo_df_proj, geometry='centroid', crs=crs)
    
        centroids = centroids.to_crs(geo_df.crs)
        return centroids

    if mode==2:
        df = geo_df.copy()
        df['representative_point'] = df.geometry.representative_point()
        df['latitude'] = df['representative_point'].apply(lambda p: p.y)
        df['longitude'] = df['representative_point'].apply(lambda p: p.x)
        return df

def folium_map_init():
    map = folium.Map(location=[-7.25, -60], zoom_start=4)
    return map

def folium_add_markers(container, df_data, geo_df, get_centroid_mode, df_deter, key, popup_title_column, popup_total_area_text='Área Total Afetada:',total_area_column='AREAMUNKM'):

    all_classes = sorted(df_deter['CLASSNAME'].unique())
    df_centroids = get_centroids(geo_df,get_centroid_mode)
    
    for idx, row in df_data.iterrows():

        coords = df_centroids.loc[df_centroids[key] == row[key]].iloc[0]

        info = ''
        
        if row['AREAMUNKM']>0:
            df_stats = df_deter[df_deter[key] == row[key]]
            df_stats_summed = df_stats.groupby('CLASSNAME')['AREAMUNKM'].sum().reset_index()
            df_stats_complete = pd.DataFrame({'CLASSNAME': all_classes})
            df_stats_complete = df_stats_complete.merge(df_stats_summed, on='CLASSNAME', how='left').fillna(0)
            df_stats_complete['DESC'] = df_stats_complete['CLASSNAME'].map(dict_classes)
            df_stats_complete = df_stats_complete.sort_values(by='AREAMUNKM', ascending=False)
    
            total = df_stats_complete['AREAMUNKM'].sum()
            
            # Calculates percentage of every class
            for ind, lin in df_stats_complete.iterrows():
                perc = (lin['AREAMUNKM'] * 100) / total
                info += f"{lin['DESC']}: {lin['AREAMUNKM']:.0f} km² ({perc:.2f}%)<br>"
        
        popup_text = f"""
        <div style='white-space: nowrap;'>
        <span style='font-size: 16px; font-weight: bold;'>{row[popup_title_column]}</span><br><br> {popup_total_area_text} {row[total_area_column]:.0f} km²<br><br> {info}
        </div>
        """

        # Add marker on Map or MarkerCluster (container)
        folium.Marker(
            location=[coords['latitude'], coords['longitude']],
            popup=popup_text,
            icon=folium.Icon(color='red', icon='triangle-exclamation', prefix='fa')
        ).add_to(container)


    return container

def folium_add_markers_pred(container, df_data, geo_df, get_centroid_mode, grouped_df, key, popup_title_column, popup_total_area_text='Área Total Afetada:',total_area_column='AREAMUNKM'):

    # all_classes = sorted(grouped_df['CLASSNAME'].unique())
    df_centroids = get_centroids(geo_df,get_centroid_mode)
    
    for idx, row in df_data.iterrows():

        coords = df_centroids.loc[df_centroids[key] == row[key]].iloc[0]

        info = ''
        
        popup_text = f"""
        <div style='white-space: nowrap;'>
        <span style='font-size: 16px; font-weight: bold;'>{row[popup_title_column]}</span><br><br> {popup_total_area_text} {row[total_area_column]:.0f} km²<br><br> {info}
        </div>
        """

        # Add marker on Map or MarkerCluster (container)
        folium.Marker(
            location=[coords['latitude'], coords['longitude']],
            popup=popup_text,
            icon=folium.Icon(color='red', icon='triangle-exclamation', prefix='fa')
        ).add_to(container)
    
    return container

In [13]:
def save_map(file_name,map):
    map.save(f"../Visualizations/DETER/Maps/{file_name}.html")

### States Map:

In [None]:
def states_map():
    ############# Data Preparation #############
    
    df_deter = alerts_df.copy()
    gb_uf = df_deter.groupby('UF')['AREAMUNKM'].sum().sort_values(ascending=False)
    gb_uf = pd.DataFrame(gb_uf)
    gb_uf['NOME_ESTADO'] = gb_uf.index.map(states_dict)
    gb_uf['NOME_SIGLA'] = gb_uf['NOME_ESTADO'] + ' (' + gb_uf.index + ')' 
    gb_uf = gb_uf.reset_index()

    states_copy = states.copy()
    states_copy = states_copy.rename(columns={'sigla': 'UF'})

    #############       Folium       #############
    map = folium_map_init()

    # Customizing state borders color
    style_states = {'fillOpacity': 0.3, 'color': '#005f73', 'weight': 2}
    folium.GeoJson(states_copy, name='States', style_function=lambda x: style_states).add_to(map)

    # Customizing Legal Amazon border color
    style_legal_amazon = {'fillOpacity': 0, 'color': '#0a9396', 'weight': 3}
    folium.GeoJson(legal_amazon, name='Legal Amazon', style_function=lambda x: style_legal_amazon).add_to(map)

    # Customizing Choropleth color scheme
    folium.Choropleth(
        geo_data=states_copy,
        data=gb_uf,
        columns=['UF', 'AREAMUNKM'],
        key_on='feature.properties.UF',
        fill_color='Reds',
        fill_opacity=0.7,
        line_opacity=0.2,
        nan_fill_color='white',
        bins=8,
        highlight=True,
        legend_name='Affected Area in km²',
        name='Most Affected States'
    ).add_to(map)

    # Adding markers with popups for more information
    map = folium_add_markers(map, gb_uf, states_copy, 1, df_deter, 'UF', 'NOME_SIGLA', texts['total_dmg'], 'AREAMUNKM')

    # Adding layer control
    folium.LayerControl().add_to(map)

    return map


In [15]:
dict_classes, texts = get_texts()
map = states_map()
save_map('States_EN',map)


  geo_df_proj['centroid'] = geo_df_proj.geometry.centroid

  geo_df_proj['latitude'] = geo_df_proj.centroid.y

  geo_df_proj['longitude'] = geo_df_proj.centroid.x


### Cities Map

In [None]:
def cities_map(filter=[]):
    
    ############# Data Preparation #############

    geocodibge = alerts_df.drop_duplicates(subset='MUNICIPALI').set_index('MUNICIPALI')['GEOCODIBGE']
    sum_areamunkm = alerts_df.groupby('MUNICIPALI')['AREAMUNKM'].sum().reset_index()
    sum_areamunkm['GEOCODIBGE'] = sum_areamunkm['MUNICIPALI'].map(geocodibge)

    merge = pd.merge(df_states, sum_areamunkm, on='GEOCODIBGE', how='left')

    if len(filter)>0:
        merge = merge[merge['SIGLA_UF'].isin(filter)]
    
    merge['AREAMUNKM'].fillna(0, inplace=True)

    #############       Folium       #############
    map = folium_map_init()

    style_cities = {'fillOpacity':0 ,'color' : '#117306', 'weight': 1}
    folium.GeoJson(merge, name = 'Cities', style_function= lambda x: style_cities).add_to(map)

    style_states = {'fillOpacity':0 ,'color' : '#117306', 'weight': 2}
    folium.GeoJson(states, name = 'States', style_function= lambda x: style_states).add_to(map)
    
    style_legal_amazon = {'fillOpacity':0 ,'color' : '#117306', 'weight': 3}
    folium.GeoJson(legal_amazon, name = 'Legal Amazon', style_function= lambda x: style_legal_amazon).add_to(map)
    
    folium.Choropleth(geo_data=merge.to_json(),
                  name='Choropleth',
                  data=merge,
                  columns=['GEOCODIBGE', 'AREAMUNKM'],
                  key_on = 'feature.properties.GEOCODIBGE',
                  fill_color = 'Reds',
                  nan_fill_color = 'white',
                  highlight = True,
                  legend_name='Affected Area in km²').add_to(map)

    marker_cluster = MarkerCluster().add_to(map)
    marker_cluster = folium_add_markers(marker_cluster, merge, merge, 2, alerts_df, 'GEOCODIBGE', 'NM_MUN', texts['total_dmg'], 'AREAMUNKM')
    folium.LayerControl().add_to(map)

    return map

In [17]:
lst_states = list(df_states['SIGLA_UF'].unique())
lst_states

['AC', 'AM', 'AP', 'MA', 'MT', 'PA', 'RO', 'RR', 'TO']

In [18]:
dict_classes, texts = get_texts()

for i in range(len(lst_states)):
    filter = []
    filter.append(lst_states[i])
    map = cities_map(filter)
    map_name = 'Cities_EN_' + filter[0]
    save_map(map_name,map)
    print(map_name + ' saved.')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_AC saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_AM saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_AP saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_MA saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_MT saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_PA saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_RO saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_RR saved.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


Cities_EN_TO saved.


In [19]:
dict_classes, texts = get_texts()
map = cities_map([])
map_name = 'All_Cities_EN'
save_map(map_name,map)
print(map_name + ' saved.')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merge['AREAMUNKM'].fillna(0, inplace=True)


All_Cities_EN saved.


### C Units

In [20]:
def normalize_string(s):
    s = s.strip()
    s = re.sub(r'\s+', ' ', s)
    s = re.sub(r'[^\w\s]', '', s)
    s = s.upper()
    return s

def c_units_map():
    ############# Data Preparation #############
    c_units_copy = c_units.copy()
    c_units_copy['UC'] = c_units_copy['UC'].apply(normalize_string)
    
    alerts_uc = alerts_df[alerts_df['UC'].notna()].copy()
    dic_correcao = {'FLORESTA NACIONAL DE ALTAMIRA': 'FLORESTA NACIONAL ALTAMIRA', 
                    'FLORESTA NACIONAL DE CAXIUANÂ': 'FLORESTA NACIONAL DE CAXIUANÃ', 
                    'FLORESTA NACIONAL DO AMANA': 'FLORESTA NACIONAL DO AMANÁ',
                    'FLORESTA NACIONAL DO BOM FUTURO': 'FLORESTA NACIONAL DE BOM FUTURO',
                    'FLORESTA NACIONAL DO ITACAIUNAS': 'FLORESTA NACIONAL DE ITACAIUNAS',
                    'FLORESTA NACIONAL DO JATUARANA': 'FLORESTA NACIONAL DE JATUARANA',
                    'FLORESTA NACIONAL DO PURUS': 'RESERVA EXTRATIVISTA DO MÉDIO PURÚS',
                    'FLORESTA NACIONAL DO TAPAJÓS': 'FLORESTA NACIONAL DE TAPAJÓS',
                    'FLORESTA NACIONAL DO TAPIRAPÉAQUIRI': 'FLORESTA NACIONAL DE TAPIRAPÉAQUIRI',
                    'FLORESTA NACIONAL MAPIÁ  INAUINI': 'FLORESTA NACIONAL DE MAPIÁINAUINÍ',
                    'PARQUE NACIONAL SERRA DA CUTIA': 'PARQUE NACIONAL DA SERRA DA CUTIA',
                    'RESERVA BIOLÓGICA NASCENTES DA SERRA DO CACHIMBO': 'RESERVA BIOLÓGICA NASCENTES SERRA DO CACHIMBO',
                    'RESERVA EXTRATIVISTA DO ALTO JURUÁ': 'RESERVA EXTRATIVISTA ALTO JURUÁ',
                    'RESERVA EXTRATIVISTA DO ALTO TARAUACÁ': 'RESERVA EXTRATIVISTA ALTO TARAUACÁ',
                    'RESERVA EXTRATIVISTA DO BAIXO JURUÁ': 'RESERVA EXTRATIVISTA BAIXO JURUÁ',
                    'RESERVA EXTRATIVISTA DO CIRIACO': 'RESERVA EXTRATIVISTA DO CIRIÁCO',
                    'RESERVA EXTRATIVISTA DO LAGO DO CUNIÃ': 'RESERVA EXTRATIVISTA LAGO DO CUNIÃ',
                    'RESERVA EXTRATIVISTA DO MÉDIO JURUÁ': 'RESERVA EXTRATIVISTA MÉDIO JURUÁ',
                    'RESERVA EXTRATIVISTA DO RIO CAJARI': 'RESERVA EXTRATIVISTA RIO CAJARI',
                    'RESERVA EXTRATIVISTA DO RIO DO CAUTÁRIO': 'RESERVA EXTRATIVISTA RIO CAUTÁRIO',
                    'RESERVA EXTRATIVISTA DO RIO OURO PRETO': 'RESERVA EXTRATIVISTA RIO OURO PRETO',
                    'RESERVA EXTRATIVISTA RIO UNINI': 'RESERVA EXTRATIVISTA DO RIO UNINI',
                    'RESERVA EXTRATIVISTA TAPAJÓSARAPIUNS': 'RESERVA EXTRATIVISTA TAPAJÓS ARAPIUNS',
                    'RESERVA EXTRATIVISTA TAPAJÓS-ARAPIUNS': 'RESERVA EXTRATIVISTA TAPAJÓS ARAPIUNS',
                    'RESERVA EXTRATIVISTA TERRA GRANDE  PRACUÚBA': 'RESERVA EXTRATIVISTA TERRA GRANDE PRACUUBA',
                    'RESERVA EXTRATIVISTA TERRA GRANDE - PRACUÚBA': 'RESERVA EXTRATIVISTA TERRA GRANDE PRACUUBA',
                    'ÁREA DE PROTEÇÃO AMBIENTAL DOS MEANDROS DO RIO ARAGUAIA': 'ÁREA DE PROTEÇÃO AMBIENTAL MEANDROS DO ARAGUAIA',
                    'ÁREA DE RELEVANTE INTERESSE ECOLÓGICO SERINGAL NOVA ESPERANÇA': 'ÁREA DE RELEVANTE INTERESSE ECOLÓGICA SERINGAL NOVA ESPERANÇA',
                    'ESTAÇÃO ECOLÓGICA JUAMI-JAPURÁ': 'ESTAÇÃO ECOLÓGICA JUAMIJAPURÁ',
                    'FLORESTA NACIONAL DE BALATA-TUFARI': 'FLORESTA NACIONAL DE BALATATUFARI',
                    'FLORESTA NACIONAL DE SARACÁ-TAQUERA': 'FLORESTA NACIONAL DE SARACÁTAQUERA',
                    'FLORESTA NACIONAL MAPIÁ - INAUINI': 'FLORESTA NACIONAL DE MAPIÁINAUINÍ',
                    'RESERVA EXTRATIVISTA AUATÍ-PARANÁ': 'RESERVA EXTRATIVISTA AUATÍPARANÁ',
                    'RESERVA EXTRATIVISTA DO CAZUMBÁ-IRACEMA': 'RESERVA EXTRATIVISTA DO CAZUMBÁIRACEMA',
                    'RESERVA EXTRATIVISTA GURUPÁ-MELGAÇO': 'RESERVA EXTRATIVISTA GURUPÁMELGAÇO',
                    'RESERVA EXTRATIVISTA IPAÚ-ANILZINHO': 'RESERVA EXTRATIVISTA IPAÚANILZINHO'}
    alerts_uc['UC'] = alerts_uc['UC'].replace(dic_correcao)
    gc_uc = alerts_uc.groupby('UC')['AREAMUNKM'].sum().reset_index()
    gc_uc['UC'] = gc_uc['UC'].replace(dic_correcao)
    

    def uc_geodf(state):
        first_alert = alerts_df[alerts_df['UC'] == state]['geometry'].iloc[0]
        representative_point = first_alert
        new_record = {
            'UC': state,
            'geometry': representative_point
        }
        return gpd.GeoDataFrame([new_record], crs=c_units_copy.crs)
    
    c_units_copy = pd.concat([c_units_copy, uc_geodf('ESTAÇÃO ECOLÓGICA DE CARACARAÍ')], ignore_index=True)
    c_units_copy = pd.concat([c_units_copy, uc_geodf('ESTAÇÃO ECOLÓGICA DE IQUÊ')], ignore_index=True)
    
    merge_ucs = pd.merge(c_units_copy, gc_uc, on='UC', how='left').fillna(0)


    
    #############       Folium       #############

    map = folium_map_init()

    style_legal_amazon = {'fillOpacity':0 ,'color' : '#117306', 'weight': 3}
    folium.GeoJson(legal_amazon, name = 'Legal Amazon', style_function= lambda x: style_legal_amazon).add_to(map)

    style_states = {'fillOpacity':0 ,'color' : '#117306', 'weight': 2}
    folium.GeoJson(states, name = 'States', style_function= lambda x: style_states).add_to(map)

    style_ucs = {'fillOpacity':0 ,'color' : '#3d1601', 'weight': 1}
    folium.GeoJson(c_units_copy, name = 'Conservation Units', style_function= lambda x: style_ucs).add_to(map)


    folium.Choropleth(geo_data=merge_ucs.to_json(),
                  name='Choropleth',
                  data=merge_ucs,
                  columns=['UC', 'AREAMUNKM'],
                  key_on = 'feature.properties.UC',
                  fill_color = 'YlOrRd',
                  nan_fill_color = 'white',
                  highlight = True,
                  legend_name='Affected Area in km²').add_to(map)
    
    marker_cluster = MarkerCluster().add_to(map)

    marker_cluster = folium_add_markers(marker_cluster,merge_ucs,merge_ucs, 2, alerts_uc, 'UC', 'UC', texts['total_dmg'], 'AREAMUNKM')

    folium.LayerControl().add_to(map)
    return map

In [21]:
dict_classes, texts = get_texts()
map = c_units_map()
save_map('C_Units_EN',map)

### LSTM

In [22]:
from tensorflow.keras.layers import Dropout, Bidirectional, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Function to build and train an improved robust LSTM model
def train_lstm(X_train, y_train, X_test, y_test):
    model = Sequential()
    
    # First LSTM layer with dropout and layer normalization
    model.add(Bidirectional(LSTM(128, activation='tanh', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2]))))
    model.add(LayerNormalization())
    model.add(Dropout(0.3))
    
    # Second LSTM layer with dropout and layer normalization
    model.add(LSTM(64, activation='tanh', return_sequences=True))
    model.add(LayerNormalization())
    model.add(Dropout(0.3))
    
    # Third LSTM layer
    model.add(LSTM(32, activation='tanh'))
    model.add(Dropout(0.2))
    
    # Dense output layer
    model.add(Dense(1))
    
    # Compile the model with MSE loss and Adam optimizer
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    
    # Callbacks for early stopping and learning rate reduction
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-5)
    
    # Train the model
    model.fit(X_train, y_train, 
              epochs=100, 
              batch_size=32,
              validation_data=(X_test, y_test), 
              verbose=1, 
              callbacks=[early_stopping, reduce_lr])
    
    return model


2024-11-18 15:58:47.404440: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-18 15:58:47.415583: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-18 15:58:47.514176: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-18 15:58:47.828975: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731963527.941303  371035 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731963527.96

In [85]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Function to prepare data for LSTM
def prepare_data(df, feature, target, sequence_length=30):
    df = df.sort_values('VIEW_DATE')
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df[[target]])
    
    # Create sequences
    X, y = [], []
    for i in range(len(scaled_data) - sequence_length):
        X.append(scaled_data[i:i+sequence_length])
        y.append(scaled_data[i+sequence_length])
    
    X = np.array(X)
    y = np.array(y)
    return X, y, scaler

# Function to predict and inverse transform
def predict_future(model, data, scaler, steps=10):
    predictions = []
    input_sequence = data[-1]
    
    for _ in range(steps):
        pred = model.predict(input_sequence[np.newaxis, :, :])
        predictions.append(pred[0][0])
        input_sequence = np.vstack((input_sequence[1:], pred))
    
    return scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Prepare and train LSTM for each state
sequence_length = 30
future_steps = 90

In [None]:
from sklearn.metrics import mean_squared_error, f1_score,recall_score
import numpy as np

# Initialize a dictionary to store model evaluation metrics
model_accuracies = {}
models = {}

# Iterate over each state to split data, train LSTM, and evaluate
for state in deter_states:
    var_name = f'df_deter_{state}'
    df = globals()[var_name]  # Access the dataframe dynamically
    df = df.sort_values('VIEW_DATE')  # Ensure data is sorted by date
    
    # Prepare the data
    X, y, scaler = prepare_data(df, feature='VIEW_DATE', target='AREAMUNKM_SUM', sequence_length=sequence_length)
    
    # Split into train and test sets
    split = int(0.8 * len(X))
    X_train, y_train = X[:split], y[:split]
    X_test, y_test = X[split:], y[split:]
    
    # Train the LSTM model
    model = train_lstm(X_train, y_train, X_test, y_test)
    models[state] = model  # Save the model
    
    # Evaluate on the test set
    y_pred = model.predict(X_test)
  
    
    y_test_inversed = scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred_inversed = scaler.inverse_transform(y_pred)
    
    # Calculate RMSE as the evaluation metric
    rmse = np.sqrt(mean_squared_error(y_test_inversed, y_pred_inversed))
    model_accuracies[state] = rmse

# Display model accuracies for each state
import pandas as pd
accuracy_df = pd.DataFrame.from_dict(model_accuracies, orient='index')
accuracy_df.index.name = 'State'
accuracy_df.reset_index(inplace=True)



Epoch 1/100


  super().__init__(**kwargs)


[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - loss: 0.1531 - val_loss: 0.0024 - learning_rate: 0.0010
Epoch 2/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0072 - val_loss: 0.0024 - learning_rate: 0.0010
Epoch 3/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0032 - val_loss: 0.0028 - learning_rate: 0.0010
Epoch 4/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0026 - val_loss: 0.0025 - learning_rate: 0.0010
Epoch 5/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0028 - val_loss: 0.0024 - learning_rate: 5.0000e-04
Epoch 6/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0016 - val_loss: 0.0023 - learning_rate: 5.0000e-04
Epoch 7/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0019 - val_loss: 0.0026 - learning_rate: 5.0

  super().__init__(**kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - loss: 0.1902 - val_loss: 0.0069 - learning_rate: 0.0010
Epoch 2/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0160 - val_loss: 0.0044 - learning_rate: 0.0010
Epoch 3/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0101 - val_loss: 0.0048 - learning_rate: 0.0010
Epoch 4/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0077 - val_loss: 0.0046 - learning_rate: 0.0010
Epoch 5/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0066 - val_loss: 0.0046 - learning_rate: 0.0010
Epoch 6/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0074 - val_loss: 0.0044 - learning_rate: 5.0000e-04
Epoch 7/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0053 - val_loss: 0.0046 - learning_rate: 5.0000e

  super().__init__(**kwargs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - loss: 0.2398 - val_loss: 0.0072 - learning_rate: 0.0010
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0150 - val_loss: 0.0076 - learning_rate: 0.0010
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0077 - val_loss: 0.0068 - learning_rate: 0.0010
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0055 - val_loss: 0.0068 - learning_rate: 0.0010
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0044 - val_loss: 0.0067 - learning_rate: 0.0010
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0029 - val_loss: 0.0065 - learning_rate: 0.0010
Epoch 7/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0034 - val_loss: 0.0066 - learning_rate: 0.0010
Epoc

  super().__init__(**kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 110ms/step - loss: 0.1014 - val_loss: 0.0178 - learning_rate: 0.0010
Epoch 2/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0198 - val_loss: 0.0109 - learning_rate: 0.0010
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0108 - val_loss: 0.0090 - learning_rate: 0.0010
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0055 - val_loss: 0.0092 - learning_rate: 0.0010
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0064 - val_loss: 0.0087 - learning_rate: 0.0010
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0048 - val_loss: 0.0088 - learning_rate: 0.0010
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0037 - val_loss: 0.0088 - learning_rate: 0.0010
Epo

  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - loss: 0.3956 - val_loss: 9.6281e-04 - learning_rate: 0.0010
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 0.0424 - val_loss: 0.0081 - learning_rate: 0.0010
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.0216 - val_loss: 2.7219e-04 - learning_rate: 0.0010
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.0140 - val_loss: 0.0017 - learning_rate: 0.0010
Epoch 5/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0092 - val_loss: 2.8855e-04 - learning_rate: 0.0010
Epoch 6/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.0081 - val_loss: 2.6784e-04 - learning_rate: 0.0010
Epoch 7/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0063 - val_loss: 5.9750e-04 - learni

  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 94ms/step - loss: 0.4709 - val_loss: 0.0107 - learning_rate: 0.0010
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.0699 - val_loss: 0.0102 - learning_rate: 0.0010
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0470 - val_loss: 0.0027 - learning_rate: 0.0010
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0441 - val_loss: 0.0019 - learning_rate: 0.0010
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0235 - val_loss: 0.0015 - learning_rate: 0.0010
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0205 - val_loss: 0.0030 - learning_rate: 0.0010
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0312 - val_loss: 0.0012 - learning_rate: 0.0010
Epoch 8/100
[1m7/

  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 66ms/step - loss: 0.1080 - val_loss: 0.0142 - learning_rate: 0.0010
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.0290 - val_loss: 0.0109 - learning_rate: 0.0010
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 0.0246 - val_loss: 0.0108 - learning_rate: 0.0010
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0145 - val_loss: 0.0149 - learning_rate: 0.0010
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.0128 - val_loss: 0.0100 - learning_rate: 0.0010
Epoch 6/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0090 - val_loss: 0.0100 - learning_rate: 0.0010
Epoch 7/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0072 - val_loss: 0.0113 - learning_rate: 0.0010
Epoc

  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 127ms/step - loss: 0.1743 - val_loss: 0.1246 - learning_rate: 0.0010
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.1037 - val_loss: 0.0482 - learning_rate: 0.0010
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0737 - val_loss: 0.0328 - learning_rate: 0.0010
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.0435 - val_loss: 0.0284 - learning_rate: 0.0010
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0312 - val_loss: 0.0294 - learning_rate: 0.0010
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0285 - val_loss: 0.0291 - learning_rate: 0.0010
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0285 - val_loss: 0.0294 - learning_rate: 0.0010
Epoch 8/100
[1m5

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 54ms/step - loss: 0.1428 - val_loss: 0.0114 - learning_rate: 0.0010
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.0261 - val_loss: 0.0094 - learning_rate: 0.0010
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0179 - val_loss: 0.0092 - learning_rate: 0.0010
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0125 - val_loss: 0.0135 - learning_rate: 0.0010
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0144 - val_loss: 0.0091 - learning_rate: 0.0010
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0114 - val_loss: 0.0093 - learning_rate: 0.0010
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0119 - val_loss: 0.0091 - learning_rate: 5.0000e-04


In [91]:
print(accuracy_df)

  State        RMSE        F1    Recall
0    PA  149.036345  0.375635  0.513889
1    MT   98.159904  0.513889  0.536232
2    AM   44.678125  0.373494  1.000000
3    RO   29.732971  0.474227  0.469388
4    MA   17.305738  0.408163  0.454545
5    TO    6.844799  0.846154  0.846154
6    RR  186.644138  0.000000  0.000000
7    AP   15.456640  0.000000  0.000000
8    AC   12.801708  0.441860  0.791667


In [None]:
import matplotlib.pyplot as plt

future_dataframes = {} 

# Plot train, test, and forecasted data for each state starting from 2023 as simple line plots
for state in deter_states:
    var_name = f'df_deter_{state}'
    df = globals()[var_name]  # Access the dataframe dynamically
    df = df.sort_values('VIEW_DATE')  # Ensure data is sorted by date

    # Filter data for dates from 2023 onward
    df = df[df['VIEW_DATE'] >= '2023-01-01']

    # Prepare the data
    X, y, scaler = prepare_data(df, feature='VIEW_DATE', target='AREAMUNKM_SUM', sequence_length=sequence_length)
    
    # Split into train and test sets
    split = int(0.8 * len(X))
    X_train, y_train = X[:split], y[:split]
    X_test, y_test = X[split:], y[split:]
    
    # Adjust corresponding dates for train and test sets
    train_dates = df['VIEW_DATE'].iloc[sequence_length:split + sequence_length].values  # Match the train set
    test_dates = df['VIEW_DATE'].iloc[split + sequence_length:].values  # Match the test set
    
    # Get the corresponding model
    model = models[state]
    
    # Make predictions for test set
    y_pred = model.predict(X_test)
    y_test_inversed = scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred_inversed = scaler.inverse_transform(y_pred)
    
    # Generate future predictions
    future_preds = predict_future(model, X, scaler, steps=future_steps)
    future_dates = pd.date_range(start=test_dates[-1], periods=future_steps + 1, freq='D')[1:]  # Generate future dates

    # Scale back to original values for visualization
    y_train_inversed = scaler.inverse_transform(y_train.reshape(-1, 1))
    
    future_df = pd.DataFrame({
        'Future_Dates': future_dates,
        'Future_Preds': future_preds.flatten()  # Flatten to ensure it's a 1D array
    })
    future_dataframes[state] = future_df 
    

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15

In [79]:
combined_df = pd.concat(
    [df.assign(State=state) for state, df in future_dataframes.items()],
    ignore_index=True
)

combined_df.rename(columns={"Future_Dates": "future_dates", "Future_Preds": "AREAMUNKM", "State": "UF"}, inplace=True)

combined_df.to_csv('future_preds.csv', index=False)

# Convert 'future_dates' to datetime for proper grouping
combined_df['future_dates'] = pd.to_datetime(combined_df['future_dates'])

# Add month and year columns for grouping
combined_df['year'] = combined_df['future_dates'].dt.year
combined_df['month'] = combined_df['future_dates'].dt.month

# Group by 'state', 'year', and 'month', then aggregate 'future_preds' using sum
grouped_df = combined_df.groupby(['UF', 'year', 'month'])['AREAMUNKM'].sum().reset_index()
grouped_df['CLASSNAME'] = None

In [41]:
print(df_deter.head())

              CLASSNAME  VIEW_DATE  AREAUCKM    UC  AREAMUNKM    MUNICIPALI  \
0  CICATRIZ_DE_QUEIMADA 2018-01-11       0.0  None   0.459839  Monte Alegre   
1  CICATRIZ_DE_QUEIMADA 2018-01-14       0.0  None   0.340975      Itaituba   
2  CICATRIZ_DE_QUEIMADA 2024-09-26       0.0  None   1.373554   Marcelandia   
3      DESMATAMENTO_VEG 2018-01-14       0.0  None   0.070781      Altamira   
4  CICATRIZ_DE_QUEIMADA 2018-01-14       0.0  None   0.149432      Itaituba   

  GEOCODIBGE  UF   ANO  MES  MES/ANO  
0    1504802  PA  2018    1  2018-01  
1    1503606  PA  2018    1  2018-01  
2    5105580  MT  2024    9  2024-09  
3    1500602  PA  2018    1  2018-01  
4    1503606  PA  2018    1  2018-01  


In [48]:
print(grouped_df)

    UF  year  month  future_preds CLASSNAME
0   AC  2024      9     64.976326      None
1   AC  2024     10    250.915955      None
2   AC  2024     11    224.935074      None
3   AC  2024     12    180.289520      None
4   AM  2024      9     83.937744      None
5   AM  2024     10    282.729889      None
6   AM  2024     11    425.616821      None
7   AM  2024     12    350.601898      None
8   AP  2024      9     15.631524      None
9   AP  2024     10    148.151108      None
10  AP  2024     11    146.024323      None
11  AP  2024     12    122.063965      None
12  MA  2024      9     10.975651      None
13  MA  2024     10    109.655182      None
14  MA  2024     11    103.838028      None
15  MA  2024     12     89.453255      None
16  MT  2024      9    589.201782      None
17  MT  2024     10   3667.200928      None
18  MT  2024     11   1423.825073      None
19  MT  2024     12    946.959534      None
20  PA  2024      9    419.547729      None
21  PA  2024     10   1495.35852

### Future Predictions

In [83]:
def states_map_pred(selected_month, selected_year):
    """
    Generates a Folium map for a specific month and year with choropleth and markers.
    Assumes the necessary datasets (`states`, `grouped_df`, `df_deter`) are globally available.
    
    Returns:
        folium.Map: A map with a choropleth and markers.
    """
    ############# Data Preparation #############
    # Define month and year for the map

    # Filter grouped_df for the selected month and year
    filtered_data = grouped_df[(grouped_df['month'] == selected_month) & (grouped_df['year'] == selected_year)]

    if filtered_data.empty:
        raise ValueError(f"No data available for {selected_year}-{selected_month:02d} in grouped_df.")

    # Merge filtered_data with states GeoDataFrame
    states_copy = states.copy()
    states_copy = states_copy.rename(columns={'sigla': 'UF'})  # Rename for consistency
    map_data = states_copy.merge(filtered_data, left_on='UF', right_on='UF', how='left')
    map_data.drop(columns = ["UF"],inplace = True)
    filtered_data["NOME_ESTADO"] = filtered_data["UF"].map(states_dict)
    filtered_data['NOME_SIGLA'] = filtered_data['NOME_ESTADO'] + ' (' + filtered_data["UF"] + ')' 
    filtered_data = filtered_data.reset_index()
    
    print(filtered_data.head())
    
   
    f_deter = alerts_df.copy()
    gb_uf = grouped_df.groupby('UF')['AREAMUNKM'].sum().sort_values(ascending=False)
    gb_uf = pd.DataFrame(gb_uf)
    gb_uf['NOME_ESTADO'] = gb_uf.index.map(states_dict)
    gb_uf['NOME_SIGLA'] = gb_uf['NOME_ESTADO'] + ' (' + gb_uf.index + ')' 
    gb_uf = gb_uf.reset_index()
    
    # print(gb_uf.head())

    states_copy = states.copy()
    states_copy = states_copy.rename(columns={'sigla': 'UF'})
    ############# Folium Map Initialization #############
    # Initialize the map
    map = folium_map_init()

    # Add state borders
    style_states = {'fillOpacity': 0.3, 'color': '#005f73', 'weight': 2}
    folium.GeoJson(map_data, name='States', style_function=lambda x: style_states).add_to(map)

    folium.Choropleth(
        geo_data=states_copy,
        data=gb_uf,
        columns=['UF', 'AREAMUNKM'],
        key_on='feature.properties.UF',
        fill_color='Reds',
        fill_opacity=0.7,
        line_opacity=0.2,
        nan_fill_color='white',
        bins=8,
        highlight=True,
        legend_name='Future Predicted Area in km²',
        name='Predicted Affected States'
    ).add_to(map)

    # Add markers with detailed popups
    map = folium_add_markers_pred(
        container=map,
        df_data=filtered_data,
        geo_df=states_copy,
        get_centroid_mode=1,
        grouped_df=grouped_df,
        key='UF',  # Use 'UF' as the key from states_copy
        popup_title_column='NOME_SIGLA',  # Use 'state' from grouped_df for popups
        popup_total_area_text='Total Damaged Area:',
        total_area_column='AREAMUNKM'
    )
    


    # Add layer control
    folium.LayerControl().add_to(map)

    return map

In [84]:
dict_classes, texts = get_texts()
selected_months = [10,11,12]
selected_year = 2024

for selected_month in selected_months:
    map = states_map_pred(selected_month,selected_year)
    save_map(f'States_pred_{selected_month}_{selected_year}_EN',map)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data["NOME_ESTADO"] = filtered_data["UF"].map(states_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['NOME_SIGLA'] = filtered_data['NOME_ESTADO'] + ' (' + filtered_data["UF"] + ')'


   index  UF  year  month    AREAMUNKM CLASSNAME  NOME_ESTADO  \
0      1  AC  2024     10   250.915955      None         Acre   
1      5  AM  2024     10   282.729889      None     Amazonas   
2      9  AP  2024     10   148.151108      None        Amapá   
3     13  MA  2024     10   109.655182      None     Maranhão   
4     17  MT  2024     10  3667.200928      None  Mato Grosso   

         NOME_SIGLA  
0         Acre (AC)  
1     Amazonas (AM)  
2        Amapá (AP)  
3     Maranhão (MA)  
4  Mato Grosso (MT)  



  geo_df_proj['centroid'] = geo_df_proj.geometry.centroid

  geo_df_proj['latitude'] = geo_df_proj.centroid.y

  geo_df_proj['longitude'] = geo_df_proj.centroid.x
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data["NOME_ESTADO"] = filtered_data["UF"].map(states_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['NOME_SIGLA'] = filtered_data['NOME_ESTADO'] + ' (' + filtered_data["UF"] + ')'


   index  UF  year  month    AREAMUNKM CLASSNAME  NOME_ESTADO  \
0      2  AC  2024     11   224.935074      None         Acre   
1      6  AM  2024     11   425.616821      None     Amazonas   
2     10  AP  2024     11   146.024323      None        Amapá   
3     14  MA  2024     11   103.838028      None     Maranhão   
4     18  MT  2024     11  1423.825073      None  Mato Grosso   

         NOME_SIGLA  
0         Acre (AC)  
1     Amazonas (AM)  
2        Amapá (AP)  
3     Maranhão (MA)  
4  Mato Grosso (MT)  



  geo_df_proj['centroid'] = geo_df_proj.geometry.centroid

  geo_df_proj['latitude'] = geo_df_proj.centroid.y

  geo_df_proj['longitude'] = geo_df_proj.centroid.x
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data["NOME_ESTADO"] = filtered_data["UF"].map(states_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['NOME_SIGLA'] = filtered_data['NOME_ESTADO'] + ' (' + filtered_data["UF"] + ')'


   index  UF  year  month   AREAMUNKM CLASSNAME  NOME_ESTADO        NOME_SIGLA
0      3  AC  2024     12  180.289520      None         Acre         Acre (AC)
1      7  AM  2024     12  350.601898      None     Amazonas     Amazonas (AM)
2     11  AP  2024     12  122.063965      None        Amapá        Amapá (AP)
3     15  MA  2024     12   89.453255      None     Maranhão     Maranhão (MA)
4     19  MT  2024     12  946.959534      None  Mato Grosso  Mato Grosso (MT)



  geo_df_proj['centroid'] = geo_df_proj.geometry.centroid

  geo_df_proj['latitude'] = geo_df_proj.centroid.y

  geo_df_proj['longitude'] = geo_df_proj.centroid.x


In [None]:
import pandas as pd

# Initialize an empty dataframe to store the merged predictions
merged_predictions = pd.DataFrame()

# Iterate over each state to merge predicted data
for state in deter_states:
    
    pred_start_idx = sequence_length  # The first prediction index
    pred_dates = df['VIEW_DATE'].iloc[pred_start_idx:pred_start_idx + len(y_pred_inversed)].values  # Match dates with predictions
    
    # Create a dataframe for predictions
    pred_df = pd.DataFrame({
        'DATE': pred_dates,
        f'PREDICTED_{state}_AREAMUNKM_SUM': y_pred_inversed.flatten()
    })
    
    # Merge with the main dataframe based on the date
    if merged_predictions.empty:
        merged_predictions = pred_df
    else:
        merged_predictions = pd.merge(merged_predictions, pred_df, on='DATE', how='outer')
# Optionally, save it to a CSV file for further use
merged_predictions.to_csv('merged_predictions.csv', index=False)



In [44]:
df_deter['CLASSNAME'].unique()

array(['CICATRIZ_DE_QUEIMADA', 'DESMATAMENTO_VEG', 'DESMATAMENTO_CR',
       'MINERACAO', 'DEGRADACAO', 'CS_DESORDENADO', 'CS_GEOMETRICO'],
      dtype=object)