# ML/DL techniques in the reviewed articles

This notebook summarises the ML/DL techniques used in the reviewed articles, per hazard:

- Air pollution: 534 articles considered for the following plots (the ones with at least 1 citation).
- Urban Heat Island (UHI): all articles considered.
- Flood: all articles considered.
- Landslide: 402/511 articles considered for the following plots (the ones with at least 1 citation).

In [13]:
import pandas as pd
import plotly.express as px
# Raise pandas' display limit so tables of up to 500 rows are shown in full.
pd.options.display.max_rows = 500

In [74]:
# Input locations: for each hazard, one CSV of articles and one CSV legend,
# all stored under the same folder.
root = './subselected_articles/'

air_pollution_path = f'{root}AP.csv'
air_pollution_legend_path = f'{root}AP_legend.csv'

heat_island_path = f'{root}UHI.csv'
heat_island_legend_path = f'{root}UHI_legend.csv'

flooding_path = f'{root}flooding.csv'
flooding_legend_path = f'{root}flooding_legend.csv'

landslides_path = f'{root}landslides.csv'
landslides_legend_path = f'{root}landslides_legend.csv'

# Load every table, using the first CSV column as the DataFrame index.
air_pollution, air_pollution_legend = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (air_pollution_path, air_pollution_legend_path)
)
heat_island, heat_island_legend = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (heat_island_path, heat_island_legend_path)
)
flooding, flooding_legend = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (flooding_path, flooding_legend_path)
)
landslides, landslides_legend = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (landslides_path, landslides_legend_path)
)

In [52]:
def plot_data(df, set_style=False, log=False, threshold=25):
    """Show a bar chart of article counts per ML/DL category.

    Each row of ``df`` becomes one bar segment placed on its ``Category``,
    sized and coloured by ``Count`` and labelled with its ``Tag``.

    Note: this function writes a ``'Filtered Tag'`` column into ``df`` itself
    (the caller's frame is modified in place).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'Category'``, ``'Count'`` and ``'Tag'``.
    set_style : bool, default False
        When True, apply the large fixed-size layout (1800x1300, axis and
        legend titles, font size 30).
    log : bool, default False
        Passed to plotly as ``log_y``: use a logarithmic y axis.
    threshold : int, default 25
        Minimum ``Count`` a row needs for its tag to be printed on the bar;
        rows below it get an empty label.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Blank out the label of every row whose count is under the threshold.
    df['Filtered Tag'] = [
        tag if count >= threshold else ""
        for count, tag in zip(df['Count'], df['Tag'])
    ]

    bar_options = dict(
        x="Category",
        y="Count",
        color="Count",
        text="Filtered Tag",
        color_continuous_scale="blugrn",
        log_y=log,
    )
    fig = px.bar(df, **bar_options)

    # Draw the tag labels inside the bars.
    fig.update_traces(textposition='inside')

    if set_style:
        large_layout = dict(
            width=1800,
            height=1300,
            xaxis_title="ML/DL categories",
            yaxis_title="Articles count",
            legend_title="Count",
            font=dict(size=30),
        )
        fig.update_layout(**large_layout)

    fig.show()


In [58]:
def getTechniqueCount(articles, legend):
    """Count how many article entries mention each technique listed in ``legend``.

    The ``'ML/DL techniques'`` cell of every article is split on commas. For
    each piece, anything from the first ``'('`` onwards is dropped and the
    remainder is stripped of surrounding whitespace; the result is looked up
    in the index of ``legend``. Matches increment that technique's count;
    pieces that do not match are printed so they can be inspected.

    Parameters
    ----------
    articles : pandas.DataFrame
        Must contain an ``'ML/DL techniques'`` column. Cells that are not
        non-empty strings (e.g. NaN) are skipped.
    legend : pandas.DataFrame
        Indexed by technique tag and containing a ``'Category'`` column.
        It is NOT modified: counting is done on a copy.

    Returns
    -------
    pandas.DataFrame
        A copy of ``legend`` with an added ``'Count'`` column, sorted by
        ``'Count'`` in descending order, with the former index moved into a
        regular column, and with the rows of category ``'XAI'`` removed.

    Raises
    ------
    KeyError
        If ``articles`` has no ``'ML/DL techniques'`` column or ``legend``
        has no ``'Category'`` column.
    """
    # Work on a real copy: plain assignment would only alias the caller's
    # frame and leak the 'Count' column into it.
    counts = legend.copy()
    counts['Count'] = 0

    for methods_line in articles['ML/DL techniques']:
        # Skip missing values (NaN) and empty cells.
        if not (isinstance(methods_line, str) and methods_line):
            continue
        for method in methods_line.split(','):
            # Keep only the tag itself, e.g. "LSTM (stacked)" -> "LSTM".
            method = method.split('(')[0].strip()
            if method in counts.index:
                counts.at[method, 'Count'] += 1
            else:
                # Surface tags that are missing from the legend.
                print(method)

    counts = counts.sort_values('Count', ascending=False).reset_index()
    return counts[counts['Category'] != 'XAI']

## Air pollution

In [63]:
# Count technique mentions in the air-pollution articles and chart them;
# threshold=0 keeps the tag label on every bar.
air_pollution_counts = getTechniqueCount(articles=air_pollution, legend=air_pollution_legend)
plot_data(air_pollution_counts, set_style=False, log=False, threshold=0)

RT


In [70]:
# Display the air-pollution technique counts, sorted from most to least used.
air_pollution_counts

Unnamed: 0,Tag,Full Name,Category,Count,Filtered Tag
0,LSTM,Long Short Term Memory,NN,203,LSTM
1,RF,Random Forest,ENS,166,RF
2,CNN,Convolutional Neural Networks,NN,81,CNN
3,SVM,Support Vector Machine,SVM,81,SVM
4,XGB,Extreme Gradient Boost,ENS,65,XGB
5,LR,Linear Regression,LR,62,LR
6,GRU,Gated Recurrent Unit,NN,54,GRU
7,ANN,Artificial Neural Network,NN,48,ANN
8,MLP,MultiLayer Perceptron,NN,35,MLP
9,GBDT,Gradient Boosting Decision Trees,ENS,34,GBDT


## Urban heat island

In [67]:
# Count technique mentions in the urban-heat-island articles and chart them;
# threshold=0 keeps the tag label on every bar.
UHI_counts = getTechniqueCount(articles=heat_island, legend=heat_island_legend)
plot_data(UHI_counts, set_style=False, log=False, threshold=0)

CA


In [71]:
# Display the urban-heat-island technique counts, sorted from most to least used.
UHI_counts

Unnamed: 0,Tag,Full Name,Category,Count,Filtered Tag
0,RFR,Random Forest Regression,ENS,10,RFR
1,ANN,Artificial Neural Network,NN,9,ANN
2,LR,Linear Regression(regularization),LR,8,LR
3,RF,Random Forest,ENS,5,RF
4,SVR,Support Vector Regression,SVM,4,SVR
5,GBRT,Gradient Boosted Regression Trees,ENS,3,GBRT
6,BN,Bayesian Network,BAY,2,BN
7,MLP,Multi-Layer Perceptron,NN,2,MLP
8,BR,Bayesian Regression (regularization),BAY,1,BR
9,MANN,Model Averaged Neural Network,NN,1,MANN


## Flood

In [68]:
# Count technique mentions in the flood articles and chart them;
# threshold=0 keeps the tag label on every bar.
flood_counts = getTechniqueCount(articles=flooding, legend=flooding_legend)
plot_data(flood_counts, set_style=False, log=False, threshold=0)




In [72]:
# Display the flood technique counts, sorted from most to least used.
flood_counts

Unnamed: 0,Tag,Full Name,Category,Count,Filtered Tag
0,RF,Random Forest,ENS,105,RF
1,ANN,Artificial Neural Network,NN,75,ANN
2,SVM,Support Vector Machine,SVM,65,SVM
3,XGB,Extreme Gradient Boosting,ENS,30,XGB
4,KNN,K-Nearest-Neighbor,IB,26,KNN
5,CNN,Convolutional Neural Network,NN,21,CNN
6,DNN,Deep Neural Network,NN,21,DNN
7,NB,Naive Bayes,BAY,20,NB
8,BRT,Boosted Regression Tree,ENS,20,BRT
9,DT,Decision Trees,DT,19,DT


## Landslides

In [69]:
# Count technique mentions in the landslide articles and chart them;
# threshold=0 keeps the tag label on every bar.
landslides_counts = getTechniqueCount(articles=landslides, legend=landslides_legend)
plot_data(landslides_counts, set_style=False, log=False, threshold=0)




In [73]:
# Display the landslide technique counts, sorted from most to least used.
landslides_counts

Unnamed: 0,Tag,Full Name,Category,Count,Filtered Tag
0,RF,Random Forest,ENS,200,RF
1,SVM,Support Vector Machine,SVM,151,SVM
2,LOGR,Logistic Regression,LR,108,LOGR
3,ANN,Artificial Neural Network,NN,106,ANN
4,CNN,Convolutional Neural Network,NN,48,CNN
5,XGB,Extreme Gradient Boosting,ENS,43,XGB
6,DNN,Deep Neural Network,NN,34,DNN
7,KNN,K-Nearest-Neighbor,IB,29,KNN
8,NB,Naive Bayes,BAY,29,NB
9,DT,Decision Trees,DT,27,DT
