## Introduction

This notebook provides a few plots demonstrating outlier detection and correlation analysis using different methods (e.g. the Z-score method) and graphs (e.g. histogram, box plot).

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ginamanou/avalanche-susceptibility/blob/main/5_Some_plots.ipynb)

### Import packages 

In [None]:
from dateutil.parser import parse
from datetime import datetime

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns

import xarray as xr
import numpy as np
import pandas as pd

import os
import glob
import pathlib
from pathlib import Path

#plt.rcParams.update({'figure.figsize': (15, 7), 'figure.dpi': 120})

### Negative rainfall

In [None]:
# Read the Borda_Vidal.csv record and convert its 'date' column
# (month/day/year strings) to datetime values in a single chained step.
df = pd.read_csv(
    "D:/Allaus/Data_analysis/Altres_estacions_from_meteo/2_no_nulls_2022/Borda_Vidal.csv"
).assign(date=lambda frame: pd.to_datetime(frame['date'], format='%m/%d/%Y'))

In [None]:
# Smallest rainfall value in the record (shown as the cell output).
df['rain'].min()

In [None]:
# Date stored under index label 0 (shown as the cell output).
df['date'][0]

In [None]:
# Scatter plot of rainfall against date, saved as the "negative rain" figure.
plt.figure(figsize=(12, 8))
sns.set_context("paper", rc={"font.size": 14, "axes.titlesize": 14, "axes.labelsize": 14,
                             "xtick.labelsize": 14, "ytick.labelsize": 14, "legend.fontsize": 14})
# sns.scatterplot returns a matplotlib Axes (not a Figure), hence the name `ax`.
ax = sns.scatterplot(x=df.date, y=df.rain, color='b')
ax.set(xlabel='Year', ylabel='Rainfall (mm)')
# Rotate the date labels through tick_params. Re-assigning the label texts with
# set_xticklabels(get_xticklabels(), ...) pins a fixed list of strings onto an
# automatic date locator: matplotlib warns about it, and the labels can end up
# blank or mismatched once the tick positions are recomputed.
ax.tick_params(axis='x', labelrotation=45)
ax.grid()
plt.savefig('D:/Allaus/Manuscript/pictures_and_figures/negative rain - Borda Vidal.png', dpi=300)
plt.show();

### Outlier detection - **Z-score method**

For temp_min

In [None]:
# Read Soldeu.csv, then replace the 'date' strings (month/day/year hour:minute)
# with parsed datetimes. The frame is the cell output.
df = pd.read_csv("D:/Allaus/Data_analysis/Neu_from_meteo/7_map/Soldeu.csv")
df = df.assign(date=pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M'))
df

In [None]:
# Distribution plot of the minimum temperature, saved as the temp-min outlier figure.
sns.set_context(
    "paper",
    rc=dict.fromkeys(
        ["font.size", "axes.titlesize", "axes.labelsize",
         "xtick.labelsize", "ytick.labelsize", "legend.fontsize"],
        14,
    ),
)
# displot returns a FacetGrid; its .set() forwards the keywords to every axes.
grid = sns.displot(df['temp_min'])
grid.set(xlabel='Minimum temperature (°)')
grid.set(xlim=(-30, 30), ylim=(0, 320))
plt.savefig('D:/Allaus/Manuscript/pictures_and_figures/outliers - temp min - Soldeu.png', dpi=300)
plt.show()

For temp_max

In [None]:
# Read Pal.csv and parse its 'date' column (month/day/year hour:minute).
# The frame is the cell output.
df = pd.read_csv(
    "D:/Allaus/Data_analysis/Neu_from_meteo/7_map/Pal.csv"
).assign(date=lambda frame: pd.to_datetime(frame['date'], format='%m/%d/%Y %H:%M'))
df

In [None]:
# Box plot of the maximum temperature, saved as the temp-max outlier figure.
plt.figure(figsize=(5, 5))
sns.set_context("paper", rc={"font.size": 14, "axes.titlesize": 14, "axes.labelsize": 14,
                             "xtick.labelsize": 14, "ytick.labelsize": 14, "legend.fontsize": 14})
# Pass the series explicitly as `y`. A bare positional argument is read as `x`
# by seaborn 0.11 (horizontal box) and as `data` by later releases, so the
# orientation depended on the installed version; the y-axis label set below
# only makes sense for a vertical box.
ax = sns.boxplot(y=df.temp_max)
ax.set(ylabel='Maximum temperature (°)')
# bbox_inches='tight' crops the saved image to the drawn content.
plt.savefig('D:/Allaus/Manuscript/pictures_and_figures/outliers - temp max - Pal.png', dpi=300, bbox_inches='tight')
plt.show();

### Threshold of extreme rainfall

In [None]:
# Read La_Comella.csv, then convert the 'date' strings (month/day/year) to
# datetimes. The frame is the cell output.
df = pd.read_csv("D:/Allaus/Data_analysis/Altres_estacions_from_meteo/7_temps_outliers/La_Comella.csv")
df = df.assign(date=pd.to_datetime(df['date'], format='%m/%d/%Y'))
df

In [None]:
#df.date[5843]

In [None]:
# Scatter plot of rainfall against date with a horizontal line at the
# 95th percentile of rainfall, saved as the extreme-rain-threshold figure.
plt.figure(figsize=(12, 8))
sns.set_context("paper", rc={"font.size": 15, "axes.titlesize": 15, "axes.labelsize": 15,
                             "xtick.labelsize": 15, "ytick.labelsize": 15, "legend.fontsize": 15})
# sns.scatterplot returns a matplotlib Axes (not a Figure), hence the name `ax`.
ax = sns.scatterplot(x=df.date, y=df.rain, color='b')
ax.set(xlabel='Year', ylabel='Rainfall (mm)')
# Rotate the date labels through tick_params. The previous
# set_xticklabels(get_xticklabels(), ...) call froze the label strings, and the
# set_xlim() below then changed the ticks underneath that frozen list, which
# can shift every label onto the wrong tick.
ax.tick_params(axis='x', labelrotation=45)
# Red threshold line at the 95th percentile of the rainfall column.
ax.axhline(y=df.rain.quantile(q=0.95), color='r')
# Clip the x-axis to the dates at row positions 0 and 5843.
# NOTE(review): 5843 is a hard-coded row position, presumably the last row of
# this file; confirm, or switch to df.date.iloc[-1].
ax.set_xlim(df.date.iloc[0], df.date.iloc[5843])
plt.savefig('D:/Allaus/Manuscript/pictures_and_figures/extreme rain threshold - La Comella.png', dpi=300)
plt.show();

### Correlation

In [None]:
# Read Pal.csv (temp range / temp trend folder) and parse its 'date' column
# (year-month-day hour:minute:second). The frame is the cell output.
df = pd.read_csv(
    "D:/Allaus/Data_analysis/Neu_from_meteo/15_temp_range_temp_trend/Pal.csv"
).assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d %H:%M:%S'))
df

In [None]:
# Pairwise correlation matrix of the numeric columns, shown as a colour-graded table.
# `df` also holds a datetime 'date' column. Since pandas 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and raises on them, so restrict
# the frame to numeric/bool columns first (this works on older pandas too).
numeric_cols = df.select_dtypes(include=['number', 'bool'])
corr = numeric_cols.corr()
corr.style.background_gradient(cmap='coolwarm')

In [None]:
# Correlation between the snow_tot and temp_range columns (shown as the cell output).
df['snow_tot'].corr(other=df['temp_range'])

In [None]:
# Keep only the two temperature columns and draw their pairwise scatter/histogram grid.
temps = df.loc[:, ['temp_max', 'temp_min']]
sns.pairplot(data=temps)

In [None]:
# Print the hex codes of seaborn's "colorblind" palette (used to pick plot colours).
print(sns.color_palette(palette="colorblind").as_hex())

In [None]:
#sns.color_palette(palette='colorblind')
# Regression plot of temp_range against temp_max, saved as the temp-correlation figure.
canvas = plt.figure(figsize=(12, 8))
sns.set_context(
    "paper",
    rc=dict.fromkeys(
        ["font.size", "axes.titlesize", "axes.labelsize",
         "xtick.labelsize", "ytick.labelsize", "legend.fontsize"],
        15,
    ),
)
# Create the axes explicitly (after the context is set) and hand it to seaborn.
axes = canvas.add_subplot(111)
sns.regplot(
    x=df['temp_max'],
    y=df['temp_range'],
    scatter_kws={"color": "#ece133"},
    line_kws={"color": "#d55e00"},
    ax=axes,
)
axes.set_xlabel('Maximum temperature (°C)')
axes.set_ylabel('Range of daily temperatures')
axes.grid()

plt.savefig('D:/Allaus/Manuscript/pictures_and_figures/temp correlation - Pal.png', dpi=300)
plt.show()