In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import shapiro, kstest, kendalltau

In [2]:
# Read the CSV once; pandas is asked to parse the 'Date' column as dates
# while loading, so later cells can use the .dt accessor on it.
path = "../dataset/data.csv"
df = pd.read_csv(filepath_or_buffer=path, parse_dates=["Date"])

In [3]:
# Two column subsets of df. Both start with 'Date' and end with the same five
# columns (SPEI, EHF, TSM, CHL, CDOM); they differ in the columns in between.
# Column order is kept as-is because it determines the heatmap axis order.
climate = df.loc[:, [
    'Date',
    'T', 'LMLT', 'EFVT', 'PE', 'SP', 'SR', 'UW', 'VW', 'RH', 'ST', 'ET0',
    'SPEI', 'EHF', 'TSM', 'CHL', 'CDOM',
]]
sentinel = df.loc[:, [
    'Date',
    'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
    'GNDVI', 'NDTI', 'BR',
    'SPEI', 'EHF', 'TSM', 'CHL', 'CDOM',
]]

In [4]:
def correlation(data, name, output_dir="../plots"):
    """Save a lower-triangle Pearson correlation heatmap of ``data`` as a PNG.

    The ``Date`` column is dropped, pairwise Pearson coefficients are computed
    for the remaining columns, and the lower triangle (diagonal included) is
    drawn as an annotated heatmap written to
    ``<output_dir>/correlation_<name>.png``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``Date`` column plus the columns to correlate.
    name : str
        Label used in the output file name. ``"dataset"`` selects a smaller
        font (13 instead of 16) for annotations and tick labels.
    output_dir : str or os.PathLike, default "../plots"
        Directory the PNG is written to. It is created if it does not exist.

    Returns
    -------
    None
        The figure is saved to disk and then closed; nothing is returned.

    Raises
    ------
    KeyError
        If ``data`` has no ``Date`` column.
    """
    fontsize = 13 if name == "dataset" else 16

    # Compute everything that can fail on bad input before allocating the
    # (large, 300 dpi) figure.
    corr = data.drop(columns=["Date"]).corr(method="pearson")
    # k=1 masks only the cells strictly above the diagonal, so the diagonal
    # itself stays visible.
    mask = np.triu(np.ones_like(corr, dtype=bool), k=1)

    # A fresh checkout may not have the output directory yet.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(25, 18), dpi=300)
    try:
        heatmap = sns.heatmap(
            corr,
            mask=mask,
            vmin=-1,
            vmax=1,
            annot=True,
            fmt=".2f",
            cmap='coolwarm',
            annot_kws={"size": fontsize, "weight": "bold"},
            cbar_kws={"shrink": .6, "ticks": np.linspace(-1, 1, 5)},
            ax=ax,
        )

        ax.tick_params(axis='x', labelsize=fontsize, pad=10)
        ax.tick_params(axis='y', labelsize=fontsize, rotation=0, pad=10)

        colorbar = heatmap.collections[0].colorbar
        colorbar.set_label('r', fontsize=22, rotation=0)
        colorbar.ax.tick_params(labelsize=fontsize)

        fig.savefig(
            out_dir / f'correlation_{name}.png',
            dpi=300,
            bbox_inches='tight',
            facecolor='white',
        )
    finally:
        # Close this specific figure even if plotting or saving raised, so a
        # failed call does not leave the figure open in the kernel.
        plt.close(fig)

In [5]:
# One heatmap per frame; the label becomes part of the PNG file name.
for frame, label in (
    (df, "dataset"),
    (climate, "climate"),
    (sentinel, "sentinel"),
):
    correlation(frame, label)

In [6]:
def seasonal_correlation(data, name, output_dir="../plots"):
    """Save one lower-triangle Pearson correlation heatmap per season.

    Rows of ``data`` are grouped by the calendar month of their ``Date`` value
    into spring (3-5), summer (6-8), autumn (9-11) and winter (12, 1, 2). For
    each group the ``Date`` column is dropped, Pearson coefficients are
    computed for the remaining columns, and an annotated heatmap is written to
    ``<output_dir>/correlation_<name>_<season>.png``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``Date`` column plus the columns to correlate.
        ``Date`` must support the ``.dt`` accessor (i.e. be datetime-like).
    name : str
        Label used in the output file names. ``"dataset"`` selects a smaller
        font (13 instead of 16) for annotations and tick labels.
    output_dir : str or os.PathLike, default "../plots"
        Directory the PNGs are written to. It is created if it does not exist.

    Returns
    -------
    None
        The figures are saved to disk and closed; nothing is returned.

    Raises
    ------
    KeyError
        If ``data`` has no ``Date`` column.
    """
    fontsize = 13 if name == "dataset" else 16

    # Season name -> calendar month numbers belonging to it.
    seasons = {
        'spring': [3, 4, 5],
        'summer': [6, 7, 8],
        'autumn': [9, 10, 11],
        'winter': [12, 1, 2],
    }

    # A fresh checkout may not have the output directory yet.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Loop-invariant pieces: the month of every row and the frame without
    # 'Date'. Both share data's index, so the boolean selection below aligns.
    month_of_row = data['Date'].dt.month
    values = data.drop(columns=["Date"])

    for season, months in seasons.items():
        # Filter and correlate before allocating the (large, 300 dpi) figure.
        corr = values[month_of_row.isin(months)].corr(method="pearson")
        # k=1 masks only the cells strictly above the diagonal, so the
        # diagonal itself stays visible.
        mask = np.triu(np.ones_like(corr, dtype=bool), k=1)

        fig, ax = plt.subplots(figsize=(25, 18), dpi=300)
        try:
            heatmap = sns.heatmap(
                corr,
                mask=mask,
                vmin=-1,
                vmax=1,
                annot=True,
                fmt=".2f",
                cmap='coolwarm',
                annot_kws={"size": fontsize, "weight": "bold"},
                cbar_kws={"shrink": .6, "ticks": np.linspace(-1, 1, 5)},
                ax=ax,
            )

            ax.tick_params(axis='x', labelsize=fontsize, pad=10)
            ax.tick_params(axis='y', labelsize=fontsize, rotation=0, pad=10)

            colorbar = heatmap.collections[0].colorbar
            colorbar.set_label('r', fontsize=22, rotation=0)
            colorbar.ax.tick_params(labelsize=fontsize)

            fig.savefig(
                out_dir / f'correlation_{name}_{season}.png',
                dpi=300,
                bbox_inches='tight',
                facecolor='white',
            )
        finally:
            # Close this specific figure even if plotting or saving raised,
            # so a failure in one season does not leave figures open.
            plt.close(fig)

In [7]:
# Four seasonal heatmaps per frame; the label becomes part of each file name.
for frame, label in (
    (df, "dataset"),
    (climate, "climate"),
    (sentinel, "sentinel"),
):
    seasonal_correlation(frame, label)