# **Exploratory Data Analysis of Climatic Variables**

to be written

In [None]:
# Libraries
import pandas as pd
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Data: read the AVONET CSV (expected in the working directory) into the
# DataFrame that the rest of the notebook refers to as `df`.
csv_path = 'AVONET.csv'
df = pd.read_csv(csv_path)

## **1. Body Size (Mass)**

### **Mass and Temperature**

To choose an appropriate statistical test, a Shapiro-Wilk normality test is performed first, in which

* H0: The Mass is normally distributed.
* H1: The Mass is not normally distributed.

In [None]:
# Shapiro-Wilk normality test on body mass.
# Missing values are dropped first so that the test is not handed NaNs and
# the N printed below is the number of observations actually tested.
# NOTE: SciPy documents that for N > 5000 the W statistic is accurate but
# the p-value may not be, so treat the p-value as indicative for large N.
mass_values = df['Mass'].dropna()
shapiro_w, shapiro_p = stats.shapiro(mass_values)

print(f"N: {len(mass_values)}")
print(f"p-value: {shapiro_p:.4e}")

Since p << 0.05, the null hypothesis (H0) is rejected: Mass is not normally distributed. Therefore, parametric tests such as Pearson's correlation are not appropriate. As an alternative, Spearman's rank correlation test is used, in which

* H0: There is no monotonic relationship between Mass and Minimum temperature.
* H1: There is a monotonic relationship between Mass and Minimum temperature.

In [None]:
# Spearman rank correlation between minimum temperature (bio_6) and Mass.
# nan_policy='omit' skips rows where either value is missing; the default
# ('propagate') would make both rho and the p-value NaN if any value is NaN.
correlation_rho, p_value_spearman = stats.spearmanr(
    df['bio_6'], df['Mass'], nan_policy='omit'
)

print(f"Spearman rho: {correlation_rho:.4f}")
print(f"p-value: {p_value_spearman:.4e}")

* H0: There is no monotonic relationship between Mass and Maximum temperature.
* H1: There is a monotonic relationship between Mass and Maximum temperature.

In [None]:
# Spearman rank correlation between maximum temperature (bio_5) and Mass.
# nan_policy='omit' skips rows where either value is missing; the default
# ('propagate') would make both rho and the p-value NaN if any value is NaN.
correlation_rho, p_value_spearman = stats.spearmanr(
    df['bio_5'], df['Mass'], nan_policy='omit'
)

print(f"Spearman rho: {correlation_rho:.4f}")
print(f"p-value: {p_value_spearman:.4e}")

* H0: There is no monotonic relationship between Mass and Average temperature.
* H1: There is a monotonic relationship between Mass and Average temperature.

In [None]:
# Spearman rank correlation between average temperature (bio_1) and Mass.
# nan_policy='omit' skips rows where either value is missing; the default
# ('propagate') would make both rho and the p-value NaN if any value is NaN.
correlation_rho, p_value_spearman = stats.spearmanr(
    df['bio_1'], df['Mass'], nan_policy='omit'
)

print(f"Spearman rho: {correlation_rho:.4f}")
print(f"p-value: {p_value_spearman:.4e}")

For minimum and maximum temperature, p < 0.05, so the null hypothesis (H0) is rejected: there is a statistically significant monotonic relationship between Mass and both minimum and maximum temperature. For average temperature, however, p > 0.05, so we fail to reject the null hypothesis. Finally, to illustrate these relationships, scatter plots are created (using log(Mass) for better visualization).

In [None]:
# Three side-by-side scatter plots of log(Mass) against each temperature
# variable, each with a fitted line and the Spearman statistics in the legend.
temp_cols = ['bio_6', 'bio_1', 'bio_5']

# Use the existing log_Mass column directly.
# NOTE(review): 'log_Mass' is not created anywhere in this section; it is
# assumed to already be a column of df -- confirm it is present in the CSV.
df_temp = df[['log_Mass'] + temp_cols].copy()
# Long format: one row per (log_Mass, temperature type, temperature value).
df_melted = df_temp.melt(
    id_vars='log_Mass',
    var_name='Temperature_Type',
    value_name='Temperature_Value',
)

fig, axes = plt.subplots(1, 3, figsize=(18, 6))
fig.suptitle('Log(Mass) vs. Temperature', fontsize=18)

# Human-readable panel names for the temperature columns.
label_map = {
    'bio_6': 'Minimum Temperature',
    'bio_1': 'Average Temperature',
    'bio_5': 'Maximum Temperature',
}

for i, (ax, col) in enumerate(zip(axes, temp_cols)):
    subset = df_melted[df_melted['Temperature_Type'] == col]

    # regplot drops rows with missing values before plotting, so omit them
    # here as well; otherwise a single NaN would turn the annotation into NaN
    # while the panel still shows data.
    rho, p = stats.spearmanr(
        subset['Temperature_Value'], subset['log_Mass'], nan_policy='omit'
    )

    # The red line drawn by regplot is a linear regression fit, not a
    # rank-based trend, so the legend labels it as such and reports the
    # Spearman statistics alongside it.
    sns.regplot(
        x='Temperature_Value',
        y='log_Mass',
        data=subset,
        ax=ax,
        scatter_kws={'alpha': 0.1, 's': 10},
        line_kws={
            'color': 'red',
            'lw': 2,
            'label': f'Linear fit (Spearman ρ = {rho:.3f}, p = {p:.2e})',
        },
    )

    ax.set_title(f"{label_map[col]}", fontsize=14)
    ax.set_xlabel(f"{label_map[col]} (°C)")
    # Only the left-most panel carries the shared y-axis label.
    ax.set_ylabel("Log(Mass) (log(g))" if i == 0 else "")
    ax.legend(loc='upper right', fontsize=10)

# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()