# Capitolo 5: DentMax, Exploratory Data Analysis

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr

# Load the DentMax workbook into a DataFrame
df = pd.read_excel(io="data/Capitolo5e6-DentMax.xlsx")

# Preview the top five rows
df.head(n=5)


## Statistica descrittiva

In [None]:
# Summary statistics of the loaded DataFrame, using pandas' default settings
df.describe()

In [None]:
# Compute the correlation matrix on the numeric columns only.
# Since pandas 2.0, DataFrame.corr() no longer drops non-numeric columns by
# default and raises on columns it cannot convert to float, so the numeric
# columns are selected explicitly (this also works on older pandas versions).
corr = df.select_dtypes(include=[np.number]).corr()

# Plot the heatmap; the colour scale is pinned to [-1, 1], the full range of
# a correlation coefficient, so colours are comparable across plots.
plt.figure(figsize=(16, 12))
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.show()


In [None]:
# Significance level for the Pearson correlation test
SIGNIFICANCE_LEVEL = 0.05

# Identify numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

# Compute correlation for numeric columns (pandas excludes missing values
# pairwise when computing each coefficient)
corr = df[numeric_cols].corr()

# p-value matrix with the same labels as the correlation matrix.
# Float dtype, pre-filled with NaN meaning "this pair could not be tested".
pvals = pd.DataFrame(np.nan, index=numeric_cols, columns=numeric_cols, dtype=float)

# The test is symmetric, so evaluate each unordered pair once and mirror it.
for pos, i in enumerate(numeric_cols):
    for j in numeric_cols[pos:]:
        # pearsonr does not skip NaNs, so restrict the test to the rows where
        # both columns are present -- the same rows corr() used for this pair.
        complete = df[i].notna() & df[j].notna()
        if complete.sum() < 2:
            continue  # pearsonr needs at least two observations
        _, p = pearsonr(df.loc[complete, i], df.loc[complete, j])
        pvals.loc[i, j] = p
        pvals.loc[j, i] = p

# Cells to hide in the heatmap: correlations that are NOT significant at the
# chosen level, plus pairs whose p-value is undefined (too few observations or
# a constant column). The variable name is kept for compatibility with any
# later cells that reference it.
mask_significant = (pvals > SIGNIFICANCE_LEVEL) | pvals.isna()

# Plot the heatmap with correlations and mask non-significant ones
plt.figure(figsize=(18, 14))
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, mask=mask_significant, cbar_kws={'label': 'Pearson Correlation'})

# Ensure row and column names are displayed
plt.xticks(rotation=90)  # Rotate x labels so long column names stay readable.
plt.yticks(rotation=0)   # Keep y labels horizontal.

plt.show()
