<a href="https://colab.research.google.com/github/dennistay1981/Resources/blob/main/Data_science_approaches_to_mental_health.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Descriptive analytics: cross-tabulating metaphor sources and targets

In [None]:
#Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency

# Import data: one row per metaphor instance; the analysis below uses the
# 'SOURCE' and 'TARGET' columns.
data = pd.read_csv('https://raw.githubusercontent.com/dennistay1981/Resources/refs/heads/main/Code%20and%20data%20in%20publications/Chapter%3A%20Data%20science%20approaches%20to%20metaphor%20and%20mental%20health/Descriptive.csv')

# Cross-tabulation: rows are SOURCE categories, columns are TARGET categories.
crosstab = pd.crosstab(data['SOURCE'], data['TARGET'])

# Observed frequencies as a plain array (row/column labels live in `crosstab`).
observed = crosstab.values

# Chi-square test of independence; `expected` holds the frequencies expected
# under independence of SOURCE and TARGET.
# NOTE: SciPy applies Yates' continuity correction by default when dof == 1
# (i.e. a 2x2 table), which also affects the `chi2` used for Cramer's V below.
chi2, p, dof, expected = chi2_contingency(observed)

# Pearson's residuals: signed, standardised deviation of each cell from its
# expected count (positive = over-represented, negative = under-represented).
residuals = (observed - expected) / np.sqrt(expected)

# Same residuals, re-attached to the category labels so the heatmap axes show
# the SOURCE/TARGET names rather than positional indices.
residuals_df = pd.DataFrame(
    residuals, index=crosstab.index, columns=crosstab.columns
)

# Bias-corrected Cramer's V: phi^2 and the table dimensions are each adjusted
# for sample size before forming the ratio.
n = observed.sum()
phi2 = chi2 / n
r, k = observed.shape
phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
rcorr = r - ((r - 1)**2) / (n - 1)
kcorr = k - ((k - 1)**2) / (n - 1)
# A table with a single row or column has no association to measure; report
# NaN explicitly instead of triggering a 0/0 division warning.
v_denominator = min((kcorr - 1), (rcorr - 1))
cramer_v = np.sqrt(phi2corr / v_denominator) if v_denominator > 0 else np.nan

# Heatmap with observed frequencies
plt.figure(figsize=(12, 8))
sns.heatmap(crosstab, annot=True, fmt='d', cmap='Blues', cbar=True)
plt.title('Cross-tabulation of Metaphor Sources and Targets')
plt.xlabel('Target')
plt.ylabel('Source')
plt.show()

# Heatmap with Pearson's residuals. Residuals are signed, so use a diverging
# colormap centred on zero: red = more frequent than expected, blue = less.
plt.figure(figsize=(12, 8))
sns.heatmap(residuals_df, annot=True, fmt=".2f", cmap='RdBu_r', center=0, cbar=True)
plt.title('Pearson Residuals')
plt.xlabel('Target')
plt.ylabel('Source')
plt.show()

# Summary statistics
print(f"Chi-square statistic: {chi2:.2f}")
print(f"P-value: {p:.3f}")
print(f"Degrees of freedom: {dof}")
print(f"Cramer's V: {cramer_v:.3f}")