In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway  # For ANOVA

# Load the cleaned per-country CSV files and tag every row with its country.
# Paths are relative to the notebook's working directory; adjust as needed.
benin_df = pd.read_csv('../data/benin_clean.csv').assign(Country='Benin')
sierraleone_df = pd.read_csv('../data/sierraleone_clean.csv').assign(
    Country='Sierra Leone'
)
togo_df = pd.read_csv('../data/togo_clean.csv').assign(Country='Togo')

# Stack the three frames into one long table; ignore_index=True discards the
# per-file row labels and renumbers the result from 0.
combined_df = pd.concat(
    [benin_df, sierraleone_df, togo_df],
    ignore_index=True,
)

# Step 1: Metric Comparison - Boxplots
# One figure with three side-by-side panels, one per metric column, each
# showing that metric's distribution grouped by country.
plt.figure(figsize=(18, 6))

boxplot_panels = (
    ('GHI', 'GHI by Country'),
    ('DNI', 'DNI by Country'),
    ('DHI', 'DHI by Country'),
)
for position, (metric, title) in enumerate(boxplot_panels, start=1):
    # subplot positions are 1-based: (rows, columns, index)
    plt.subplot(1, 3, position)
    sns.boxplot(x='Country', y=metric, data=combined_df)
    plt.title(title)

plt.tight_layout()
plt.show()

# Step 2: Summary Table
# Mean, median and standard deviation of each metric, per country.
# reset_index() turns the 'Country' group key back into a regular column.
metric_columns = ['GHI', 'DNI', 'DHI']
summary_stats = ['mean', 'median', 'std']
summary_table = (
    combined_df
    .groupby('Country')[metric_columns]
    .agg(summary_stats)
    .reset_index()
)
print("\nSummary Table:")
print(summary_table)

# Step 3: Statistical Testing (One-way ANOVA)
# Tests the null hypothesis that the three countries share the same mean GHI.
# Missing values are dropped per country first: f_oneway propagates NaN, so a
# single missing GHI reading in any file would otherwise make both the
# F-statistic and the p-value come out as nan.
ghi_samples = [
    country_df['GHI'].dropna()
    for country_df in (benin_df, sierraleone_df, togo_df)
]
f_stat, p_value = f_oneway(*ghi_samples)
print(f'\nOne-way ANOVA results: F-statistic = {f_stat}, p-value = {p_value}')

# (Bonus) Visual Summary - bar chart of mean GHI per country.
# sort_values() is ascending, so bars run from the lowest to the highest mean.
ghi_by_country = combined_df.groupby('Country')['GHI']
avg_ghi = ghi_by_country.mean().sort_values()

# Series.plot returns the Axes it drew on; label that Axes directly.
bar_ax = avg_ghi.plot(kind='bar', color='skyblue')
bar_ax.set_title('Average GHI by Country')
bar_ax.set_ylabel('Average GHI')
plt.xticks(rotation=45)
plt.show()