Imports and Data Loading

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import numpy as np

# Cleaned per-country datasets, keyed by the label stored in the 'Country' column
country_files = {
    'Benin': '../data/benin_clean.csv',
    'Sierra Leone': '../data/sierraleone_clean.csv',
    'Togo': '../data/togo_clean.csv',
}

# Read each file (parsing 'Timestamp' as datetime) and tag every row with its country
frames = {
    country: pd.read_csv(path, parse_dates=['Timestamp']).assign(Country=country)
    for country, path in country_files.items()
}

# Keep the individual frames available under their own names for per-country analysis
benin = frames['Benin']
sierra = frames['Sierra Leone']
togo = frames['Togo']

# Stack the three frames into one table with a fresh 0..n-1 index for comparison
df_all = pd.concat([benin, sierra, togo], ignore_index=True)

FileNotFoundError: [Errno 2] No such file or directory: '../data/sierra_leone_clean.csv'

Metric Comparison: Boxplots
Boxplots for GHI, DNI, DHI per country:

In [None]:
# Draw one boxplot per irradiance metric so the countries can be compared side by side.
metrics = ['GHI', 'DNI', 'DHI']
for metric in metrics:
    fig, ax = plt.subplots(figsize=(8, 5))
    # seaborn >= 0.13 deprecates passing `palette` without `hue`. Mapping hue to
    # the x variable keeps the per-country colours without the warning, and
    # dodge=False keeps full-width, centred boxes on older seaborn releases.
    sns.boxplot(
        data=df_all,
        x='Country',
        y=metric,
        hue='Country',
        palette='Set2',
        dodge=False,
        ax=ax,
    )
    # Some seaborn versions add a legend whenever hue is set; here it would only
    # repeat the x tick labels, so drop it if present.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(f'{metric} Distribution by Country')
    ax.set_ylabel(f'{metric} (W/m²)')
    ax.set_xlabel('Country')
    plt.show()

Summary Table

In [None]:
# Per-country mean, median and standard deviation of each metric, rounded to 2 decimals
summary_stats = ['mean', 'median', 'std']
summary = (
    df_all
    .groupby('Country')[metrics]
    .agg(summary_stats)
    .round(2)
)
display(summary)

Statistical Testing
One-way ANOVA on GHI:

In [None]:
# One-way ANOVA on GHI across the three countries.
# Missing readings are dropped per group before testing.
alpha = 0.05  # significance threshold used for the verdict below
fval, pval = stats.f_oneway(
    benin['GHI'].dropna(),
    sierra['GHI'].dropna(),
    togo['GHI'].dropna()
)
# .4g keeps very small p-values visible (e.g. 3.2e-15) instead of rounding them to 0.0000
print(f"ANOVA F-statistic: {fval:.2f}, p-value: {pval:.4g}")

# State the conclusion explicitly instead of leaving the rule in a comment
if np.isnan(pval):
    # A NaN result means the test could not be computed (e.g. an empty group)
    print("ANOVA result is undefined (NaN); check that every country has GHI values.")
elif pval < alpha:
    print(f"p < {alpha}: the difference in mean GHI between countries is statistically significant.")
else:
    print(f"p >= {alpha}: no statistically significant difference in mean GHI between countries.")

Key Observations (Markdown Cell)

### Key Insights

- Benin shows the highest median GHI, but also the widest range.
- Togo’s DNI is consistently higher than Sierra Leone’s, suggesting stronger direct sunlight.
- Sierra Leone demonstrates the lowest average DHI, possibly indicating less cloud cover during peak sunlight hours.

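**TODO:** The insights above are placeholders — replace them with findings taken from the actual boxplots, summary table, and ANOVA result.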

Visual Summary: Bar Chart
Ranking by average GHI:

In [None]:
# Rank countries by mean GHI (highest first) and show the ranking as a bar chart
mean_ghi = (
    df_all
    .groupby('Country')['GHI']
    .mean()
    .sort_values(ascending=False)
)
bar_colors = ['#F4B400', '#0F9D58', '#4285F4']  # applied in rank order, not per country
ax = mean_ghi.plot(kind='bar', color=bar_colors)
ax.set_title('Average GHI by Country')
ax.set_ylabel('Average GHI (W/m²)')
plt.show()