In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Read each country's cleaned CSV and tag every row with its country name.
# `.assign` adds the 'Country' column in the same expression as the read.
benin = pd.read_csv("../data/benin_clean.csv").assign(Country="Benin")
togo = pd.read_csv("../data/togo_clean.csv").assign(Country="Togo")
sierra = pd.read_csv("../data/sierra_leone_clean.csv").assign(Country="Sierra Leone")

# Stack the three frames row-wise; ignore_index gives one continuous index
df = pd.concat(
    [benin, togo, sierra],
    ignore_index=True,
)
df.head()


In [None]:
# Mean GHI per country, ordered from highest to lowest for the bar chart
avg_ghi = (
    df.groupby("Country")["GHI"]
    .mean()
    .sort_values(ascending=False)
)

# Keep the Axes returned by pandas so the label is set on it explicitly
ax = avg_ghi.plot(
    kind="bar",
    figsize=(6, 4),
    color="skyblue",
    title="Average GHI by Country",
)
ax.set_ylabel("Average GHI")
plt.show()


In [None]:
# Columns to compare; one boxplot figure is drawn per column, split by country
metrics = ["GHI", "DNI", "DHI"]
for metric in metrics:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.boxplot(data=df, x="Country", y=metric, ax=ax)
    ax.set_title(f"{metric} Comparison Across Countries")
    plt.show()


In [None]:
# Per-country mean, median and standard deviation of GHI, DNI and DHI,
# rounded to two decimals; the bare name on the last line displays the table.
summary = (
    df.groupby("Country")[["GHI", "DNI", "DHI"]]
    .agg(["mean", "median", "std"])
    .round(2)
)
summary


In [None]:
# One-way ANOVA on GHI: tests the null hypothesis that the three countries
# share the same mean GHI. Missing GHI values are dropped per group first.
# NOTE(review): ANOVA assumes roughly normal, equal-variance groups; consider
# stats.kruskal on the same samples as a non-parametric cross-check.
anova = stats.f_oneway(
    *(
        # .loc selects rows and column in one step (no chained indexing)
        df.loc[df["Country"] == country, "GHI"].dropna()
        for country in ("Benin", "Togo", "Sierra Leone")
    )
)

# Use general format (4 significant digits) rather than fixed 4 decimals:
# a very small p-value then prints as e.g. 1.2e-15 instead of collapsing
# to a misleading "0.0000".
print(f"ANOVA p-value for GHI differences: {anova.pvalue:.4g}")
