In [None]:
# Step 0: Imports and Setup

import sys
import os
sys.path.append(os.path.abspath(".."))

import pandas as pd
from src.data_utils import load_data, clean_columns, export_cleaned, summarize_missing
from src.eda_utils import *

# Load the Sierra Leone raw data.
# This notebook exports its cleaned frame as "../data/sierraleone_clean.csv",
# so the input must be the Sierra Leone file; loading another site's CSV here
# would silently write that site's data under the Sierra Leone name.
# NOTE(review): the raw file name "sierraleone-bumbuna.csv" is assumed —
# confirm it matches the file actually present in ../data/.
df = load_data("../data/sierraleone-bumbuna.csv")

# Last expression of the cell: renders the first rows as a quick sanity check.
df.head()


In [None]:
# Step A: Summary Stats and Missing Values
# A notebook cell only renders the value of its *last* expression, so a bare
# `df.describe()` followed by another statement is computed and thrown away.
# `display` (injected into builtins by the IPython kernel) renders it explicitly.
display(df.describe())

# Left as the final expression so that whatever summarize_missing returns
# (if anything) is still rendered by the notebook.
summarize_missing(df)


In [None]:
# Step B: Outlier Detection & Imputation
# Columns handed to both the outlier check and the cleaning helper.
columns_to_check = [
    "GHI",
    "DNI",
    "DHI",
    "ModA",
    "ModB",
    "WS",
    "WSgust",
]

# NOTE(review): the return value of detect_outliers is discarded here, so this
# call is only useful if the helper prints/plots on its own — confirm in
# src.eda_utils.
detect_outliers(df, columns_to_check)

# The cleaned result is bound to a new name; later cells read `df_cleaned`.
df_cleaned = clean_columns(df, columns_to_check)


In [None]:
# Step C: Export Cleaned Data
# Persist the cleaned frame next to the raw data, under the Sierra Leone name.
export_cleaned(
    df_cleaned,
    "../data/sierraleone_clean.csv",
)


In [None]:
# Step D: Time Series Analysis
# Pass the cleaned frame and the four columns of interest to the
# time-series plotting helper.
plot_time_series(
    df_cleaned,
    ["GHI", "DNI", "DHI", "Tamb"],
)


In [None]:
# Step E: Cleaning Impact
# Delegates entirely to plot_cleaning_effect with the cleaned frame; which
# columns it reads and what it draws are defined in src.eda_utils.
# NOTE(review): presumably relies on a cleaning-event flag column being present
# in df_cleaned — confirm against the helper's implementation.
plot_cleaning_effect(df_cleaned)


In [None]:
# Step F: Correlation Heatmap
# Restrict the heatmap to these five columns of the cleaned frame.
plot_correlation_heatmap(
    df_cleaned,
    ["GHI", "DNI", "DHI", "TModA", "TModB"],
)


In [None]:
# Step G: Scatter Plots
# Two independent scatter plots on the cleaned frame. Each call passes two
# column names followed by a descriptive label string.

# WS against GHI.
plot_scatter(
    df_cleaned,
    "WS",
    "GHI",
    "Wind Speed vs GHI",
)

# RH against Tamb.
plot_scatter(
    df_cleaned,
    "RH",
    "Tamb",
    "Relative Humidity vs Temperature",
)


In [None]:
# Step H: Wind Distribution & Histogram
# One histogram per column, drawn from the cleaned frame: WD first, then GHI.
plot_histogram(
    df_cleaned,
    "WD",
)
plot_histogram(
    df_cleaned,
    "GHI",
)


In [None]:
# Step I: Bubble Chart
# Three columns of the cleaned frame are passed positionally.
# NOTE(review): which one drives x, y and bubble size is decided by
# plot_bubble_chart in src.eda_utils — confirm the argument order there.
plot_bubble_chart(
    df_cleaned,
    "GHI",
    "Tamb",
    "RH",
)
