In [2]:
import pandas as pd

# Map each dataset label to the CSV file that holds its raw data.
# Every file follows the same '<label>_data.csv' naming pattern, so the
# mapping is derived from the labels instead of being spelled out twice.
file_paths = {
    site: f'{site}_data.csv'
    for site in ('benin-malanville', 'sierraleone-bumbuna', 'togo-dapaong_qc')
}

class DataCleaner:
    """Load several CSV datasets and write cleaned copies of them to disk.

    Constructing the object eagerly reads every CSV listed in ``file_paths``
    into memory; the resulting frames are kept in ``self.dataframes`` keyed
    by the same labels.
    """

    # Columns a row must have a value for. Negative readings in these
    # columns are treated as invalid entries.
    CRITICAL_COLUMNS = ('GHI', 'DNI', 'DHI')

    def __init__(self, file_paths):
        """Read each dataset into a DataFrame keyed by its label.

        Args:
            file_paths: Mapping of dataset label to the path of its CSV file.
        """
        self.dataframes = {
            country: pd.read_csv(path) for country, path in file_paths.items()
        }

    def clean_data(self):
        """Clean every loaded dataset in place and save it as a CSV file.

        For each dataset the following steps run in order:

        1. Drop rows with a missing value in any critical column.
        2. Replace negative values in the critical columns with NaN.
        3. Fill missing numeric values with the column median.
        4. Drop rows that still contain a missing value, ignoring columns
           that hold no data at all.
        5. Drop duplicate rows.

        The result is written to ``cleaned_<label>_data.csv`` in the current
        working directory (without the index) and progress is printed to
        stdout. The frames in ``self.dataframes`` are modified in place.

        Raises:
            KeyError: If a dataset lacks one of the critical columns.
        """
        for country, df in self.dataframes.items():
            print(f"Cleaning data for {country}:")

            self._clean_frame(df)

            # Save cleaned dataset
            cleaned_file_path = f'cleaned_{country}_data.csv'
            df.to_csv(cleaned_file_path, index=False)
            print(f"Cleaned data saved to {cleaned_file_path}")
            print("\n")

    @classmethod
    def _clean_frame(cls, df):
        """Apply the cleaning steps to a single DataFrame, in place."""
        critical = list(cls.CRITICAL_COLUMNS)

        # Rows without a reading in a critical column cannot be repaired.
        df.dropna(subset=critical, inplace=True)

        # Series.mask writes NaN (upcasting integer columns to float), so the
        # columns stay numeric and take part in the median fill below. This
        # avoids assigning the pd.NA sentinel, which is meant for nullable
        # extension dtypes, into NumPy-backed columns.
        for column in critical:
            df[column] = df[column].mask(df[column] < 0)

        # Medians are computed after the invalid negatives were blanked, so
        # they are not skewed by them. Columns with no data have a NaN median
        # and are left out of the fill.
        medians = df.select_dtypes(include='number').median().dropna()
        if not medians.empty:
            df.fillna(medians, inplace=True)

        # Remove rows that still have gaps (e.g. in non-numeric columns).
        # A column that is empty in every row is ignored here: it would
        # otherwise mark every row as incomplete and wipe the whole dataset.
        populated = df.columns[df.notna().any().to_numpy()].tolist()
        df.dropna(subset=populated, inplace=True)

        df.drop_duplicates(inplace=True)

# Load every dataset listed in file_paths into memory.
cleaner = DataCleaner(file_paths=file_paths)

# Clean each loaded dataset and write its cleaned CSV copy.
cleaner.clean_data()


Cleaning data for benin-malanville:
Cleaned data saved to cleaned_benin-malanville_data.csv


Cleaning data for sierraleone-bumbuna:
Cleaned data saved to cleaned_sierraleone-bumbuna_data.csv


Cleaning data for togo-dapaong_qc:
Cleaned data saved to cleaned_togo-dapaong_qc_data.csv


