In [11]:
import pandas as pd

# --- Step 1: Preprocessing ONLY the Global Findex Data ---

FINDEX_CSV_PATH = 'data/GlobalFindexDatabase2025.csv'

# Define the actual column names we need to extract and their new, clean names
findex_cols_to_use = {
    'countrynewwb': 'Country Name',
    'codewb': 'Country Code',
    'year': 'Year',
    'fin27a': 'DigitalPaymentUsage',  # This is the actual column for 'Made or received digital payments'
}


def preprocess_digital_payments(raw_df, column_map=None, value_col='DigitalPaymentUsage'):
    """Return a cleaned digital-payment frame built from a raw Findex frame.

    The function selects the columns named by the keys of ``column_map``,
    renames them to the corresponding values, converts ``value_col`` from a
    ratio to a percentage (multiplies by 100) and drops every row that has a
    missing value in any of the selected columns. ``raw_df`` is not modified.

    Args:
        raw_df: DataFrame holding at least the columns listed as keys of
            ``column_map``.
        column_map: Mapping of raw column name -> clean column name.
            Defaults to the module-level ``findex_cols_to_use``.
        value_col: Clean name of the column to scale by 100.

    Returns:
        A new DataFrame with only the renamed columns. The original row
        index labels are kept (the index is not reset).

    Raises:
        KeyError: If a column named in ``column_map`` is absent from
            ``raw_df``.
    """
    if column_map is None:
        column_map = findex_cols_to_use

    # Selecting with a list raises KeyError when a column is missing.
    cleaned = raw_df[list(column_map)].copy()
    cleaned = cleaned.rename(columns=column_map)

    # Coerce to numeric before scaling: on an object-dtype column `* 100`
    # would repeat strings instead of multiplying. Unparseable entries
    # become NaN and are removed by the dropna() below.
    cleaned[value_col] = pd.to_numeric(cleaned[value_col], errors='coerce') * 100

    # Drop any rows that have missing values in our selected columns
    return cleaned.dropna()


# Load the raw Findex CSV file
try:
    # NOTE(review): the captured cell output echoes this read_csv line as a
    # warning source -- presumably pandas' DtypeWarning about mixed-type
    # columns. low_memory=False makes pandas infer each column's dtype from
    # the whole file instead of chunk by chunk; confirm the warning is gone.
    findex_df_raw = pd.read_csv(FINDEX_CSV_PATH, low_memory=False)
except FileNotFoundError as e:
    # Make the failure explicit so the next step cannot hit a NameError or
    # silently reuse a frame left over from an earlier notebook run.
    findex_df_raw = None
    print(f"❌ File not found. Please check the filename in the 'data' folder. Error: {e}")
else:
    print("✅ GlobalFindexDatabase2025.csv loaded successfully.")

# Create the new dataframe by selecting and renaming only the columns we need.
# Stays None when loading or column selection fails, so later cells fail
# loudly instead of working on stale data.
digital_payment_df = None
if findex_df_raw is not None:
    try:
        digital_payment_df = preprocess_digital_payments(findex_df_raw)
    except KeyError as e:
        print(f"❌ A required column was not found in the file: {e}")
    else:
        print("\n✅ Digital Payment Usage data preprocessed successfully.")

        # --- Verification Step ---
        print("\n--- Verifying the cleaned data ---")
        print("First 5 rows of the cleaned data:")
        display(digital_payment_df.head())

        print("\nData types and non-null counts:")
        digital_payment_df.info()



✅ GlobalFindexDatabase2025.csv loaded successfully.

✅ Digital Payment Usage data preprocessed successfully.

--- Verifying the cleaned data ---
First 5 rows of the cleaned data:


  findex_df_raw = pd.read_csv('data/GlobalFindexDatabase2025.csv')


Unnamed: 0,Country Name,Country Code,Year,DigitalPaymentUsage
431,Afghanistan,AFG,2021,0.0
432,Albania,ALB,2021,4.258758
433,Algeria,DZA,2021,2.767699
434,Argentina,ARG,2021,25.090499
435,Armenia,ARM,2021,8.360455



Data types and non-null counts:
<class 'pandas.core.frame.DataFrame'>
Index: 1223 entries, 431 to 8565
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Country Name         1223 non-null   object 
 1   Country Code         1223 non-null   object 
 2   Year                 1223 non-null   int64  
 3   DigitalPaymentUsage  1223 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 47.8+ KB
