In [1]:
# PHASE 1: SETUP & DATA LOADING
# ==========================================

# 1. Required Imports
import pandas as pd
import altair as alt
import numpy as np

# 2. Define File Path
# ------------------------------------------
# Candidate locations for the dataset, tried in order:
#   1. The same folder as this notebook. This is the layout required for
#      submission (Dec 4th): move the csv next to the notebook and it is
#      picked up automatically, with no code change needed.
#   2. The original download location on the author's Windows PC. A "raw"
#      string (r'...') keeps the backslashes in the Windows path literal.
DATA_FILE_CANDIDATES = [
    'country_economics_data.csv',
    r'C:\Users\Cameron\Downloads\country_economics_data.csv',
]

# 3. Load the Data
# ------------------------------------------
# Try each candidate until one loads. After the loop, `file_path` holds the
# path that was actually read, so later cells can still refer to it.
for file_path in DATA_FILE_CANDIDATES:
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        # Not at this location; fall through to the next candidate.
        continue
    print("✅ Data loaded successfully!")
    print(f"Shape: {df.shape[0]} rows, {df.shape[1]} columns")
    break
else:
    # No candidate existed. Stop here with a clear error instead of printing
    # and carrying on: otherwise `df` is undefined (or stale from an earlier
    # run) and the next cell fails with a confusing, unrelated error.
    searched_locations = ", ".join(repr(path) for path in DATA_FILE_CANDIDATES)
    raise FileNotFoundError(
        f"❌ Error: The file was not found at any of: {searched_locations}. "
        "Please check the path or move the CSV to the script folder."
    )


✅ Data loaded successfully!
Shape: 173 rows, 21 columns

--- First 5 Rows ---
          Name  ID Abbreviation        Currency       Capital   Languages  \
0  Afghanistan   4           AF  Afghan afghani         Kabul        Dari   
1      Albania   8           AL    Albanian lek        Tirana    Albanian   
2      Algeria  12           DZ  Algerian dinar       Algiers      Arabic   
3       Angola  24           AO  Angolan kwanza        Luanda  Portuguese   
4    Argentina  32           AR  Argentine peso  Buenos Aires      Guaran   

   Latitude  Longitude       Area  Population  ...         Subregion  \
0      33.0       65.0   652230.0       35.70  ...     Southern Asia   
1      41.0       20.0    28748.0        2.36  ...  Southeast Europe   
2      28.0        3.0  2381741.0       46.81  ...   Northern Africa   
3     -12.5       18.5  1246700.0       35.12  ...     Middle Africa   
4     -34.0      -64.0  2780400.0       47.07  ...     South America   

                          

In [5]:
# 4. Initial Inspection (Task 1.1 Requirement)
# ------------------------------------------
# Number of rows to preview. The header text below is built from this value so
# the label always matches what is actually displayed; change it here only.
N_PREVIEW_ROWS = 10

# Display the first rows to verify the data looks correct
print(f"\n--- First {N_PREVIEW_ROWS} Rows ---")
display(df.head(N_PREVIEW_ROWS))

# Display column names to help select your 7 attributes
print("\n--- Column Names ---")
print(list(df.columns))

# Check for missing values (NaNs) immediately.
# Count nulls per column once, then show only the columns that have any.
null_counts = df.isnull().sum()
print("\n--- Missing Values Check ---")
print(null_counts[null_counts > 0])


--- First 5 Rows ---


Unnamed: 0,Name,ID,Abbreviation,Currency,Capital,Languages,Latitude,Longitude,Area,Population,...,Subregion,Borders,GDP,GDP Growth,Interest Rate,Inflation Rate,Jobless Rate,Gov. Budget,Debt/GDP,Current Account
0,Afghanistan,4,AF,Afghan afghani,Kabul,Dari,33.0,65.0,652230.0,35.7,...,Southern Asia,"['IRN', 'PAK', 'TKM', 'UZB', 'TJK', 'CHN']",17,,14.94,0.2,13.3,-4.8,8.3,-17.3
1,Albania,8,AL,Albanian lek,Tirana,Albanian,41.0,20.0,28748.0,2.36,...,Southeast Europe,"['MNE', 'GRC', 'MKD', 'UNK']",27,0.7,2.5,2.5,8.7,-0.7,54.7,-2.4
2,Algeria,12,DZ,Algerian dinar,Algiers,Arabic,28.0,3.0,2381741.0,46.81,...,Northern Africa,"['TUN', 'LBY', 'NER', 'ESH', 'MRT', 'MLI', 'MAR']",264,18.6,3.0,-0.22,11.43,-9.3,46.2,1.3
3,Angola,24,AO,Angolan kwanza,Luanda,Portuguese,-12.5,18.5,1246700.0,35.12,...,Middle Africa,"['COG', 'COD', 'ZMB', 'NAM']",80,2.3,19.5,19.48,29.4,-1.5,58.2,5.4
4,Argentina,32,AR,Argentine peso,Buenos Aires,Guaran,-34.0,-64.0,2780400.0,47.07,...,South America,"['BOL', 'BRA', 'CHL', 'PRY', 'URY']",633,0.8,29.0,36.6,7.9,-4.37,83.2,0.6
5,Armenia,51,AM,Armenian dram,Yerevan,Armenian,40.0,45.0,29743.0,2.99,...,Western Asia,"['AZE', 'GEO', 'IRN', 'TUR']",26,,6.75,3.4,13.9,-3.15,50.3,2.8
6,Australia,36,AU,Australian dollar,Canberra,English,-27.0,133.0,7692024.0,27.4,...,Australia and New Zealand,,1752,0.2,3.6,2.1,4.2,0.6,43.8,-2.1
7,Austria,40,AT,Euro,Vienna,German,47.333333,13.333333,83871.0,9.2,...,Central Europe,"['CZE', 'DEU', 'HUN', 'ITA', 'LIE', 'SVK', 'SV...",522,0.1,2.15,3.6,6.7,-4.7,81.8,2.4
8,Azerbaijan,31,AZ,Azerbaijani manat,Baku,Azerbaijani,40.5,47.5,86600.0,10.18,...,Western Asia,"['ARM', 'GEO', 'IRN', 'RUS', 'TUR']",74,,7.0,5.0,5.3,-0.4,20.9,6.3
9,Bahamas,44,BS,Bahamian dollar,Nassau,English,25.0343,-77.3963,13943.0,0.41,...,Caribbean,,16,,4.0,-0.4,9.4,-1.3,81.5,-7.9



--- Column Names ---
['Name', 'ID', 'Abbreviation', 'Currency', 'Capital', 'Languages', 'Latitude', 'Longitude', 'Area', 'Population', 'Region', 'Subregion', 'Borders', 'GDP', 'GDP Growth', 'Interest Rate', 'Inflation Rate', 'Jobless Rate', 'Gov. Budget', 'Debt/GDP', 'Current Account']

--- Missing Values Check ---
Abbreviation        1
Capital             1
Borders            24
GDP Growth         72
Interest Rate      11
Inflation Rate      1
Jobless Rate        2
Gov. Budget         7
Debt/GDP            8
Current Account     6
dtype: int64
