# 01 - Download and Clean
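This notebook downloads World Bank indicator data (internet use, R&D expenditure, high-tech exports, and GDP growth) for eleven emerging economies over 2010–2023, then cleans it into a table with one row per country and year.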


Conceptual framework: Technology Adoption  →  Innovation  →  Economic Performance


In [7]:
import wbdata
import pandas as pd
import datetime as dt

# Emerging economies to include, identified by their ISO3 country codes.
countries = [
    "BRA", "CHN", "IND", "TUR", "IDN", "MEX",
    "ZAF", "PHL", "THA", "VNM", "EGY",
]

# World Bank indicator code -> column name used in the resulting frame.
indicators = {
    # Internet users (% of population)
    "IT.NET.USER.ZS": "internet_users",
    # R&D expenditure (% of GDP)
    "GB.XPD.RSDV.GD.ZS": "rnd_expenditure",
    # High-tech exports (% of manufactured exports)
    "TX.VAL.TECH.MF.ZS": "hightech_exports",
    # GDP growth (annual %)
    "NY.GDP.MKTP.KD.ZG": "gdp_growth",
}

# Observation window requested from the API: 1 Jan 2010 through 31 Dec 2023.
start = dt.datetime(year=2010, month=1, day=1)
end = dt.datetime(year=2023, month=12, day=31)

# Download the indicators and tidy the result in a single pipeline:
#   1. fetch from the World Bank API,
#   2. turn the index levels into ordinary columns,
#   3. give the country/date columns presentation-friendly names,
#   4. reduce the date label to its calendar year (values that cannot be
#      parsed are coerced to missing rather than raising),
#   5. order rows by country, then year, and renumber them from zero.
df = (
    wbdata.get_dataframe(indicators, country=countries, date=(start, end))
    .reset_index()
    .rename(columns={"country": "Country", "date": "Year"})
    .assign(Year=lambda frame: pd.to_datetime(frame["Year"], errors="coerce").dt.year)
    .sort_values(["Country", "Year"])
    .reset_index(drop=True)
)

# Preview the first 50 rows of the cleaned table.
print(df.head(50))




             Country  Year  internet_users  rnd_expenditure  hightech_exports  \
0             Brazil  2010         40.6500          1.15992         12.590147   
1             Brazil  2011         45.6900          1.13966         11.140908   
2             Brazil  2012         48.5600          1.12684         11.887221   
3             Brazil  2013         51.0400          1.19567         11.969862   
4             Brazil  2014         54.5510          1.26971         12.371169   
5             Brazil  2015         58.3280          1.37093         14.485900   
6             Brazil  2016         60.8725          1.28637         16.000164   
7             Brazil  2017         67.4713          1.11750         14.311889   
8             Brazil  2018         70.4343          1.16769         14.744242   
9             Brazil  2019         73.9124          1.21096         14.066805   
10            Brazil  2020         81.3427          1.14526         11.350094   
11            Brazil  2021  