In [38]:
import sqlite3
import pandas as pd

# Connect to the database.
# The file is opened through a SQLite URI with mode=ro because this notebook only
# reads from it. A plain sqlite3.connect(path) would silently create a new, empty
# database file when the path is wrong, and the mistake would only surface later
# as a "no such table" error; in read-only mode the connect call itself fails.
# NOTE: the path is pasted into the URI verbatim, so a path containing '?', '#'
# or '%' would need percent-escaping first.
database_path = '../data/database.sqlite'
connection = sqlite3.connect('file:' + database_path + '?mode=ro', uri=True)
cursor = connection.cursor()

In [39]:
# Pull every row and every column of the G7_GDPs table (no LIMIT is applied).
query = "SELECT * FROM G7_GDPs"

# Run the query on the open connection and materialise the result as a DataFrame.
df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value
0,Canada,CAN,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,4.109345e+10
1,China,CHN,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,5.918412e+10
2,France,FRA,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,6.265147e+10
3,India,IND,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,3.767927e+10
4,Italy,ITA,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,4.038529e+10
...,...,...,...,...,...,...
965,India,IND,GDP per capita (current US$),NY.GDP.PCAP.CD,2014,1.581511e+03
966,Italy,ITA,GDP per capita (current US$),NY.GDP.PCAP.CD,2014,3.490850e+04
967,Japan,JPN,GDP per capita (current US$),NY.GDP.PCAP.CD,2014,3.619442e+04
968,United Kingdom,GBR,GDP per capita (current US$),NY.GDP.PCAP.CD,2014,4.633198e+04


In [40]:
# Keep only the top five records of the loaded table as a preview.
first_few_lines = df.head(n=5)

# With no target path, to_csv() returns the CSV text instead of writing a file;
# the row index is left out of that text before it is printed.
print(first_few_lines.to_csv(path_or_buf=None, index=False))

CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value
Canada,CAN,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,41093453544.9096
China,CHN,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,59184116488.9977
France,FRA,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,62651474946.6007
India,IND,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,37679274491.2745
Italy,ITA,GDP at market prices (current US$),NY.GDP.MKTP.CD,1960,40385288344.1911



In [41]:
# Reshape from long to wide: one row per (CountryName, CountryCode, Year) and one
# column per IndicatorName, filled from 'Value'. aggfunc='mean' is the pandas
# default, spelled out so it is visible that duplicate key/indicator rows would
# be averaged. reset_index() turns the three key levels back into columns.
# Note: this rebinds `df` to the wide frame, which no longer has the 'Value' and
# 'IndicatorName' columns, so the cell needs the load cell re-run before it can
# be executed again.
df = (
    df.pivot_table(
        values='Value',
        index=['CountryName', 'CountryCode', 'Year'],
        columns='IndicatorName',
        aggfunc='mean',
    )
    .reset_index()
)
df

IndicatorName,CountryName,CountryCode,Year,GDP at market prices (current US$),GDP per capita (current US$)
0,Canada,CAN,1960,4.109345e+10,2294.568814
1,Canada,CAN,1961,4.076797e+10,2231.293824
2,Canada,CAN,1962,4.197885e+10,2255.230044
3,Canada,CAN,1963,4.465717e+10,2354.839122
4,Canada,CAN,1964,4.888294e+10,2529.518179
...,...,...,...,...,...
480,United States,USA,2010,1.496437e+13,48374.056457
481,United States,USA,2011,1.551793e+13,49781.357490
482,United States,USA,2012,1.616316e+13,51456.658728
483,United States,USA,2013,1.676805e+13,52980.043626


In [42]:
# Rename columns to the desired snake_case structure.
# Both steps return new frames and `df` is rebound to the result, instead of
# mutating the pivoted frame in place (columns.name assignment / inplace=True).
# The object displayed by the pivot cell is therefore left untouched, and the
# cell can be re-run safely: rename() ignores keys that are no longer present.
df = (
    df
    # Drop the 'IndicatorName' label that pivot_table leaves on the columns axis.
    .rename_axis(None, axis='columns')
    .rename(columns={
        'CountryName': 'country_name',
        'CountryCode': 'country_code',
        'Year': 'year',
        'GDP at market prices (current US$)': 'gdp_total_usd',
        'GDP per capita (current US$)': 'gdp_per_capita_usd'
    })
)

In [43]:
# Collect the distinct values of the renamed "country_code" column;
# unique() returns them in order of first appearance.
unique_country_codes = df.loc[:, "country_code"].unique()

# Display the resulting array
print(unique_country_codes)

['CAN' 'CHN' 'FRA' 'DEU' 'IND' 'ITA' 'JPN' 'GBR' 'USA']


In [44]:
# Lookup table from the three-letter codes found in `country_code` to their
# two-letter ISO 3166-1 alpha-2 equivalents.
iso_mapping = {
    'CAN': 'CA',  # Canada
    'CHN': 'CN',  # China
    'FRA': 'FR',  # France
    'DEU': 'DE',  # Germany
    'IND': 'IN',  # India
    'ITA': 'IT',  # Italy
    'JPN': 'JP',  # Japan
    'GBR': 'GB',  # United Kingdom
    'USA': 'US'   # United States
}

# Add a new column 'ISO_codes' by mapping 'country_code' through the dictionary
df['ISO_codes'] = df['country_code'].map(iso_mapping)

# Series.map() yields NaN for any code that is missing from iso_mapping. Fail
# loudly here rather than letting rows without an ISO code flow into later steps.
unmapped_codes = df.loc[df['ISO_codes'].isna(), 'country_code'].unique().tolist()
assert not unmapped_codes, "No ISO code mapping for: {}".format(unmapped_codes)
df

Unnamed: 0,country_name,country_code,year,gdp_total_usd,gdp_per_capita_usd,ISO_codes
0,Canada,CAN,1960,4.109345e+10,2294.568814,CA
1,Canada,CAN,1961,4.076797e+10,2231.293824,CA
2,Canada,CAN,1962,4.197885e+10,2255.230044,CA
3,Canada,CAN,1963,4.465717e+10,2354.839122,CA
4,Canada,CAN,1964,4.888294e+10,2529.518179,CA
...,...,...,...,...,...,...
480,United States,USA,2010,1.496437e+13,48374.056457,US
481,United States,USA,2011,1.551793e+13,49781.357490,US
482,United States,USA,2012,1.616316e+13,51456.658728,US
483,United States,USA,2013,1.676805e+13,52980.043626,US


In [45]:
# Release the database handles in the same order as before: the cursor first,
# then the connection it was created from.
for handle in (cursor, connection):
    handle.close()