In [1]:
'''

@Author: Vighnesh Harish Bilgi
@Date: 2022-12-09
@Last Modified by: Vighnesh Harish Bilgi
@Last Modified time: 2022-12-09
@Title : Data Modelling - 2 : Wealth Accounts Datasets

'''

'\n\n@Author: Vighnesh Harish Bilgi\n@Date: 2022-12-09\n@Last Modified by: Vighnesh Harish Bilgi\n@Last Modified time: 2022-12-09\n@Title : Data Modelling - 2\n\n'

In [2]:
import mysql.connector
import pandas as pd
import os

In [3]:
sql_password = os.environ.get("sql_password")

### Custom functions to connect to the mysql server and iterate the cursor

In [4]:
def iterate_cursor(db_cursor):
    """
    Description:
        Take MySQLCursor argument from parameter "db_cursor" and iterate and print the selected records with its column name 
        (i.e. a SELECT or SHOW command must be called before iterate_cursor() is called).
    Parameter:
        MySQLCursor db_cursor
    Return:
        No values returned.
    """
    print(db_cursor.column_names)
    for db in db_cursor:
        print(db)

def connect_to_sql():
    """
    Description:
        To setup connection to MySQL Server, pass hostname , user and password as arguments to parameters "host","user" and "passwd" of connect() respectively.
        connect() is method from module "mysql.connector". Once the connection is set. Return it.
    Parameter:
        No parameters.
    Return:
        MYSQLConnection db_connection
    """

    db_connection = mysql.connector.connect(
    host= "localhost",
    user= "root",
    passwd= sql_password
    )

    return db_connection

### Custom functions to create database 'accounts'

In [5]:
def create_database(db_cursor):
    """
    Description:
        Take MySQLCursor argument from parameter "db_cursor" and create database
        Return updated cursor. 
    Parameter:
        MySQLCursor db_cursor
    Return:
        MySQLCursor db_cursor
    """
    print("Creating DATABASE - accounts\n")
    db_cursor.execute("DROP DATABASE IF EXISTS accounts")
    db_cursor.execute("CREATE DATABASE accounts")

    print("Showing all databases...")
    db_cursor.execute("SHOW DATABASES")
    iterate_cursor(db_cursor)

    print("\nUsing accounts")
    db_cursor.execute("USE accounts")

    return db_cursor

### Custom function to create tables

In [6]:
def create_table(db_cursor,table,schema):
    """
    Description:
        Take MySQLCursor argument from parameter "db_cursor" and create table.
        Return updated cursor. 
    Parameter:
        MySQLCursor db_cursor,
        string table,
        string schema
    Return:
        MySQLCursor db_cursor
    """

    print(f"Creating TABLE - {table}")
    db_cursor.execute(f"""CREATE TABLE IF NOT EXISTS {table} {schema}""")

    print("Showing all tables under accounts...")
    db_cursor.execute("SHOW TABLES")
    iterate_cursor(db_cursor)

    print(f"Showing '{table}' Table schema  ...")
    db_cursor.execute(f"DESCRIBE {table}")
    iterate_cursor(db_cursor)
    return db_cursor

### Custom function to insert records into tables

In [7]:
def insert_table(db_cursor,table,cols,record):
    """
    Description:
        Take MySQLCursor argument from parameter "db_cursor" and perform Create Operation of CRUD by applying execute() on cursor.
        Return updated cursor. 
    Parameter:
        MySQLCursor db_cursor,
        string table,
        string cols,
        string record
    Return:
        MySQLCursor db_cursor
    """

    print(f"Inserting record {record} into table...\n")
    db_cursor.execute(f"""INSERT INTO {table} {cols}
                        VALUES {record}""")
    return db_cursor

### Custom function to show all records from table

In [8]:
def show_all_records(db_cursor,table):
    """
    Description:
        Take MySQLCursor argument from parameter "db_cursor" and show all records from table.
        Return updated cursor. 
    Parameter:
        MySQLCursor db_cursor
    Return:
        MySQLCursor db_cursor
    """

    print("Printing records from table...\n")
    db_cursor.execute(f"SELECT * FROM {table}")
    iterate_cursor(db_cursor)
    return db_cursor

### Read 'wealth-accountsCountry.csv' into pandas dataframe

In [9]:
accounts_country = pd.read_csv("data/wealth-accountsCountry.csv", encoding='cp1252')
accounts_country.head()

Unnamed: 0,Code,Long Name,Income Group,Region,Lending category,Other groups,Currency Unit,Latest population census,Latest household survey,Special Notes,...,Source of most recent Income and expenditure data,Vital registration complete,Latest agricultural census,Latest industrial data,Latest trade data,Latest water withdrawal data,2-alpha code,WB-2 code,Table Name,Short Name
0,ALB,Republic of Albania,Upper middle income,Europe & Central Asia,IBRD,,Albanian lek,2020 (expected),"Demographic and Health Survey, 2017/18",,...,Living Standards Measurement Study Survey (LSM...,Yes,2012,2013.0,2018.0,2006.0,AL,AL,Albania,Albania
1,ARG,Argentine Republic,Upper middle income,Latin America & Caribbean,IBRD,,Argentine peso,2020 (expected),"Multiple Indicator Cluster Survey, 2019/20",,...,"Integrated household survey (IHS), 2016",Yes,2008,2002.0,2018.0,2011.0,AR,AR,Argentina,Argentina
2,ARM,Republic of Armenia,Upper middle income,Europe & Central Asia,IBRD,,Armenian dram,2020 (expected),"Demographic and Health Survey, 2015/16",,...,"Integrated household survey (IHS), 2016",Yes,2014,,2018.0,2012.0,AM,AM,Armenia,Armenia
3,AUS,Commonwealth of Australia,High income,East Asia & Pacific,,,Australian dollar,2016,,Fiscal year end: June 30; reporting period for...,...,"Expenditure survey/budget survey (ES/BS), 2010",Yes,2015-2016,2013.0,2018.0,2013.0,AU,AU,Australia,Australia
4,AUT,Republic of Austria,High income,Europe & Central Asia,,Euro area,Euro,2011. Population figures compiled from adminis...,,A simple multiplier is used to convert the nat...,...,"Income survey (IS), 2015",Yes,2010,2014.0,2018.0,2010.0,AT,AT,Austria,Austria


#### summary of the dataframe

In [10]:
accounts_country.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 146 entries, 0 to 145
Data columns (total 31 columns):
 #   Column                                             Non-Null Count  Dtype  
---  ------                                             --------------  -----  
 0   Code                                               146 non-null    object 
 1   Long Name                                          146 non-null    object 
 2   Income Group                                       145 non-null    object 
 3   Region                                             146 non-null    object 
 4   Lending category                                   107 non-null    object 
 5   Other groups                                       52 non-null     object 
 6   Currency Unit                                      146 non-null    object 
 7   Latest population census                           145 non-null    object 
 8   Latest household survey                            119 non-null    object 
 9   Special No

#### viewing columns of the dataframe

In [11]:
accounts_country.columns

Index(['Code', 'Long Name', 'Income Group', 'Region', 'Lending category',
       'Other groups', 'Currency Unit', 'Latest population census',
       'Latest household survey', 'Special Notes',
       'National accounts base year', 'National accounts reference year',
       'System of National Accounts', 'SNA price valuation',
       'Alternative conversion factor', 'PPP survey years',
       'Balance of Payments Manual in use', 'External debt Reporting status',
       'System of trade', 'Government Accounting concept',
       'IMF data dissemination standard',
       'Source of most recent Income and expenditure data',
       'Vital registration complete', 'Latest agricultural census',
       'Latest industrial data', 'Latest trade data',
       'Latest water withdrawal data', '2-alpha code', 'WB-2 code',
       'Table Name', 'Short Name'],
      dtype='object')

#### renaming some columns of the dataframe

In [12]:
accounts_country.rename(columns = {'Code':'country_code', 'Short Name':'short_name','Table Name':'table_name','Long Name':'long_name','Currency Unit':'currency_unit'}, inplace = True)

In [13]:
accounts_country.columns

Index(['country_code', 'long_name', 'Income Group', 'Region',
       'Lending category', 'Other groups', 'currency_unit',
       'Latest population census', 'Latest household survey', 'Special Notes',
       'National accounts base year', 'National accounts reference year',
       'System of National Accounts', 'SNA price valuation',
       'Alternative conversion factor', 'PPP survey years',
       'Balance of Payments Manual in use', 'External debt Reporting status',
       'System of trade', 'Government Accounting concept',
       'IMF data dissemination standard',
       'Source of most recent Income and expenditure data',
       'Vital registration complete', 'Latest agricultural census',
       'Latest industrial data', 'Latest trade data',
       'Latest water withdrawal data', '2-alpha code', 'WB-2 code',
       'table_name', 'short_name'],
      dtype='object')

#### selecting certain columns of the dataframe for the table 'accounts_country'

In [14]:
accounts_country_clean = accounts_country[['country_code','short_name','table_name','long_name','currency_unit']]
accounts_country_clean.head()

Unnamed: 0,country_code,short_name,table_name,long_name,currency_unit
0,ALB,Albania,Albania,Republic of Albania,Albanian lek
1,ARG,Argentina,Argentina,Argentine Republic,Argentine peso
2,ARM,Armenia,Armenia,Republic of Armenia,Armenian dram
3,AUS,Australia,Australia,Commonwealth of Australia,Australian dollar
4,AUT,Austria,Austria,Republic of Austria,Euro


### Read 'wealth-accountsData.csv' into pandas dataframe

In [15]:
accounts_data = pd.read_csv("data/wealth-accountsData.csv", encoding='cp1252')
accounts_data.head()

Unnamed: 0,ï»¿Series Name,Series Code,Country Name,Country Code,1995 [YR1995],1996 [YR1996],1997 [YR1997],1998 [YR1998],1999 [YR1999],2000 [YR2000],...,2009 [YR2009],2010 [YR2010],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018]
0,Human capital (constant 2018 US$),NW.HCA.TO,Albania,ALB,44861001530.0,43430547992.0,37131131819.0,38792147742.0,42196257452.0,43611277219.0,...,66109240000.0,68056510000.0,68535250000.0,70752290000.0,71600310000.0,72487670000.0,73727870000.0,75716975793.0,78045555079.0,81214984640.0
1,Human capital (constant 2018 US$),NW.HCA.TO,Argentina,ARG,1378550000000.0,1404710000000.0,1551170000000.0,1741400000000.0,1790040000000.0,1841360000000.0,...,2045920000000.0,2419210000000.0,2953450000000.0,2968890000000.0,3180360000000.0,3128110000000.0,3215850000000.0,3153010000000.0,3226630000000.0,3159940000000.0
2,Human capital (constant 2018 US$),NW.HCA.TO,Armenia,ARM,31162180567.0,32446809421.0,32562110893.0,35586495233.0,35852007081.0,36048138201.0,...,57953800000.0,60834930000.0,67294780000.0,70261390000.0,72147700000.0,73201070000.0,80113390000.0,76522388252.0,81504751657.0,84936407008.0
3,Human capital (constant 2018 US$),NW.HCA.TO,Australia,AUS,5657110000000.0,5916110000000.0,6036460000000.0,6265210000000.0,6432260000000.0,6583070000000.0,...,8824470000000.0,9242020000000.0,9788920000000.0,10316400000000.0,10495500000000.0,10937100000000.0,11324800000000.0,11267700000000.0,11503000000000.0,11787900000000.0
4,Human capital (constant 2018 US$),NW.HCA.TO,Austria,AUT,2226850000000.0,2205980000000.0,2185310000000.0,2231640000000.0,2279840000000.0,2301080000000.0,...,2629330000000.0,2603340000000.0,2686480000000.0,2847440000000.0,2835930000000.0,2774530000000.0,2899920000000.0,3006280000000.0,3046630000000.0,3105690000000.0


#### summary of the dataframe

In [16]:
accounts_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7592 entries, 0 to 7591
Data columns (total 28 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ï»¿Series Name  7592 non-null   object 
 1   Series Code     7592 non-null   object 
 2   Country Name    7592 non-null   object 
 3   Country Code    7592 non-null   object 
 4   1995 [YR1995]   7592 non-null   object 
 5   1996 [YR1996]   7592 non-null   object 
 6   1997 [YR1997]   7592 non-null   object 
 7   1998 [YR1998]   7592 non-null   object 
 8   1999 [YR1999]   7592 non-null   object 
 9   2000 [YR2000]   7592 non-null   object 
 10  2001 [YR2001]   7592 non-null   object 
 11  2002 [YR2002]   7592 non-null   object 
 12  2003 [YR2003]   7592 non-null   object 
 13  2004 [YR2004]   7592 non-null   object 
 14  2005 [YR2005]   7592 non-null   float64
 15  2006 [YR2006]   7592 non-null   float64
 16  2007 [YR2007]   7592 non-null   float64
 17  2008 [YR2008]   7592 non-null   f

#### viewing columns of the dataframe

In [17]:
accounts_data.columns

Index(['ï»¿Series Name', 'Series Code', 'Country Name', 'Country Code',
       '1995 [YR1995]', '1996 [YR1996]', '1997 [YR1997]', '1998 [YR1998]',
       '1999 [YR1999]', '2000 [YR2000]', '2001 [YR2001]', '2002 [YR2002]',
       '2003 [YR2003]', '2004 [YR2004]', '2005 [YR2005]', '2006 [YR2006]',
       '2007 [YR2007]', '2008 [YR2008]', '2009 [YR2009]', '2010 [YR2010]',
       '2011 [YR2011]', '2012 [YR2012]', '2013 [YR2013]', '2014 [YR2014]',
       '2015 [YR2015]', '2016 [YR2016]', '2017 [YR2017]', '2018 [YR2018]'],
      dtype='object')

#### reanming columns of the dataframe

In [18]:
accounts_data.rename(columns = {'ï»¿Series Name':'indicator_name', 'Series Code':'indicator_code','Country Name':'country_name','Country Code':'country_code',
'1995 [YR1995]':'1995','1996 [YR1996]':'1996','1997 [YR1997]':'1997','1998 [YR1998]':'1998','1999 [YR1999]':'1999',
'2000 [YR2000]':'2000','2001 [YR2001]':'2001','2002 [YR2002]':'2002','2003 [YR2003]':'2003','2004 [YR2004]':'2004',
'2005 [YR2005]':'2005','2006 [YR2006]':'2006','2007 [YR2007]':'2007','2008 [YR2008]':'2008','2009 [YR2009]':'2009',
'2010 [YR2010]':'2010','2011 [YR2011]':'2011','2012 [YR2012]':'2012','2013 [YR2013]':'2013','2014 [YR2014]':'2014',
'2015 [YR2015]':'2015','2016 [YR2016]':'2016','2017 [YR2017]':'2017','2018 [YR2018]':'2018' }, inplace = True)
accounts_data.columns

Index(['indicator_name', 'indicator_code', 'country_name', 'country_code',
       '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003',
       '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012',
       '2013', '2014', '2015', '2016', '2017', '2018'],
      dtype='object')

#### selecting certaing columns of the dataframe for the table 'accounts_data'

In [19]:
accounts_data_clean = accounts_data[['country_name','country_code','indicator_name','indicator_code','1995','2000','2010','2014']]
accounts_data_clean['1995'] = accounts_data_clean['1995'].replace(['..'], 0)
accounts_data_clean['2000'] = accounts_data_clean['2000'].replace(['..'], 0)
accounts_data_clean['2010'] = accounts_data_clean['2010'].replace(['..'], 0)
accounts_data_clean['2014'] = accounts_data_clean['2014'].replace(['..'], 0)
accounts_data_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accounts_data_clean['1995'] = accounts_data_clean['1995'].replace(['..'], 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accounts_data_clean['2000'] = accounts_data_clean['2000'].replace(['..'], 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accounts_data_clean['2010'] = accounts_data_clean['

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,1995,2000,2010,2014
0,Albania,ALB,Human capital (constant 2018 US$),NW.HCA.TO,44861001530.0,43611277219.0,68056510000.0,72487670000.0
1,Argentina,ARG,Human capital (constant 2018 US$),NW.HCA.TO,1378550000000.0,1841360000000.0,2419210000000.0,3128110000000.0
2,Armenia,ARM,Human capital (constant 2018 US$),NW.HCA.TO,31162180567.0,36048138201.0,60834930000.0,73201070000.0
3,Australia,AUS,Human capital (constant 2018 US$),NW.HCA.TO,5657110000000.0,6583070000000.0,9242020000000.0,10937100000000.0
4,Austria,AUT,Human capital (constant 2018 US$),NW.HCA.TO,2226850000000.0,2301080000000.0,2603340000000.0,2774530000000.0


### Read 'wealth-accountsSeries.csv' into pandas dataframe

In [20]:
accounts_series = pd.read_csv("data/wealth-accountsSeries.csv", encoding='cp1252')
accounts_series.head()

Unnamed: 0,ï»¿Code,Indicator Name,Long definition,Source,Topic,Unit of measure,Periodicity,Reference period,Statistical concept and methodology,Previous Indicator Code,Previous Indicator Name
0,NW.HCA.TO,Human capital (constant 2018 US$),Human capital is computed as the present value...,World Bank. 2021. The Changing Wealth of Natio...,Human capital,Constant 2018 US$,Annual,1995-2018,Total wealth is calculated by summing up estim...,,
1,NW.HCA.PC,Human capital per capita (constant 2018 US$),Human capital is computed as the present value...,World Bank. 2021. The Changing Wealth of Natio...,Human capital,Constant 2018 US$,Annual,1995-2018,Total wealth is calculated by summing up estim...,,
2,NW.HCA.FEMP.PC,"Human capital per capita, employed female (con...",Human capital is computed as the present value...,World Bank. 2021. The Changing Wealth of Natio...,Human capital,Constant 2018 US$,Annual,1995-2018,Total wealth is calculated by summing up estim...,,
3,NW.HCA.MEMP.PC,"Human capital per capita, employed male (const...",Human capital is computed as the present value...,World Bank. 2021. The Changing Wealth of Natio...,Human capital,Constant 2018 US$,Annual,1995-2018,Total wealth is calculated by summing up estim...,,
4,NW.HCA.FEMA.PC,"Human capital per capita, female (constant 201...",Human capital is computed as the present value...,World Bank. 2021. The Changing Wealth of Natio...,Human capital,Constant 2018 US$,Annual,1995-2018,Total wealth is calculated by summing up estim...,,


#### summary of the dataframe

In [21]:
accounts_series.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 11 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   ï»¿Code                              52 non-null     object
 1   Indicator Name                       52 non-null     object
 2   Long definition                      52 non-null     object
 3   Source                               52 non-null     object
 4   Topic                                52 non-null     object
 5   Unit of measure                      52 non-null     object
 6   Periodicity                          52 non-null     object
 7   Reference period                     52 non-null     object
 8   Statistical concept and methodology  52 non-null     object
 9   Previous Indicator Code              2 non-null      object
 10  Previous Indicator Name              2 non-null      object
dtypes: object(11)
memory usage: 4.6+ KB


#### renaming some columns the dataframe

In [22]:
accounts_series.rename(columns = {'ï»¿Code':'series_code', 'Indicator Name':'indicator_name', 'Topic':'topic'}, inplace = True)
accounts_series.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 11 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   series_code                          52 non-null     object
 1   indicator_name                       52 non-null     object
 2   Long definition                      52 non-null     object
 3   Source                               52 non-null     object
 4   topic                                52 non-null     object
 5   Unit of measure                      52 non-null     object
 6   Periodicity                          52 non-null     object
 7   Reference period                     52 non-null     object
 8   Statistical concept and methodology  52 non-null     object
 9   Previous Indicator Code              2 non-null      object
 10  Previous Indicator Name              2 non-null      object
dtypes: object(11)
memory usage: 4.6+ KB


#### selecting some columns the dataframe

In [23]:
accounts_series_clean = accounts_series[['series_code','topic','indicator_name']]
accounts_series_clean.head()

Unnamed: 0,series_code,topic,indicator_name
0,NW.HCA.TO,Human capital,Human capital (constant 2018 US$)
1,NW.HCA.PC,Human capital,Human capital per capita (constant 2018 US$)
2,NW.HCA.FEMP.PC,Human capital,"Human capital per capita, employed female (con..."
3,NW.HCA.MEMP.PC,Human capital,"Human capital per capita, employed male (const..."
4,NW.HCA.FEMA.PC,Human capital,"Human capital per capita, female (constant 201..."


### Call fuction to connect to mysql cursor and get a cursor to execute queries

In [24]:
db_connection = connect_to_sql()
db_cursor = db_connection.cursor()

### Call function to create database 'accounts'

In [25]:
create_database(db_cursor)

Creating DATABASE - accounts

Showing all databases...
('Database',)
('accounts',)
('information_schema',)
('myfirstdb',)
('mysql',)
('performance_schema',)
('sakila',)
('sys',)

Using accounts


<mysql.connector.cursor.MySQLCursor at 0x17faffbc280>

### Call function to create table 'accounts_country'

In [26]:
table = "accounts_country"
schema = "(country_code VARCHAR(5) PRIMARY KEY, short_name VARCHAR(50), table_name VARCHAR(50),	long_name VARCHAR(200), currency_unit VARCHAR(50))"
create_table(db_cursor,table,schema)

Creating TABLE - accounts_country
Showing all tables under accounts...
('Tables_in_accounts',)
('accounts_country',)
Showing 'accounts_country' Table schema  ...
('Field', 'Type', 'Null', 'Key', 'Default', 'Extra')
('country_code', 'varchar(5)', 'NO', 'PRI', None, '')
('short_name', 'varchar(50)', 'YES', '', None, '')
('table_name', 'varchar(50)', 'YES', '', None, '')
('long_name', 'varchar(200)', 'YES', '', None, '')
('currency_unit', 'varchar(50)', 'YES', '', None, '')


<mysql.connector.cursor.MySQLCursor at 0x17faffbc280>

### Call function to create table 'accounts_data'

In [27]:
table = "accounts_data"
schema = "(country_name VARCHAR(50), country_code VARCHAR(5), indicator_name VARCHAR(200), indicator_code VARCHAR(50),	year_1995 BIGINT, year_2000 BIGINT,	year_2010 BIGINT, year_2014 BIGINT)"
create_table(db_cursor,table,schema)

Creating TABLE - accounts_data
Showing all tables under accounts...
('Tables_in_accounts',)
('accounts_country',)
('accounts_data',)
Showing 'accounts_data' Table schema  ...
('Field', 'Type', 'Null', 'Key', 'Default', 'Extra')
('country_name', 'varchar(50)', 'YES', '', None, '')
('country_code', 'varchar(5)', 'YES', '', None, '')
('indicator_name', 'varchar(200)', 'YES', '', None, '')
('indicator_code', 'varchar(50)', 'YES', '', None, '')
('year_1995', 'bigint', 'YES', '', None, '')
('year_2000', 'bigint', 'YES', '', None, '')
('year_2010', 'bigint', 'YES', '', None, '')
('year_2014', 'bigint', 'YES', '', None, '')


<mysql.connector.cursor.MySQLCursor at 0x17faffbc280>

### Call function to create table 'accounts_series'

In [28]:
table = "accounts_series"
schema = "(series_code VARCHAR(20), topic VARCHAR(20), indicator_name VARCHAR(200))"
db_cursor = create_table(db_cursor,table,schema)

Creating TABLE - accounts_series
Showing all tables under accounts...
('Tables_in_accounts',)
('accounts_country',)
('accounts_data',)
('accounts_series',)
Showing 'accounts_series' Table schema  ...
('Field', 'Type', 'Null', 'Key', 'Default', 'Extra')
('series_code', 'varchar(20)', 'YES', '', None, '')
('topic', 'varchar(20)', 'YES', '', None, '')
('indicator_name', 'varchar(200)', 'YES', '', None, '')


### Insert records from 'accounts_country_clean' dataframe to 'accounts_country' 

In [29]:
table = "accounts_country"
cols = "(country_code, short_name, table_name,	long_name, currency_unit)"
for index,row in accounts_country_clean.iterrows():
    record = (row[0],row[1],row[2],row[3],row[4])
    db_cursor = insert_table(db_cursor,table,cols,record)
db_cursor = show_all_records(db_cursor,table)

Inserting record ('ALB', 'Albania', 'Albania', 'Republic of Albania', 'Albanian lek') into table...

Inserting record ('ARG', 'Argentina', 'Argentina', 'Argentine Republic', 'Argentine peso') into table...

Inserting record ('ARM', 'Armenia', 'Armenia', 'Republic of Armenia', 'Armenian dram') into table...

Inserting record ('AUS', 'Australia', 'Australia', 'Commonwealth of Australia', 'Australian dollar') into table...

Inserting record ('AUT', 'Austria', 'Austria', 'Republic of Austria', 'Euro') into table...

Inserting record ('AZE', 'Azerbaijan', 'Azerbaijan', 'Republic of Azerbaijan', 'New Azeri manat') into table...

Inserting record ('BHR', 'Bahrain', 'Bahrain', 'Kingdom of Bahrain', 'Bahraini dinar') into table...

Inserting record ('BGD', 'Bangladesh', 'Bangladesh', "People's Republic of Bangladesh", 'Bangladeshi taka') into table...

Inserting record ('BLR', 'Belarus', 'Belarus', 'Republic of Belarus', 'Belarusian rubel') into table...

Inserting record ('BEL', 'Belgium', 'Be

### Insert records from 'accounts_data_clean' dataframe to 'accounts_data' 

In [30]:
table = "accounts_data"
cols = "(country_name, country_code, indicator_name, indicator_code, year_1995, year_2000,	year_2010, year_2014)"
for index,row in accounts_data_clean.iterrows():
    record = (row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7])
    db_cursor = insert_table(db_cursor,table,cols,record)
        
db_cursor = show_all_records(db_cursor,table)

Inserting record ('Albania', 'ALB', 'Human capital (constant 2018 US$)', 'NW.HCA.TO', '44861001530', '43611277219', 68056509547.0, 72487665594.0) into table...

Inserting record ('Argentina', 'ARG', 'Human capital (constant 2018 US$)', 'NW.HCA.TO', '1.37855E+12', '1.84136E+12', 2419210000000.0, 3128110000000.0) into table...

Inserting record ('Armenia', 'ARM', 'Human capital (constant 2018 US$)', 'NW.HCA.TO', '31162180567', '36048138201', 60834929642.0, 73201068526.0) into table...

Inserting record ('Australia', 'AUS', 'Human capital (constant 2018 US$)', 'NW.HCA.TO', '5.65711E+12', '6.58307E+12', 9242020000000.0, 10937100000000.0) into table...

Inserting record ('Austria', 'AUT', 'Human capital (constant 2018 US$)', 'NW.HCA.TO', '2.22685E+12', '2.30108E+12', 2603340000000.0, 2774530000000.0) into table...

Inserting record ('Azerbaijan', 'AZE', 'Human capital (constant 2018 US$)', 'NW.HCA.TO', '27450086133', '30684089751', 68530441552.0, 78910540804.0) into table...

Inserting reco

### Insert records from 'accounts_series_clean' dataframe to 'accounts_series' 

In [31]:
table = "accounts_series"
cols = "(series_code, topic, indicator_name)"
for index,row in accounts_series_clean.iterrows():
    record = (row[0],row[1],row[2])
    db_cursor = insert_table(db_cursor,table,cols,record)
db_cursor = show_all_records(db_cursor,table)

Inserting record ('NW.HCA.TO', 'Human capital', 'Human capital (constant 2018 US$)') into table...

Inserting record ('NW.HCA.PC', 'Human capital', 'Human capital per capita (constant 2018 US$)') into table...

Inserting record ('NW.HCA.FEMP.PC', 'Human capital', 'Human capital per capita, employed female (constant 2018 US$)') into table...

Inserting record ('NW.HCA.MEMP.PC', 'Human capital', 'Human capital per capita, employed male (constant 2018 US$)') into table...

Inserting record ('NW.HCA.FEMA.PC', 'Human capital', 'Human capital per capita, female (constant 2018 US$)') into table...

Inserting record ('NW.HCA.MALE.PC', 'Human capital', 'Human capital per capita, male (constant 2018 US$)') into table...

Inserting record ('NW.HCA.FSEM.PC', 'Human capital', 'Human capital per capita, self-employed female (constant 2018 US$)') into table...

Inserting record ('NW.HCA.MSEM.PC', 'Human capital', 'Human capital per capita, self-employed male (constant 2018 US$)') into table...

Inser

### Create index constraint on 'series_code' column of table 'accounts_series'

In [32]:
db_cursor.execute("CREATE INDEX idx_series_code ON accounts_series ( series_code )")

### Create relationship between 'accounts_data' to 'accounts_series' by making column 'indicator_code' in 'accounts_data' a FOREIGN KEY

In [33]:
db_cursor.execute("ALTER TABLE accounts_data ADD FOREIGN KEY (indicator_code) REFERENCES accounts_series(series_code)")

### Create index constraint on 'country_code' column of table 'accounts_data'

In [34]:
db_cursor.execute("CREATE INDEX idx_country_code ON accounts_data ( country_code )")

### Create relationship between 'accounts_country' to 'accounts_data' by making column 'country_code' in 'accounts_country' a FOREIGN KEY

In [35]:
db_cursor.execute("ALTER TABLE accounts_country ADD FOREIGN KEY (country_code) REFERENCES accounts_data(country_code)")

### Close cursor

In [36]:
db_cursor.close()

True