# ETL (Extract-Transform-Load) Project

## Import Dependencies

* Run the following cell to import the necessary packages to run through this etl.
* The only external package is **d6tstack**. So, you will need to install this in your virtual environment.
  * For more information on this package and how to install it, see <https://pypi.org/project/d6tstack/>.

In [1]:
# Dependencies
import pandas as pd
import numpy as np

# Database credentials
from config import username, password

# Used for making database connection.
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import Session
from sqlalchemy.engine import reflection
from sqlalchemy.schema import (
        MetaData,
        Table,
        DropTable,
        ForeignKeyConstraint,
        DropConstraint,
        )

# Used to abstract classes into tables.
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.automap import automap_base

# Used to declare column types.
from sqlalchemy import Column, Integer, String, Float, ForeignKey

# Used to load pandas dataframe into sql.
import d6tstack.utils
import time

# Ignore warnings.
import warnings
warnings.filterwarnings('ignore')

# Used for reading, writing to, and zipping files/folders.
from pathlib import Path
import os, zipfile
import shutil
import glob

## Constants

These are variables for items in this notebook that never change, such as csv file names, table names, database name, database connection string, etc.

In [2]:
# Data csv files
COMMODITY_TRADE_CSV = os.path.join(".", "Resources", "commodity_trade_statistics_data.csv")
FINANCIAL_AID_CSV =  os.path.join(".", "Resources", "june-9-data-csv-1.csv")
COMMODITY_CODES_CSV = os.path.join(".", "Resources", "un_comtrade_commodity_classifications.csv")

# Database/tables
DB_NAME = "trade_db"
COMMODITY_TABLE = "commodity"
COMMODITY_CATEGORY_TABLE = "commodity_category"
COMMODITY_CODE_TABLE = "commodity_code"
COUNTRY_TABLE = "country"
FINANCIAL_AID_TABLE = 'financial_aid'
COMMODITY_EXPORTS_TABLE = 'commodity_exports'
COMMODITY_IMPORTS_TABLE = 'commodity_imports'

# Database connection string for loading data into database.
cfg_uri_psql = f"postgresql+psycopg2://{username}:{password}@localhost/{DB_NAME}"

## Unzip data files into the Resources folder

* Before running this cell, create a folder in the project root directory (the same directory as this notebook) called **Resources**.
* After that, you can run this cell. Running the following cell will extract the data zip files into the **Resources** folder you created, which will contain the csv files needed for this project.

In [3]:
# Running this cell will unzip the data files in the Resources folder.
extension = ".zip"
extracted_dir_name = "."

# Get the current working directory.
# Need to be in the root directory of this project for this to work.
cwd_dir_name = os.getcwd()
print(f"The current working directory is {cwd_dir_name}.")

os.chdir("Resources") # change directory from working dir to dir with the zip file(s) .
# This should be the "Resources" folder.
dir_name = os.getcwd()
print(f"You are now in the following directory: {dir_name}.")

for item in os.listdir(dir_name): # loop through the items in the directory.
    if item.endswith(extension): # check for ".zip" extension"
        try:
            file_name = os.path.abspath(item) # get full path of files
            zip_ref = zipfile.ZipFile(file_name) # create zipfile object
            unzipped_directory = os.path.join(extracted_dir_name) # reference to the directory where the zip files will be extracted.
            zip_ref.extractall(unzipped_directory) # extract file to dir
            zip_ref.close() # close file
            print(f"Successfully unzipped {item} into the following folder:{dir_name}.")
        except:
            print(f"Error trying to unzip data file(s).")
            print(f"Make sure that the files are closed and you have the correct file/folder permissions.")
            
# Go up one directory into the project root directory.
os.chdir(os.path.normpath(os.getcwd() + os.sep + os.pardir))
print(os.path.normpath(os.getcwd() + os.sep + os.pardir))

The current working directory is C:\Users\phili\Desktop\etl_project.
You are now in the following directory: C:\Users\phili\Desktop\etl_project\Resources.
Successfully unzipped global-commodity-trade-statistics.zip into the following folder:C:\Users\phili\Desktop\etl_project\Resources.
Successfully unzipped june-9-data-csv-1.zip into the following folder:C:\Users\phili\Desktop\etl_project\Resources.
Successfully unzipped un_comtrade_commodity_classifications.zip into the following folder:C:\Users\phili\Desktop\etl_project\Resources.
C:\Users\phili\Desktop


## Store commodity csv into pandas dataframe

In [4]:
commodity_trade_df = pd.read_csv(COMMODITY_TRADE_CSV, low_memory=False, encoding ="utf-8")

commodity_trade_df.to_hdf('commodity_trade.h5', key='df', mode='w')

In [5]:
# Found out this read_hsf is faster than read_csv when working with really large datasets.
commodity_trade_df = pd.read_hdf('commodity_trade.h5', 'df')

commodity_trade_df

Unnamed: 0,country_or_area,year,comm_code,commodity,flow,trade_usd,weight_kg,quantity_name,quantity,category
0,Afghanistan,2016,010410,"Sheep, live",Export,6088,2339.0,Number of items,51.0,01_live_animals
1,Afghanistan,2016,010420,"Goats, live",Export,3958,984.0,Number of items,53.0,01_live_animals
2,Afghanistan,2008,010210,"Bovine animals, live pure-bred breeding",Import,1026804,272.0,Number of items,3769.0,01_live_animals
3,Albania,2016,010290,"Bovine animals, live, except pure-bred breeding",Import,2414533,1114023.0,Number of items,6853.0,01_live_animals
4,Albania,2016,010392,"Swine, live except pure-bred breeding > 50 kg",Import,14265937,9484953.0,Number of items,96040.0,01_live_animals
...,...,...,...,...,...,...,...,...,...,...
8225866,Zimbabwe,2001,TOTAL,ALL COMMODITIES,Export,1206807424,,No Quantity,,all_commodities
8225867,Zimbabwe,2001,TOTAL,ALL COMMODITIES,Re-Export,15943977,,No Quantity,,all_commodities
8225868,Zimbabwe,2000,TOTAL,ALL COMMODITIES,Export,1924962432,,No Quantity,,all_commodities
8225869,Zimbabwe,1995,TOTAL,ALL COMMODITIES,Import,2658853376,,No Quantity,,all_commodities


### Rename columns for commodity dataframe

In [6]:
commodity_trade_renamed_columns = commodity_trade_df.rename(columns={
    "comm_code": "commodity_code",
    "commodity": "commodity_description",
    "flow": "trade_flow",
    "trade_usd": "trade_value_usd",  
})

commodity_trade_renamed_columns

Unnamed: 0,country_or_area,year,commodity_code,commodity_description,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category
0,Afghanistan,2016,010410,"Sheep, live",Export,6088,2339.0,Number of items,51.0,01_live_animals
1,Afghanistan,2016,010420,"Goats, live",Export,3958,984.0,Number of items,53.0,01_live_animals
2,Afghanistan,2008,010210,"Bovine animals, live pure-bred breeding",Import,1026804,272.0,Number of items,3769.0,01_live_animals
3,Albania,2016,010290,"Bovine animals, live, except pure-bred breeding",Import,2414533,1114023.0,Number of items,6853.0,01_live_animals
4,Albania,2016,010392,"Swine, live except pure-bred breeding > 50 kg",Import,14265937,9484953.0,Number of items,96040.0,01_live_animals
...,...,...,...,...,...,...,...,...,...,...
8225866,Zimbabwe,2001,TOTAL,ALL COMMODITIES,Export,1206807424,,No Quantity,,all_commodities
8225867,Zimbabwe,2001,TOTAL,ALL COMMODITIES,Re-Export,15943977,,No Quantity,,all_commodities
8225868,Zimbabwe,2000,TOTAL,ALL COMMODITIES,Export,1924962432,,No Quantity,,all_commodities
8225869,Zimbabwe,1995,TOTAL,ALL COMMODITIES,Import,2658853376,,No Quantity,,all_commodities


### Drop null values from commodity dataframe

In [7]:
commodity_trade_no_null = commodity_trade_renamed_columns.dropna(how="any")

commodity_trade_no_null

Unnamed: 0,country_or_area,year,commodity_code,commodity_description,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category
0,Afghanistan,2016,010410,"Sheep, live",Export,6088,2339.0,Number of items,51.0,01_live_animals
1,Afghanistan,2016,010420,"Goats, live",Export,3958,984.0,Number of items,53.0,01_live_animals
2,Afghanistan,2008,010210,"Bovine animals, live pure-bred breeding",Import,1026804,272.0,Number of items,3769.0,01_live_animals
3,Albania,2016,010290,"Bovine animals, live, except pure-bred breeding",Import,2414533,1114023.0,Number of items,6853.0,01_live_animals
4,Albania,2016,010392,"Swine, live except pure-bred breeding > 50 kg",Import,14265937,9484953.0,Number of items,96040.0,01_live_animals
...,...,...,...,...,...,...,...,...,...,...
8225123,Tunisia,1995,TOTAL,ALL COMMODITIES,Export,5474626110,0.0,No Quantity,0.0,all_commodities
8225460,United Kingdom,1998,TOTAL,ALL COMMODITIES,Import,311879512430,0.0,No Quantity,0.0,all_commodities
8225461,United Kingdom,1998,TOTAL,ALL COMMODITIES,Export,270295285433,0.0,No Quantity,0.0,all_commodities
8225462,United Kingdom,1997,TOTAL,ALL COMMODITIES,Import,305135405241,0.0,No Quantity,0.0,all_commodities


### Verify commodity dataframe count

In [8]:
commodity_trade_no_null.count()

country_or_area          7876117
year                     7876117
commodity_code           7876117
commodity_description    7876117
trade_flow               7876117
trade_value_usd          7876117
weight_kg                7876117
quantity_name            7876117
quantity                 7876117
category                 7876117
dtype: int64

### Add auto incrementing id column to commodity dataframe

In [9]:
commodity_trade_no_null.insert(0, 'id', range(0, 0 + len(commodity_trade_no_null)))

commodity_trade_no_null

Unnamed: 0,id,country_or_area,year,commodity_code,commodity_description,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category
0,0,Afghanistan,2016,010410,"Sheep, live",Export,6088,2339.0,Number of items,51.0,01_live_animals
1,1,Afghanistan,2016,010420,"Goats, live",Export,3958,984.0,Number of items,53.0,01_live_animals
2,2,Afghanistan,2008,010210,"Bovine animals, live pure-bred breeding",Import,1026804,272.0,Number of items,3769.0,01_live_animals
3,3,Albania,2016,010290,"Bovine animals, live, except pure-bred breeding",Import,2414533,1114023.0,Number of items,6853.0,01_live_animals
4,4,Albania,2016,010392,"Swine, live except pure-bred breeding > 50 kg",Import,14265937,9484953.0,Number of items,96040.0,01_live_animals
...,...,...,...,...,...,...,...,...,...,...,...
8225123,7876112,Tunisia,1995,TOTAL,ALL COMMODITIES,Export,5474626110,0.0,No Quantity,0.0,all_commodities
8225460,7876113,United Kingdom,1998,TOTAL,ALL COMMODITIES,Import,311879512430,0.0,No Quantity,0.0,all_commodities
8225461,7876114,United Kingdom,1998,TOTAL,ALL COMMODITIES,Export,270295285433,0.0,No Quantity,0.0,all_commodities
8225462,7876115,United Kingdom,1997,TOTAL,ALL COMMODITIES,Import,305135405241,0.0,No Quantity,0.0,all_commodities


## Create new category dataframe from the commodity dataframe

In [10]:
# Create new category data frame with split value columns. 
category_df = commodity_trade_no_null["category"].str.split("_", n = 1, expand = True) 
  
# Make separate category_id column from new category data frame.
commodity_trade_no_null["category_id"]= category_df[0] 
  
# Dropping old category column. 
commodity_trade_no_null.drop(columns =["category"], inplace = True) 

commodity_trade_no_null

Unnamed: 0,id,country_or_area,year,commodity_code,commodity_description,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category_id
0,0,Afghanistan,2016,010410,"Sheep, live",Export,6088,2339.0,Number of items,51.0,01
1,1,Afghanistan,2016,010420,"Goats, live",Export,3958,984.0,Number of items,53.0,01
2,2,Afghanistan,2008,010210,"Bovine animals, live pure-bred breeding",Import,1026804,272.0,Number of items,3769.0,01
3,3,Albania,2016,010290,"Bovine animals, live, except pure-bred breeding",Import,2414533,1114023.0,Number of items,6853.0,01
4,4,Albania,2016,010392,"Swine, live except pure-bred breeding > 50 kg",Import,14265937,9484953.0,Number of items,96040.0,01
...,...,...,...,...,...,...,...,...,...,...,...
8225123,7876112,Tunisia,1995,TOTAL,ALL COMMODITIES,Export,5474626110,0.0,No Quantity,0.0,all
8225460,7876113,United Kingdom,1998,TOTAL,ALL COMMODITIES,Import,311879512430,0.0,No Quantity,0.0,all
8225461,7876114,United Kingdom,1998,TOTAL,ALL COMMODITIES,Export,270295285433,0.0,No Quantity,0.0,all
8225462,7876115,United Kingdom,1997,TOTAL,ALL COMMODITIES,Import,305135405241,0.0,No Quantity,0.0,all


In [11]:
# Rename columns to be something more meaningful.
category_df = category_df.rename(columns={
    0: "category_id",
    1: "category_name"
})

# Drop duplicate categories.
category_df.drop_duplicates("category_id", inplace=True)

category_df

Unnamed: 0,category_id,category_name
0,01,live_animals
56580,02,meat_and_edible_meat_offal
156580,03,fish_crustaceans_molluscs_aquatic_invertebrate...
256580,04,dairy_products_eggs_honey_edible_animal_produc...
356580,05,products_of_animal_origin_nes
...,...,...
7969697,95,toys_games_sports_requisites
8069695,96,miscellaneous_manufactured_articles
8169697,97,works_of_art_collectors_pieces_and_antiques
8209719,99,commodities_not_specified_according_to_kind


## Create new commodity codes dataframe

In [12]:
# Create new commodity codes dataframe that contains commodity code and commodity description columns.

# Store commodity codes classification csv into pandas dataframe
commodity_codes_df = pd.read_csv(COMMODITY_CODES_CSV, low_memory=False, encoding ="utf-8")

commodity_codes_df.to_hdf('commodity_codes.h5', key='df', mode='w')

# Found out this read_hsf is faster than read_csv when working with really large datasets.
commodity_codes_df = pd.read_hdf('commodity_codes.h5', 'df')

# Rename columns
commodity_codes_df = commodity_codes_df.rename(columns={
    "Code": "commodity_code",
    "Description": "commodity_description"
})

# Drop duplicate codes
commodity_codes_df.drop_duplicates("commodity_code", inplace=True)

# Drop unnecessary columns
commodity_codes_df = commodity_codes_df[["commodity_code", "commodity_description"]]

commodity_codes_df

Unnamed: 0,commodity_code,commodity_description
0,1,Food and beverages
1,11,"Food and beverages, primary"
2,111,"Food and beverages, primary, mainly for industry"
3,112,"Food and beverages, primary, mainly for househ..."
4,12,"Food and beverages, processed"
...,...,...
51353,84625,"Other women's full-length/knee-length hosiery,..."
51691,89842,Magnetic media for the recording of sound/of o...
51692,89844,Optical media for the recording of sound/of ot...
51693,89846,Semiconductor media for the recording of sound...


### Verify commodity codes dataframe count


In [13]:
commodity_codes_df.count()

commodity_code           13616
commodity_description    13616
dtype: int64

### Remove duplicate commodity_description column from commodity dataframe

In [14]:
del commodity_trade_no_null["commodity_description"]

commodity_trade_no_null

Unnamed: 0,id,country_or_area,year,commodity_code,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category_id
0,0,Afghanistan,2016,010410,Export,6088,2339.0,Number of items,51.0,01
1,1,Afghanistan,2016,010420,Export,3958,984.0,Number of items,53.0,01
2,2,Afghanistan,2008,010210,Import,1026804,272.0,Number of items,3769.0,01
3,3,Albania,2016,010290,Import,2414533,1114023.0,Number of items,6853.0,01
4,4,Albania,2016,010392,Import,14265937,9484953.0,Number of items,96040.0,01
...,...,...,...,...,...,...,...,...,...,...
8225123,7876112,Tunisia,1995,TOTAL,Export,5474626110,0.0,No Quantity,0.0,all
8225460,7876113,United Kingdom,1998,TOTAL,Import,311879512430,0.0,No Quantity,0.0,all
8225461,7876114,United Kingdom,1998,TOTAL,Export,270295285433,0.0,No Quantity,0.0,all
8225462,7876115,United Kingdom,1997,TOTAL,Import,305135405241,0.0,No Quantity,0.0,all


## Create new countries dataframe from the commodity dataframe

In [15]:
# Create new countries dataframe that contains country name and unique id columns.
countries_df = commodity_trade_no_null.loc[:,["country_or_area"]]

# Drop duplicate codes
countries_df.drop_duplicates("country_or_area", inplace=True)

# Drop nulls
countries_df.dropna(how="any", inplace=True)

countries_df

Unnamed: 0,country_or_area
0,Afghanistan
3,Albania
262,Algeria
469,Andorra
763,Angola
...,...
55828,Yemen
55959,Zambia
56308,Zimbabwe
109486,Djibouti


### Add auto-incrementing id column to countries dataframe


In [16]:
countries_df.insert(0, 'id', range(0, 0 + len(countries_df)))

countries_df

Unnamed: 0,id,country_or_area
0,0,Afghanistan
3,1,Albania
262,2,Algeria
469,3,Andorra
763,4,Angola
...,...,...
55828,202,Yemen
55959,203,Zambia
56308,204,Zimbabwe
109486,205,Djibouti


### Merge countries dataframe with commodity dataframe on country name

In [17]:
# Merge on country name using pandas.
countries_commodities_merged = pd.merge(commodity_trade_no_null, countries_df, on="country_or_area", how="outer")

# Rename columns
countries_commodities_merged = countries_commodities_merged.rename(columns={
    "id_y": "country_id",
    "id_x": "id"
})

# Drop country name column.
del countries_commodities_merged["country_or_area"]

countries_commodities_merged

Unnamed: 0,id,year,commodity_code,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category_id,country_id
0,0,2016,010410,Export,6088,2339.0,Number of items,51.0,01,0
1,1,2016,010420,Export,3958,984.0,Number of items,53.0,01,0
2,2,2008,010210,Import,1026804,272.0,Number of items,3769.0,01,0
3,45602,2011,020450,Import,3441,1254.0,Weight in kilograms,1254.0,02,0
4,45603,2008,020210,Import,6849,1.0,Weight in kilograms,1.0,02,0
...,...,...,...,...,...,...,...,...,...,...
7876112,7872424,1998,970400,Import,5635,0.0,No Quantity,0.0,97,206
7876113,7872425,1997,970400,Import,544,0.0,No Quantity,0.0,97,206
7876114,7875958,1999,9999AA,Import,82760,0.0,No Quantity,0.0,99,206
7876115,7875959,1998,9999AA,Import,108202,0.0,No Quantity,0.0,99,206


## Split commodity dataframe into 2 dataframes based on trade flow - exports and imports

### Find all possible values of trade_flow column

In [18]:
countries_commodities_merged["trade_flow"].value_counts()

Import       4640584
Export       2743469
Re-Export     347471
Re-Import     144593
Name: trade_flow, dtype: int64

### Create new dataframe for commodity exports

In [19]:
exports_df = countries_commodities_merged.loc[
    (countries_commodities_merged["trade_flow"] == "Export") | 
    (countries_commodities_merged["trade_flow"] == "Re-Export")]

exports_df

Unnamed: 0,id,year,commodity_code,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category_id,country_id
0,0,2016,010410,Export,6088,2339.0,Number of items,51.0,01,0
1,1,2016,010420,Export,3958,984.0,Number of items,53.0,01,0
9,244983,2016,040310,Export,200173,62313.0,Weight in kilograms,62313.0,04,0
14,244988,2016,040900,Export,1047,100.0,Weight in kilograms,100.0,04,0
18,244992,2015,040310,Export,120515,34617.0,Weight in kilograms,34617.0,04,0
...,...,...,...,...,...,...,...,...,...,...
7875555,7827741,2009,961800,Export,461,80.0,Weight in kilograms,80.0,96,205
7875826,1741680,2004,210310,Export,4298,200.0,Weight in kilograms,200.0,21,206
7876059,7279216,2005,890310,Export,457,1.0,Number of items,0.0,89,206
7876077,7516252,2003,920300,Export,2310,51.0,Number of items,2.0,92,206


### Create new dataframe for commodity imports

In [20]:
imports_df = countries_commodities_merged.loc[
    (countries_commodities_merged["trade_flow"] == "Import") | 
    (countries_commodities_merged["trade_flow"] == "Re-Import")]

imports_df

Unnamed: 0,id,year,commodity_code,trade_flow,trade_value_usd,weight_kg,quantity_name,quantity,category_id,country_id
2,2,2008,010210,Import,1026804,272.0,Number of items,3769.0,01,0
3,45602,2011,020450,Import,3441,1254.0,Weight in kilograms,1254.0,02,0
4,45603,2008,020210,Import,6849,1.0,Weight in kilograms,1.0,02,0
5,145425,2010,030211,Import,8600,9000.0,Weight in kilograms,9000.0,03,0
6,244980,2016,040210,Import,14737150,7665593.0,Weight in kilograms,7665593.0,04,0
...,...,...,...,...,...,...,...,...,...,...
7876112,7872424,1998,970400,Import,5635,0.0,No Quantity,0.0,97,206
7876113,7872425,1997,970400,Import,544,0.0,No Quantity,0.0,97,206
7876114,7875958,1999,9999AA,Import,82760,0.0,No Quantity,0.0,99,206
7876115,7875959,1998,9999AA,Import,108202,0.0,No Quantity,0.0,99,206


## Connect to local database

In [21]:
rds_connection_string = f"{username}:{password}@localhost:5432/{DB_NAME}"
engine = create_engine(f'postgresql://{rds_connection_string}')

## DANGEROUS: Drops everything in database

Use with caution!

In [22]:
def db_DropEverything(engine):
    # From http://www.sqlalchemy.org/trac/wiki/UsageRecipes/DropEverything

    conn = engine.connect()

    trans = conn.begin()

    inspector = inspect(engine)

    metadata = MetaData()

    tbs = []
    all_fks = []

    for table_name in inspector.get_table_names():
        fks = []
        for fk in inspector.get_foreign_keys(table_name):
            if not fk['name']:
                continue
            fks.append(
                ForeignKeyConstraint((),(),name=fk['name'])
                )
        t = Table(table_name,metadata,*fks)
        tbs.append(t)
        all_fks.extend(fks)

    for fkc in all_fks:
        conn.execute(DropConstraint(fkc))

    for table in tbs:
        conn.execute(DropTable(table))

    trans.commit()

In [23]:
db_DropEverything(engine)

## Create classes/schemas that will be associated with tables in the database.

In [24]:
# Sets an object to utilize the default declarative base in SQL Alchemy.
Base = declarative_base()

In [25]:
# Clear out db
Base.metadata.drop_all(bind=engine)

In [26]:
# Create classes and define schemas for different tables
class CommodityCategory(Base):
    __tablename__ = COMMODITY_CATEGORY_TABLE
    category_id = Column(String(255), primary_key=True, nullable=False, unique=True)
    category_name = Column(String(255), nullable=False)
    
class CommodityCode(Base):
    __tablename__ = COMMODITY_CODE_TABLE
    commodity_code = Column(String(255), primary_key=True, nullable=False, unique=True)
    commodity_description = Column(String(400), nullable=False)
    
class Country(Base):
    __tablename__ = COUNTRY_TABLE
    id = Column(Integer, primary_key=True, nullable=False, unique=True)
    country_or_area = Column(String(255), nullable=False)

class CommodityExports(Base):
    __tablename__ = COMMODITY_EXPORTS_TABLE
    id = Column(Integer, primary_key=True, nullable=False, unique=True)
    year = Column(Integer, nullable=False)
    comodity_code = Column(String(255), ForeignKey(f"{COMMODITY_CODE_TABLE}.commodity_code"), nullable=False)
    trade_flow = Column(String(255), nullable=False)
    trade_value_usd = Column(String(255), nullable=False)
    weight_kg = Column(Float, nullable=False)
    quantity_name = Column(String(255), nullable=False)
    quantity = Column(Float, nullable=False)
    category_id = Column(String(255), ForeignKey(f"{COMMODITY_CATEGORY_TABLE}.category_id"), nullable=False)
    country_id = Column(Integer, ForeignKey(f"{COUNTRY_TABLE}.id"), nullable=False)
    
class CommodityImports(Base):
    __tablename__ = COMMODITY_IMPORTS_TABLE
    id = Column(Integer, primary_key=True, nullable=False, unique=True)
    year = Column(Integer, nullable=False)
    comodity_code = Column(String(255), ForeignKey(f"{COMMODITY_CODE_TABLE}.commodity_code"), nullable=False)
    trade_flow = Column(String(255), nullable=False)
    trade_value_usd = Column(String(255), nullable=False)
    weight_kg = Column(Float, nullable=False)
    quantity_name = Column(String(255), nullable=False)
    quantity = Column(Float, nullable=False)
    category_id = Column(String(255), ForeignKey(f"{COMMODITY_CATEGORY_TABLE}.category_id"), nullable=False)
    country_id = Column(Integer, ForeignKey(f"{COUNTRY_TABLE}.id"), nullable=False)
    
class FinancialAid(Base):
    __tablename__ = FINANCIAL_AID_TABLE
    id = Column(Integer, primary_key=True)
    donor_type = Column(String(255))
    aid_type = Column(String(255))
    receiver = Column(String(255))
    amount = Column(Float)
    currency = Column(String(255))
    USD_amount = Column(Float)
    country_id = Column(Integer, ForeignKey(f"{COUNTRY_TABLE}.id"), nullable=False)
    
Base.metadata.create_all(engine)

# Create (if not already in existence) the table associated with class.
Base.metadata.create_all(engine)

## Check for tables

In [27]:
engine.table_names()

['commodity_code',
 'commodity_exports',
 'commodity_category',
 'country',
 'commodity_imports',
 'financial_aid']

## Load final pandas dataframes into sql

### Use pandas/d6tstack to load commodity category dataframe into sql

In [28]:
start_time = time.time()
d6tstack.utils.pd_to_psql(category_df, cfg_uri_psql, COMMODITY_CATEGORY_TABLE, if_exists='append')
print("Time to load category dataframe into sql:")
print("--- %s seconds ---" % (time.time() - start_time))

Time to load category dataframe into sql:
--- 0.33183979988098145 seconds ---


### Use pandas/d6tstack to load commodity codes dataframe into sql

In [29]:
start_time = time.time()
d6tstack.utils.pd_to_psql(commodity_codes_df, cfg_uri_psql, COMMODITY_CODE_TABLE, if_exists='append', sep='\t')
print("Time to load codes dataframe into sql:")
print("--- %s seconds ---" % (time.time() - start_time))

Time to load codes dataframe into sql:
--- 0.297680139541626 seconds ---


### Use pandas/d6tstack to load country dataframe into sql

In [30]:
start_time = time.time()
d6tstack.utils.pd_to_psql(countries_df, cfg_uri_psql, COUNTRY_TABLE, if_exists='append', sep='\t')
print("Time to load country dataframe into sql:")
print("--- %s seconds ---" % (time.time() - start_time))

Time to load country dataframe into sql:
--- 0.11029219627380371 seconds ---


### Use pandas/d6tstack to load commodity exports dataframe into sql

In [31]:
start_time = time.time()
d6tstack.utils.pd_to_psql(exports_df, cfg_uri_psql, COMMODITY_EXPORTS_TABLE, if_exists='append',sep='\t')
print("Time to load commodity exports dataframe into sql:")
print("--- %s seconds ---" % (time.time() - start_time))

Time to load commodity exports dataframe into sql:
--- 181.22635006904602 seconds ---


### Use pandas/d6tstack to load commodity imports dataframe into sql

In [32]:
start_time = time.time()
d6tstack.utils.pd_to_psql(imports_df, cfg_uri_psql, COMMODITY_IMPORTS_TABLE, if_exists='append',sep='\t')
print("Time to load commodity imports dataframe into sql:")
print("--- %s seconds ---" % (time.time() - start_time))

Time to load commodity imports dataframe into sql:
--- 270.16224098205566 seconds ---


## Create session object to connect to database

In [33]:
session = Session(bind=engine)

## Confirm data from pandas dataframes have been added to database.

### Colllect the names of the tables within the database

In [34]:
inspector = inspect(engine)
inspector.get_table_names()

['commodity_code',
 'commodity_exports',
 'commodity_category',
 'country',
 'commodity_imports',
 'financial_aid']

### Confirm category data has been added by querying the commedity category table

In [35]:
category_list = session.query(CommodityCategory).limit(10)
for category in category_list:
    print(f"id: {category.category_id}, category name: {category.category_name}")

id: 01, category name: live_animals
id: 02, category name: meat_and_edible_meat_offal
id: 03, category name: fish_crustaceans_molluscs_aquatic_invertebrates_ne
id: 04, category name: dairy_products_eggs_honey_edible_animal_product_nes
id: 05, category name: products_of_animal_origin_nes
id: 06, category name: live_trees_plants_bulbs_roots_cut_flowers_etc
id: 07, category name: edible_vegetables_and_certain_roots_and_tubers
id: 08, category name: edible_fruit_nuts_peel_of_citrus_fruit_melons
id: 09, category name: coffee_tea_mate_and_spices
id: 10, category name: cereals


In [36]:
# Print column names and types
category_columns = inspector.get_columns(COMMODITY_CATEGORY_TABLE)
for column in category_columns:
    print(column["name"], column["type"])

category_id VARCHAR(255)
category_name VARCHAR(255)


### Confirm country data has been added by querying the country table

In [37]:
country_list = session.query(Country).limit(10)
for country in country_list:
    print(f"id: {country.id}, country name: {country.country_or_area}")

id: 0, country name: Afghanistan
id: 1, country name: Albania
id: 2, country name: Algeria
id: 3, country name: Andorra
id: 4, country name: Angola
id: 5, country name: Anguilla
id: 6, country name: Antigua and Barbuda
id: 7, country name: Argentina
id: 8, country name: Armenia
id: 9, country name: Aruba


In [38]:
# Print column names and types
country_columns = inspector.get_columns(COUNTRY_TABLE)
for column in country_columns:
    print(column["name"], column["type"])

id INTEGER
country_or_area VARCHAR(255)


### Confirm commodity codes data has been added by querying the commodity codes table

In [39]:
codes_list = session.query(CommodityCode).limit(10)
for code in codes_list:
    print(f"code: {code.commodity_code}, description: {code.commodity_description}")

code: 1, description: Food and beverages
code: 11, description: Food and beverages, primary
code: 111, description: Food and beverages, primary, mainly for industry
code: 112, description: Food and beverages, primary, mainly for household consumption
code: 12, description: Food and beverages, processed
code: 121, description: Food and beverages, processed, mainly for industry
code: 122, description: Food and beverages, processed, mainly for household consumption
code: 2, description: Industrial supplies nes
code: 21, description: Industrial supplies nes, primary
code: 22, description: Industrial supplies nes, processed


In [40]:
# Print column names and types
code_columns = inspector.get_columns(COMMODITY_CODE_TABLE)
for column in code_columns:
    print(column["name"], column["type"])

commodity_code VARCHAR(255)
commodity_description VARCHAR(400)


### Confirm commodity exports data has been added by querying the exports table

In [41]:
exports_list = session.query(CommodityExports).limit(10)
for commodity in exports_list:
    print(f"commodity: {commodity.id}, trade flow: {commodity.trade_flow}")

commodity: 0, trade flow: Export
commodity: 1, trade flow: Export
commodity: 244983, trade flow: Export
commodity: 244988, trade flow: Export
commodity: 244992, trade flow: Export
commodity: 245000, trade flow: Export
commodity: 245005, trade flow: Export
commodity: 245010, trade flow: Export
commodity: 245015, trade flow: Export
commodity: 344203, trade flow: Export


In [42]:
# Print column names and types
exports_columns = inspector.get_columns(COMMODITY_EXPORTS_TABLE)
for column in exports_columns:
    print(column["name"], column["type"])

id INTEGER
year INTEGER
comodity_code VARCHAR(255)
trade_flow VARCHAR(255)
trade_value_usd VARCHAR(255)
weight_kg DOUBLE PRECISION
quantity_name VARCHAR(255)
quantity DOUBLE PRECISION
category_id VARCHAR(255)
country_id INTEGER


### Confirm commodity imports data has been added by querying the imports table

In [43]:
imports_list = session.query(CommodityImports).limit(10)
for commodity in imports_list:
    print(f"commodity: {commodity.id}, trade flow: {commodity.trade_flow}")

commodity: 2, trade flow: Import
commodity: 45602, trade flow: Import
commodity: 45603, trade flow: Import
commodity: 145425, trade flow: Import
commodity: 244980, trade flow: Import
commodity: 244981, trade flow: Import
commodity: 244982, trade flow: Import
commodity: 244984, trade flow: Import
commodity: 244985, trade flow: Import
commodity: 244986, trade flow: Import


In [44]:
# Print column names and types
imports_columns = inspector.get_columns(COMMODITY_IMPORTS_TABLE)
for column in imports_columns:
    print(column["name"], column["type"])

id INTEGER
year INTEGER
comodity_code VARCHAR(255)
trade_flow VARCHAR(255)
trade_value_usd VARCHAR(255)
weight_kg DOUBLE PRECISION
quantity_name VARCHAR(255)
quantity DOUBLE PRECISION
category_id VARCHAR(255)
country_id INTEGER


## Join tables in database

### Reflect database into ORM classes

In [45]:
Base = automap_base()
Base.prepare(engine, reflect=True)
Base.classes.keys()

['commodity_code',
 'commodity_exports',
 'commodity_category',
 'country',
 'commodity_imports',
 'financial_aid']

### Map classes

In [46]:
CE = Base.classes[COMMODITY_EXPORTS_TABLE]
CO = Base.classes[COMMODITY_CODE_TABLE]
CAT = Base.classes[COMMODITY_CATEGORY_TABLE]
COU = Base.classes[COUNTRY_TABLE]

### Join commodity exports table and country tables

In [47]:
sel = [CE.id, CE.year, CE.trade_flow, CE.trade_value_usd, CE.weight_kg, CE.quantity_name, CE.quantity,
      CE.country_id, CO.commodity_description, COU.country_or_area]
query = session.query(*sel).filter(CE.country_id == COU.id).limit(10).all()


for record in query:
    (CE.id, CE.year, CE.trade_flow, CE.trade_value_usd, CE.weight_kg, CE.quantity_name, CE.quantity,
     CE.country_id, CO.commodity_description, COU.country_or_area) = record
    print(record)

(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages, primary', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages, primary, mainly for industry', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages, primary, mainly for household consumption', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages, processed', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages, processed, mainly for industry', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Food and beverages, processed, mainly for household consumption', 'Afghanistan')
(0, 2016, 'Export', '6088', 2339.0, 'Number of items', 51.0, 0, 'Industrial supplies nes', 'Afghanistan')
(0, 2016, 'Export

## Store financial aid csv into pandas dataframe

In [48]:
financial_df = pd.read_csv(FINANCIAL_AID_CSV)
financial_df

Unnamed: 0,Donor Name,Donor Country,Flow Type,Donor Type,Aid Type,Transaction Type,Recipient Country,Recipient Type,Receiver,Remarks,...,Target Geography,Targeted beneficiaries,Amount,Currency,Disbursement Date,USD Amount,Information Source,Reference Link,Updated by,Email Address
0,United Kingdom,United Kingdom,International,Government,Cash,Disbursement,Not Known,National/Regional NGO,AAIN,,...,,,770416.0,USD,7/2/15,770416,Financial Tracking Service - June 2 2015,http://fts.unocha.org/pageloader.aspx?page=eme...,Alina Acharya,earthquake@opennepal.net
1,Sweden,Sweden,International,Government,Cash,Disbursement,France,Others,ACF,,...,,,580585.0,USD,7/2/15,580585,Financial Tracking Service - June 2 2015,http://fts.unocha.org/pageloader.aspx?page=eme...,Alina Acharya,earthquake@opennepal.net
2,Denmark,Denmark,International,Government,Cash,Disbursement,Not Known,National/Regional NGO,ADRA,,...,,,74118.0,USD,7/2/15,74118,Financial Tracking Service - June 2 2015,http://fts.unocha.org/pageloader.aspx?page=eme...,Alina Acharya,earthquake@opennepal.net
3,European Commission's Humanitarian Aid and Civ...,Not Applicable\r\n,International,Multilateral,Cash,Disbursement,Not Known,National/Regional NGO,ADRA,,...,,,486726.0,USD,7/2/15,486726,Financial Tracking Service - June 2 2015,http://fts.unocha.org/pageloader.aspx?page=eme...,Alina Acharya,earthquake@opennepal.net
4,Norway,Norway,International,Government,Cash,Disbursement,Not Known,National/Regional NGO,ADRA,,...,,,661201.0,USD,6/21/15,661201,Financial Tracking Service - May 14 2015,http://fts.unocha.org/pageloader.aspx?page=eme...,Alina Acharya,earthquake@opennepal.net
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1166,Communist Party Of Nepal (United),Nepal,National,Others,Cash,Disbursement,Nepal,Government,Prime Minister's Disaster Relief Fund,,...,,,55000.0,,6/8/15,541.872,Prime Minister's Disaster Relief Fund - June 8...,http://pmrelief.opmcm.gov.np/contributors.aspx,Alina Acharya,earthquake@opennepal.net
1167,Bhaktapur Finance Kamli,Nepal,National,Corporates,Cash,Disbursement,Nepal,Government,Prime Minister's Disaster Relief Fund,,...,,,500000.0,,6/8/15,4926.108,Prime Minister's Disaster Relief Fund - June 8...,http://pmrelief.opmcm.gov.np/contributors.aspx,Alina Acharya,earthquake@opennepal.net
1168,Tel Welfare Society,Nepal,National,Others,Cash,Disbursement,Nepal,Government,Prime Minister's Disaster Relief Fund,,...,,,401111.0,,6/8/15,3951.833,Prime Minister's Disaster Relief Fund - June 8...,http://pmrelief.opmcm.gov.np/contributors.aspx,Alina Acharya,earthquake@opennepal.net
1169,Jebil'S Finance Limited,Nepal,National,Corporates,Cash,Disbursement,Nepal,Government,Prime Minister's Disaster Relief Fund,,...,,,360000.0,,6/8/15,3546.798,Prime Minister's Disaster Relief Fund - June 8...,http://pmrelief.opmcm.gov.np/contributors.aspx,Alina Acharya,earthquake@opennepal.net


### Verify financial aid dataframe count

In [49]:
financial_df.count()

Donor Name                1171
Donor Country             1166
Flow Type                 1171
Donor Type                1171
Aid Type                  1170
Transaction Type          1107
Recipient Country         1171
Recipient Type            1165
Receiver                  1171
Remarks                     30
Description                487
Target Geography             7
Targeted beneficiaries       3
Amount                    1137
Currency                  1060
Disbursement Date         1113
USD Amount                1137
Information Source        1171
Reference Link            1171
Updated by                1171
Email Address             1171
dtype: int64

### Drop null values from financial aid dataframe and remove unnecessary columns

In [50]:
cleaned_financial_df = financial_df[['Donor Country','Donor Type','Aid Type','Receiver','Amount','Currency','USD Amount']]
cleaned_financial_df = cleaned_financial_df.dropna()
cleaned_financial_df.count()

Donor Country    1051
Donor Type       1051
Aid Type         1051
Receiver         1051
Amount           1051
Currency         1051
USD Amount       1051
dtype: int64

### Rename columns of financial aid dataframe

In [51]:
rn_cleaned_financial_df = cleaned_financial_df.rename(columns={
                                                     'Donor Country': 'donor_country',
                                                     'Donor Type': 'donor_type',
                                                     'Aid Type': 'aid_type',
                                                     'Receiver': 'receiver',
                                                     'Amount': 'amount',
                                                     'Currency': 'currency',
                                                     'USD Amount': 'USD_amount'})

rn_cleaned_financial_df

Unnamed: 0,donor_country,donor_type,aid_type,receiver,amount,currency,USD_amount
0,United Kingdom,Government,Cash,AAIN,770416.0,USD,770416
1,Sweden,Government,Cash,ACF,580585.0,USD,580585
2,Denmark,Government,Cash,ADRA,74118.0,USD,74118
3,Not Applicable\r\n,Multilateral,Cash,ADRA,486726.0,USD,486726
4,Norway,Government,Cash,ADRA,661201.0,USD,661201
...,...,...,...,...,...,...,...
1097,Not Known,Others,Cash,Prime Minister's Disaster Relief Fund,217058.0,NPR,2138.502
1098,Nepal,Corporates,Cash,Prime Minister's Disaster Relief Fund,101111.0,NPR,996.167
1099,Nepal,Others,Cash,Prime Minister's Disaster Relief Fund,111380.0,NPR,1097.34
1100,Nepal,Individual,Cash,Prime Minister's Disaster Relief Fund,121481.0,NPR,1196.857


### Clean and standardize country names

In [52]:
rn_cleaned_financial_df.donor_country.unique()

array(['United Kingdom', 'Sweden', 'Denmark', 'Not Applicable\r\n',
       'Norway', 'United States', 'Nepal', 'Azerbaijan', 'Bangladesh',
       'Bhutan', 'Czech Republic', 'France', 'Greece', 'Indonesia',
       'Italy', 'Japan', 'New Zealand', 'Poland', 'Spain', 'Canada',
       'Germany', 'Ireland', 'Belgium', 'China', 'Austria',
       'Korea, Republic of', 'Monaco', 'Not Applicable', 'Australia',
       'Not Known', 'HOLY SEE (VATICAN CITY STATE)', 'Netherlands',
       'South Africa', 'Morocco', 'India', 'Finland', 'Singapore',
       'Thailand', 'Turkey', 'United Arab Emirates', 'Switzerland',
       'Andorra', 'Armenia', 'Bulgaria', 'Lithuania', 'Malta', 'Slovenia',
       'Hong Kong', 'Iceland', 'Luxembourg', 'Algeria', 'Saudi Arabia',
       'Philippines', 'CANADA', 'TAIWAN, PROVINCE OF CHINA', 'Estonia',
       'Hungary', 'Israel', 'Kuwait', 'Malaysia', 'Mexico', 'Pakistan',
       'Qatar', 'Russian Federation', 'Sri Lanka', 'Timor-Leste',
       'Zimbabwe', 'Congo', 'Viet 

In [53]:
combined_cleaned_df = rn_cleaned_financial_df.replace({
    'HOLY SEE (VATICAN CITY STATE)':'Italy',
    'Korea, Republic of': 'Rep. of Korea',
    'TAIWAN, PROVINCE OF CHINA': 'China',
    'CANADA': 'Canada',
    'Baharain': 'Bahrain',
    'Czech Republic': 'Czech Rep.',
    'Monaco': 'Morocco',
    'United States': 'USA',
    'Hong Kong': 'China'
})

In [54]:
# Dropping bad values
rn_cleaned_financial_df = rn_cleaned_financial_df.drop(rn_cleaned_financial_df[rn_cleaned_financial_df.donor_country.isin(['Not Applicable\r\n','Not Applicable','Not Known'])].index)

rn_cleaned_financial_df.donor_country.unique()
sorted_df = combined_cleaned_df.sort_values(by='donor_country')
sorted_df.donor_country.unique()
# sorted_df.to_excel('connor.xlsx')

array(['Algeria', 'Andorra', 'Armenia', 'Australia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belgium', 'Bhutan',
       'Bulgaria', 'Canada', 'China', 'Congo', 'Croatia', 'Czech Rep.',
       'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Ghana',
       'Greece', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Ireland',
       'Israel', 'Italy', 'Japan', 'Korea (Republic of)', 'Kuwait',
       'Lithuania', 'Luxembourg', 'Malaysia', 'Malta', 'Mexico',
       'Morocco', 'Nepal', 'Netherlands', 'New Zealand', 'Norway',
       'Not Applicable', 'Not Applicable\r\n', 'Not Known', 'Pakistan',
       'Philippines', 'Poland', 'Qatar', 'Rep. of Korea',
       'Russian Federation', 'Saudi Arabia', 'Singapore', 'Slovenia',
       'South Africa', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland',
       'Thailand', 'Timor-Leste', 'Turkey', 'USA', 'United Arab Emirates',
       'United Kingdom', 'Viet Nam', 'Zimbabwe'], dtype=object)

In [55]:
countries_df.country_or_area.unique()
# countries_df.to_excel('phil.xlsx')

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belgium-Luxembourg', 'Belize', 'Benin', 'Bermuda', 'Bhutan',
       'Bolivia (Plurinational State of)', 'Bosnia Herzegovina',
       'Botswana', 'Brazil', 'Brunei Darussalam', 'Bulgaria',
       'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon',
       'Canada', 'Central African Rep.', 'Chad', 'Chile', 'China',
       'China, Hong Kong SAR', 'China, Macao SAR', 'Colombia', 'Comoros',
       'Congo', 'Cook Isds', 'Costa Rica', "Côte d'Ivoire", 'Croatia',
       'Cuba', 'Cyprus', 'Czech Rep.', 'Denmark', 'Dominica',
       'Dominican Rep.', 'Ecuador', 'Egypt', 'El Salvador', 'Eritrea',
       'Estonia', 'Ethiopia', 'EU-28', 'Faeroe Isds', 'Fiji', 'Finland',
       'Fmr Fed. Rep. of Germany', 'Fmr Sudan', 'Fra

## Add auto incrementing id column to financial aid dataframe

In [56]:
combined_cleaned_df.insert(0, 'id', range(0, 0 + len(combined_cleaned_df)))

In [57]:
combined_cleaned_df

Unnamed: 0,id,donor_country,donor_type,aid_type,receiver,amount,currency,USD_amount
0,0,United Kingdom,Government,Cash,AAIN,770416.0,USD,770416
1,1,Sweden,Government,Cash,ACF,580585.0,USD,580585
2,2,Denmark,Government,Cash,ADRA,74118.0,USD,74118
3,3,Not Applicable\r\n,Multilateral,Cash,ADRA,486726.0,USD,486726
4,4,Norway,Government,Cash,ADRA,661201.0,USD,661201
...,...,...,...,...,...,...,...,...
1097,1046,Not Known,Others,Cash,Prime Minister's Disaster Relief Fund,217058.0,NPR,2138.502
1098,1047,Nepal,Corporates,Cash,Prime Minister's Disaster Relief Fund,101111.0,NPR,996.167
1099,1048,Nepal,Others,Cash,Prime Minister's Disaster Relief Fund,111380.0,NPR,1097.34
1100,1049,Nepal,Individual,Cash,Prime Minister's Disaster Relief Fund,121481.0,NPR,1196.857


## Inner merge financial aid dataframe with countries dataframe

In [58]:
combined_w_country_id = pd.merge(combined_cleaned_df,countries_df,how='inner',left_on='donor_country',right_on='country_or_area')
combined_w_country_id

Unnamed: 0,id_x,donor_country,donor_type,aid_type,receiver,amount,currency,USD_amount,id_y,country_or_area
0,0,United Kingdom,Government,Cash,AAIN,770416.00,USD,770416,194,United Kingdom
1,30,United Kingdom,Government,Cash,CARE Nepal,1078582.00,USD,1078582,194,United Kingdom
2,39,United Kingdom,Government,Cash,DEC (UK),7396450.00,USD,7396450,194,United Kingdom
3,52,United Kingdom,Government,Cash,HI,462250.00,USD,462250,194,United Kingdom
4,75,United Kingdom,Government,Cash,IOM,1155624.00,USD,1155624,194,United Kingdom
...,...,...,...,...,...,...,...,...,...,...
933,524,Congo,Government,Cash,Prime Minister's Disaster Relief Fund,936014.40,NPR,9222,44,Congo
934,618,Viet Nam,Others,Cash,Prime Minister's Disaster Relief Fund,5114000.00,NPR,50384,200,Viet Nam
935,734,Croatia,Others,Cash,Prime Minister's Disaster Relief Fund,5193877.74,NPR,51171,48,Croatia
936,926,Ghana,Individual,Cash,Prime Minister's Disaster Relief Fund,30618.00,NPR,302,75,Ghana


In [59]:
combined_w_country_id = combined_w_country_id.rename(columns={'id_y': 'country_id', 'id_x': 'id'})
del combined_w_country_id['donor_country']
del combined_w_country_id['country_or_area']

In [60]:
combined_w_country_id

Unnamed: 0,id,donor_type,aid_type,receiver,amount,currency,USD_amount,country_id
0,0,Government,Cash,AAIN,770416.00,USD,770416,194
1,30,Government,Cash,CARE Nepal,1078582.00,USD,1078582,194
2,39,Government,Cash,DEC (UK),7396450.00,USD,7396450,194
3,52,Government,Cash,HI,462250.00,USD,462250,194
4,75,Government,Cash,IOM,1155624.00,USD,1155624,194
...,...,...,...,...,...,...,...,...
933,524,Government,Cash,Prime Minister's Disaster Relief Fund,936014.40,NPR,9222,44
934,618,Others,Cash,Prime Minister's Disaster Relief Fund,5114000.00,NPR,50384,200
935,734,Others,Cash,Prime Minister's Disaster Relief Fund,5193877.74,NPR,51171,48
936,926,Individual,Cash,Prime Minister's Disaster Relief Fund,30618.00,NPR,302,75


### Convert USD_amount column type from string to float

In [61]:
combined_w_country_id.dtypes

id              int32
donor_type     object
aid_type       object
receiver       object
amount        float64
currency       object
USD_amount     object
country_id      int32
dtype: object

In [62]:
combined_w_country_id['USD_amount'] = pd.to_numeric(combined_w_country_id['USD_amount'],errors='coerce')

In [63]:
combined_w_country_id.dtypes

id              int32
donor_type     object
aid_type       object
receiver       object
amount        float64
currency       object
USD_amount    float64
country_id      int32
dtype: object

## Use pandas/d6tstack to load financial aid dataframe into sql

In [64]:
start_time = time.time()
d6tstack.utils.pd_to_psql(combined_w_country_id, cfg_uri_psql, FINANCIAL_AID_TABLE, if_exists='append', sep='\t')
print("Time to load aid dataframe into sql:")
print("--- %s seconds ---" % (time.time() - start_time))

Time to load aid dataframe into sql:
--- 0.24005937576293945 seconds ---


### Confirm financial aid data has been added to database by querying the financial aid table

In [65]:
donor_list = session.query(FinancialAid).limit(10)
for donor in donor_list:
    print(f"id: {donor.id}, USD Amount: {donor.USD_amount}")

id: 0, USD Amount: 770416.0
id: 30, USD Amount: 1078582.0
id: 39, USD Amount: 7396450.0
id: 52, USD Amount: 462250.0
id: 75, USD Amount: 1155624.0
id: 76, USD Amount: 1926040.0
id: 81, USD Amount: 485362.0
id: 89, USD Amount: 847458.0
id: 101, USD Amount: 100000.0
id: 149, USD Amount: 770416.0
