In [1]:
import json
import re
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
import sqlalchemy
from sqlalchemy import and_, or_, not_
from sqlalchemy import create_engine, inspect, func
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

# Password and user for postgreSQL DB
from config import postgreSQL_User, postgreSQL_Pass

In [2]:
# Establish the connection to the local FAOSTAT PostgreSQL database.
# The credentials are percent-encoded before being placed in the URL so that
# characters such as "@", ":" or "/" in the user name or password are not
# mistaken for URL delimiters. config.py must therefore hold the raw
# (un-encoded) values.
rds_connection_string = (
    f"postgresql://{quote(str(postgreSQL_User), safe='')}:"
    f"{quote(str(postgreSQL_Pass), safe='')}@localhost:5432/FAOSTAT"
)
engine = create_engine(rds_connection_string)

In [3]:
# Build a declarative base whose mapped classes are generated from the
# tables that already exist in the database (SQLAlchemy automap).
Base = automap_base()
# Reflect the schema through `engine` and generate the mapped classes.
# NOTE(review): `reflect=True` is the legacy spelling; newer SQLAlchemy
# releases expect `Base.prepare(autoload_with=engine)` -- confirm against the
# installed version before upgrading.
Base.prepare(engine, reflect=True)

In [4]:
# List the names of the classes that automap generated.
# Automap can only map tables that have a primary key, so tables without one
# are missing from this list; complete_merge_table is the only one mapped.
Base.classes.keys()

['complete_merge_table']

In [5]:
# Automap does not expose every table, so use the engine's inspector to list
# all table names present in the database.
inspector = inspect(engine)
inspector.get_table_names()

['emissions_land_total',
 'production_crop',
 'complete_merge_table',
 'population_all',
 'emissions_agriculture_total']

In [6]:
# Print the name and type of every column in complete_merge_table,
# framed by a title line and horizontal rules.
columns = inspector.get_columns('complete_merge_table')

print("complete_merge_table")
print("------------------------")
for column in columns:
    print(column['name'], column["type"])
print("------------------------")

complete_merge_table
------------------------
area_code INTEGER
area VARCHAR(255)
year INTEGER
population REAL
land_co2 REAL
agri_ch4 REAL
agri_n2o REAL
agri_co2 REAL
production REAL
harvest REAL
yields REAL
id INTEGER
------------------------


In [7]:
# Open the session (the link from Python to the DB) used by the queries below.
session = Session(engine)

# Short alias for the automapped class of complete_merge_table.
Total = Base.classes.complete_merge_table

In [83]:
# Fetch the `area` value of every row in complete_merge_table.
# The result is a list of one-element rows; an area appears once for each of
# its rows in the table, so the list contains duplicates that are removed in
# a later cell.
count_names = session.query(Total.area).all()

In [10]:
# Query parameters: the area to extract and the inclusive range of years.
select_area = "Armenia"
start_year = 2000
end_year = 2012

# Columns returned for each row, in output order.
# The yield column is exposed as `yields` on the mapped class.
sel = [
    Total.area_code,
    Total.area,
    Total.year,
    Total.population,
    Total.land_co2,
    Total.agri_ch4,
    Total.agri_n2o,
    Total.agri_co2,
    Total.production,
    Total.yields,
]

# All rows for the selected area within the year range, ordered by area and
# then by year. Multiple criteria passed to filter() are joined with AND.
query = (
    session.query(*sel)
    .filter(Total.area == select_area)
    .filter(Total.year >= start_year, Total.year <= end_year)
    .order_by(Total.area.asc(), Total.year.asc())
    .all()
)

query

[(1,
  'Armenia',
  2000,
  3069.59,
  293.333,
  45.621,
  3.0775,
  2373.22,
  2766080.0,
  2895340.0),
 (1,
  'Armenia',
  2001,
  3050.66,
  226.979,
  47.2032,
  3.1336,
  2431.66,
  3444010.0,
  3249830.0),
 (1,
  'Armenia',
  2002,
  3033.9,
  220.388,
  49.0474,
  3.7213,
  2741.71,
  3629580.0,
  3251430.0),
 (1,
  'Armenia',
  2003,
  3017.81,
  293.721,
  50.9378,
  3.6001,
  2723.19,
  3821510.0,
  3516200.0),
 (1,
  'Armenia',
  2004,
  3000.61,
  222.326,
  53.4728,
  3.9221,
  2926.79,
  4710740.0,
  3896870.0),
 (1,
  'Armenia',
  2005,
  2981.26,
  221.163,
  53.5904,
  3.7756,
  2861.74,
  5345370.0,
  4797600.0),
 (1,
  'Armenia',
  2006,
  2958.5,
  230.081,
  55.1691,
  5.8205,
  3838.41,
  4902780.0,
  4894540.0),
 (1,
  'Armenia',
  2007,
  2933.06,
  295.272,
  57.5969,
  4.7334,
  3384.78,
  6004210.0,
  5235610.0),
 (1,
  'Armenia',
  2008,
  2908.22,
  221.551,
  58.062,
  4.5646,
  3323.08,
  6071040.0,
  5335100.0),
 (1,
  'Armenia',
  2009,
  2888.58,
  29

In [84]:
# Collect the area name from every fetched row, then keep each name once in
# first-seen order. dict.fromkeys de-duplicates in a single pass while
# preserving insertion order, which avoids rescanning the result list for
# every row.
areas = [row[0] for row in count_names]
output = list(dict.fromkeys(areas))
print(output)

['Armenia', 'Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Australia', 'Austria', 'Bahamas', 'Bahrain', 'Barbados', 'Belgium-Luxembourg', 'Bangladesh', 'Bermuda', 'Brazil', 'Bhutan', 'Bolivia (Plurinational State of)', 'Botswana', 'Aruba', 'Belize', 'Solomon Islands', 'Brunei Darussalam', 'Bulgaria', 'Burundi', 'Myanmar', 'Cameroon', 'Canada', 'Cabo Verde', 'Cayman Islands', 'Central African Republic', 'Sri Lanka', 'Chad', 'Chile', 'China, mainland', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', 'Cuba', 'Cyprus', 'Czechoslovakia', 'Azerbaijan', 'Benin', 'Denmark', 'Dominica', 'Dominican Republic', 'Belarus', 'Egypt', 'Ecuador', 'El Salvador', 'Equatorial Guinea', 'Ethiopia PDR', 'Estonia', 'Faroe Islands', 'Falkland Islands (Malvinas)', 'Fiji', 'Finland', 'France', 'French Guiana', 'French Polynesia', 'Djibouti', 'Georgia', 'Gabon', 'Gambia', 'Germany', 'Bosnia and Herzegovina', 'Ghana', 'Gibraltar', 'Kir

In [76]:
# Reference list of country names, taken from
# https://github.com/Miguel-Frazao/world-data
# (countries_data.json in the same repository is an alternative source).
COUNTRIES_URL = 'https://raw.githubusercontent.com/Miguel-Frazao/world-data/master/countries.json'

# The timeout keeps the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() surfaces HTTP errors instead of trying
# to parse an error page as JSON.
response = requests.get(COUNTRIES_URL, timeout=30)
response.raise_for_status()
req = response.json()

# Each entry of the downloaded list carries the country name under 'name'.
country_list = [entry['name'] for entry in req]
print(country_list)

['Andorra', 'United Arab Emirates', 'Afghanistan', 'Antigua and Barbuda', 'Anguilla', 'Albania', 'Armenia', 'Angola', 'Antarctica', 'Argentina', 'American Samoa', 'Austria', 'Australia', 'Aruba', 'Aland Islands', 'Azerbaijan', 'Bosnia and Herzegovina', 'Barbados', 'Bangladesh', 'Belgium', 'Burkina Faso', 'Bulgaria', 'Bahrain', 'Burundi', 'Benin', 'Saint Barthelemy', 'Bermuda', 'Brunei', 'Bolivia', 'Bonaire, Saint Eustatius and Saba ', 'Brazil', 'Bahamas', 'Bhutan', 'Bouvet Island', 'Botswana', 'Belarus', 'Belize', 'Canada', 'Cocos Islands', 'Democratic Republic of the Congo', 'Central African Republic', 'Republic of the Congo', 'Switzerland', 'Ivory Coast', 'Cook Islands', 'Chile', 'Cameroon', 'China', 'Colombia', 'Costa Rica', 'Cuba', 'Cape Verde', 'Curacao', 'Christmas Island', 'Cyprus', 'Czech Republic', 'Germany', 'Djibouti', 'Denmark', 'Dominica', 'Dominican Republic', 'Algeria', 'Ecuador', 'Estonia', 'Egypt', 'Western Sahara', 'Eritrea', 'Spain', 'Ethiopia', 'Finland', 'Fiji', 'F

In [85]:
# Compare the area names found in the database with the reference list.
db_names = set(output)
reference_names = set(country_list)

# Names found in both sources, alphabetically sorted.
country_verified = sorted(reference_names & db_names)

# Database names that have no exact match in the reference list.
not_country = sorted(db_names - reference_names)

# Show each list followed by its size.
for names in (country_verified, not_country):
    print(names)
    print(len(names))

['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Cook Islands', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Democratic Republic of the Congo', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Faroe Islands', 'Fiji', 'Finland', 'France', 'French Guiana', 'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe', 'Guam', 'Guatemala', 'Guinea'

In [79]:
# Bundle the three name lists under the keys used in the exported JSON file.
dictionary = dict(
    database=output,
    verified_country=country_verified,
    Not_a_country=not_country,
)

In [80]:
# Write the name lists to data.json.
# The file is encoded as UTF-8: with ensure_ascii=False the names are written
# verbatim, and UTF-8 can represent every character, whereas a single-byte
# codec raises UnicodeEncodeError on any name outside its repertoire.
with open('data.json', 'w', encoding="utf-8") as f:
    json.dump(dictionary, f, ensure_ascii=False, indent=4)