In [1]:
import pandas as pd
import numpy as np

In [2]:
### Emission data
# Source: http://cdiac.ornl.gov/ftp/ndp030/CSV-FILES/
emissions_sheet = pd.read_excel('indicator CDIAC carbon_dioxide_cumulative_emissions.xlsx')

# Keep only the rows that have at least one truthy (non-missing, non-zero)
# value in the columns after the first one.
has_any_value = emissions_sheet.iloc[:, 1:].any(axis=1)

# The first column's header is the sheet title; call it 'name' so it can be
# used as a merge key later on.
raw = emissions_sheet[has_any_value].rename(
    columns={'CO2 emissions from fossil-fuels since 1751 (metric tons)': 'name'}
)

In [3]:
import pycountry
def rename(s):
    """Look up a country by name in pycountry and return its ``numeric`` code.

    Parameters
    ----------
    s : value passed to ``pycountry.countries.get(name=...)``.

    Returns
    -------
    The matched record's ``numeric`` attribute, returned unchanged, or
    ``None`` when pycountry has no country with that name.

    Only "not found" outcomes are mapped to ``None``; any other exception is
    allowed to propagate so real failures are not silently hidden.
    """
    try:
        country = pycountry.countries.get(name=s)
    except LookupError:
        # A miss surfaces as KeyError/LookupError in some pycountry releases
        # (presumably also for non-string input such as NaN -- verify against
        # the installed version).
        return None
    # Other releases return None for a miss; getattr maps that to None too.
    return getattr(country, 'numeric', None)

# How many emission-sheet names does `rename` fail to resolve (i.e. map to None)?
pd.isnull(raw['name'].map(rename)).sum()

45

In [4]:
# Read the population table, skipping the first three lines of the file,
# and keep only the country identifiers plus the 2014 column.
population_csv = pd.read_csv('population.csv', skiprows=3)
pop = population_csv[['Country Name', 'Country Code', '2014']]

# Give the year column a descriptive name.
pop.columns = ['Country Name', 'Country Code', 'population']

In [5]:
def get_id(s):
    """Translate a three-letter country code into pycountry's numeric id.

    Parameters
    ----------
    s : country code looked up through ``pycountry.countries.get``.

    Returns
    -------
    int
        The matched record's ``numeric`` attribute converted with ``int``,
        or ``-1`` when no country matches the code.

    Only "not found" outcomes are mapped to ``-1``; any other exception is
    allowed to propagate so real failures are not silently hidden.
    """
    # The lookup keyword is spelled `alpha3` in older pycountry releases and
    # `alpha_3` in newer ones. Try the original spelling first, then the new
    # one, so a pycountry upgrade does not silently turn every code into -1.
    for field in ('alpha3', 'alpha_3'):
        try:
            country = pycountry.countries.get(**{field: s})
        except LookupError:
            # Either this release does not know the keyword, or the code has
            # no match (a miss raises KeyError in some releases).
            continue
        if country is not None:
            return int(country.numeric)
    return -1

# Attach the numeric id for each country code, then discard the rows that
# get_id could not resolve (it returns the sentinel -1 for those).
pop['id'] = pop['Country Code'].apply(get_id)
pop = pop.loc[pop['id'].ne(-1)]

In [6]:
# Same recipe as for the population table: skip the first three lines of the
# file, keep the identifiers plus one year column (2015 here), rename it.
area_csv = pd.read_csv('area.csv', skiprows=3)
area = area_csv[['Country Name', 'Country Code', '2015']]
area.columns = ['Country Name', 'Country Code', 'area']

# Attach numeric ids and discard rows get_id could not resolve (-1 sentinel).
area['id'] = area['Country Code'].apply(get_id)
area = area.loc[area['id'].ne(-1)]

In [7]:
# Tab-separated table of countries; the cells below use its 'id' and 'name'
# columns.
nm_df = pd.read_csv('world-country-names.tsv', sep='\t')

# Spelling overrides applied to nm_df['name'] before it is merged with the
# emissions sheet on 'name'. Keys are the spellings to replace, values are
# the replacements.
nm_dic = {
    'Bolivia, Plurinational State of': 'Bolivia',
    'Virgin Islands, British': 'British Virgin Islands',
    'Brunei Darussalam': 'Brunei',
    'Congo': 'Congo, Rep.',
    'Congo, the Democratic Republic of the': 'Congo, Dem. Rep.',
    'Cook Islands': 'Cook Is',
    "Côte d'Ivoire": "Cote d'Ivoire",
    'Faroe Islands': 'Faeroe Islands',
    'Falkland Islands (Malvinas)': 'Falkland Is (Malvinas)',
    'Hong Kong': 'Hong Kong, China',
    'Iran, Islamic Republic of': 'Iran',
    "Korea, Democratic People's Republic of": 'North Korea',
    'Korea, Republic of': 'South Korea',
    'Kyrgyzstan': 'Kyrgyz Republic',
    "Lao People's Democratic Republic": "Lao",
    'Macao': 'Macao, China',
    'Macedonia, the former Yugoslav Republic of': 'Macedonia, FYR',
    'Micronesia, Federated States of': 'Micronesia, Fed. Sts.',
    'Moldova, Republic of': 'Moldova',
    'Réunion': 'Reunion',
    'Russian Federation': 'Russia',
    'Saint Helena, Ascension and Tristan da Cunha': 'St. Helena',
    'Saint Kitts and Nevis': 'St. Kitts and Nevis',
    'Saint Lucia': 'St. Lucia',
    'Saint Vincent and the Grenadines': 'St. Vincent and the Grenadines',
    'Saint Pierre and Miquelon': 'St.-Pierre-et-Miquelon',
    'Slovakia': 'Slovak Republic',
    'Syrian Arab Republic': 'Syria',
    'Taiwan, Province of China': 'Taiwan',
    'Tanzania, United Republic of': 'Tanzania',
    'Wallis and Futuna': 'Wallis et Futuna',
    # NOTE(review): this entry maps a short name to a long one, the opposite
    # direction of every other entry -- confirm against the TSV and the
    # emissions sheet that it is not reversed.
    'Venezuela': 'Venezuela, Bolivarian Republic of',
    'Viet Nam': 'Vietnam',
    'Bonaire, Sint Eustatius and Saba': 'Saba',
}

# Names without an override are kept as they are.
nm_df['name'] = nm_df['name'].map(lambda s: nm_dic.get(s, s))

In [8]:
# Left-join population first, then area, onto the name table by numeric id,
# so every row of nm_df is kept even when an indicator is missing.
for indicator in (pop[['id', 'population']], area[['id', 'area']]):
    nm_df = pd.merge(nm_df, indicator, how='left', left_on='id', right_on='id')

In [9]:
# Left-join the emissions sheet onto the country table by (normalised) name;
# countries with no matching emissions row get NaN in the emission columns.
df = pd.merge(nm_df, raw, how='left', left_on='name', right_on='name')

In [10]:
# Replace missing population / area values with the sentinel -1.
for indicator_col in ('population', 'area'):
    df[indicator_col] = df[indicator_col].fillna(-1)

In [14]:
# Spot-check a single country's merged row.
df.loc[df['name'] == 'China']

Unnamed: 0,id,name,population,area,1751,1755,1760,1765,1770,1775,...,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011
46,156,China,1364270000.0,9388211.0,,,,,,,...,80738190000.0,85262950000.0,90550640000.0,96340130000.0,102754000000.0,109545200000.0,116582300000.0,124268700000.0,132556400000.0,141667400000.0


In [11]:
# Build one JSON-ready record per row of df.
# Note: despite its name, data_dict is a list of dicts.
data_dict = []

for _, record in df.iterrows():
    entry = {
        'id': record['id'],
        'name': record['name'],
        'population': record['population'],
        'area': record['area'],
    }

    # Everything from the fifth column onwards is treated as the emission
    # series (in the China preview the first four columns are id, name,
    # population and area); missing entries are dropped.
    series = record.iloc[4:].dropna()

    # Rows with no truthy emission value get no 'annual'/'cumulative' keys.
    if series.any():
        cumulative, annual = {}, {}

        previous = 0
        for year, total in series.items():
            cumulative[year] = total
            # Difference from the previous available column. The columns are
            # not all one year apart (the preview starts 1751, 1755, 1760),
            # so "annual" is the step between consecutive columns.
            # Zero steps are left out.
            step = total - previous
            if step:
                annual[year] = step
            previous = total

        entry['annual'] = annual
        entry['cumulative'] = cumulative

    data_dict.append(entry)

In [12]:
import json
# Serialise the list of per-country records to emission.json.
with open('emission.json', mode='w') as sink:
    json.dump(data_dict, fp=sink)