In [25]:
import pandas as pd
import jsonstat as jstat
from dfply import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

# Root of the Eurostat JSON REST endpoint; every query in this notebook is appended to it.
base_url = 'http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/'
# Path segment `nrg_109a` plus the fixed query parameters (precision, first
# time period, unit KTOE, indicator B_100100). The string ends with '&' so that
# further parameters (e.g. `geo=...`) can be appended directly.
base_url_1 = 'nrg_109a?precision=1&sinceTimePeriod=2010&unit=KTOE&indic_en=B_100100&'

In [26]:
# agg_lvls = [
#     ['0000'],
# #     ['2000','3000','4000','5100','5200','5500','6000','7200'],
# #     ['2100','2200','2410','3100','3200','4000','5100','5200','5510','5520','5530','5535','5540','5550','6000','7200']
# ]

## Sanity check: the value for product '0000' should equal the sum of the other products at any one aggregation level.

In [27]:
# Analysis parameters: edit these two values, then re-run the cells below.
# `country` is sent as the `geo` query parameter and is also used as a row filter.
# `year` is a string; it is compared with `==` against the `time` column
# (assumed to hold string ids such as '2016' — TODO confirm).
country = 'AL'
year = '2016'

In [28]:
# Countries to request; each one becomes its own `geo=<code>` query parameter.
geos = [country]

# Join the parameters with '&' directly instead of appending '&' to every item
# and slicing the trailing one off afterwards.
url = "&".join("geo={}".format(geo) for geo in geos)

# `base_url_1` already ends with '&', so the geo fragment is appended as-is.
full_url = base_url + base_url_1 + url
full_url

'http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/nrg_109a?precision=1&sinceTimePeriod=2010&unit=KTOE&indic_en=B_100100&geo=AL'

In [29]:
# Fetch the JSON-stat response for `full_url` and flatten it into a DataFrame
# indexed by `time`.
# NOTE(review): content='id' presumably makes the dimension columns hold category
# ids (e.g. 'AL', 'KTOE') rather than human-readable labels — confirm against the
# jsonstat.py documentation.
data = jstat.from_url(full_url)
data_df = data.to_data_frame('time', content='id')
data_df.head()

Unnamed: 0_level_0,unit,product,indic_en,geo,Value
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010,KTOE,0,B_100100,AL,1572.2
2011,KTOE,0,B_100100,AL,1433.6
2012,KTOE,0,B_100100,AL,1605.1
2013,KTOE,0,B_100100,AL,1965.5
2014,KTOE,0,B_100100,AL,1876.2


In [30]:
# Move `time` out of the index into a regular column so later cells can filter on it.
# The reset is guarded and non-inplace: re-running this cell is then a no-op
# instead of pushing the default integer index into a stray `index` column.
if 'time' in data_df.index.names:
    data_df = data_df.reset_index(level='time')

In [31]:
# Inspect the distinct product codes present in the response.
data_df['product'].unique()

array(['0000', '2115', '2116', '2117', '2118', '2210', '2310', '2410',
       '3105', '3106', '3192', '3193', '4100', '5100', '5510', '5520',
       '5532', '5534', '5535', '5541', '55411', '55412', '55413', '55414',
       '55415', '55416', '5542', '55421', '55422', '55423', '55425',
       '55431', '55432', '5546', '5547', '5548', '5549', '5550', '7100'],
      dtype=object)

In [32]:
# Keep only product codes of at most four characters, dropping the longer
# codes such as '55411' (presumably finer sub-breakdowns — confirm against the
# Eurostat code list).
# `.copy()` makes the result an independent frame, so adding columns to it in
# the following cells does not raise pandas' SettingWithCopyWarning.
data_df = data_df[data_df['product'].str.len() <= 4].copy()

In [33]:
# Derive two coarser grouping keys from the product code: its first three
# characters (`lv3`) and its first two characters (`lv2`).
for level_col, n_chars in (('lv3', 3), ('lv2', 2)):
    data_df[level_col] = data_df['product'].str.slice(stop=n_chars)
data_df.head()

Unnamed: 0,time,unit,product,indic_en,geo,Value,lv3,lv2
0,2010,KTOE,0,B_100100,AL,1572.2,0,0
1,2011,KTOE,0,B_100100,AL,1433.6,0,0
2,2012,KTOE,0,B_100100,AL,1605.1,0,0
3,2013,KTOE,0,B_100100,AL,1965.5,0,0
4,2014,KTOE,0,B_100100,AL,1876.2,0,0


In [34]:
# Category is the two-character prefix, except for the '55' family, which is
# split one level deeper by using the three-character prefix instead.
in_55_family = data_df['lv2'] == '55'
data_df['category'] = data_df['lv3'].where(in_55_family, data_df['lv2'])
data_df.head()

Unnamed: 0,time,unit,product,indic_en,geo,Value,lv3,lv2,category
0,2010,KTOE,0,B_100100,AL,1572.2,0,0,0
1,2011,KTOE,0,B_100100,AL,1433.6,0,0,0
2,2012,KTOE,0,B_100100,AL,1605.1,0,0,0
3,2013,KTOE,0,B_100100,AL,1965.5,0,0,0
4,2014,KTOE,0,B_100100,AL,1876.2,0,0,0


In [35]:
# Keep the rows for the selected country and year, leave out category '00'
# (where product '0000' ends up) and add up the values per category.
pie_data = (
    data_df
    >> mask(X.geo == country, X.time == year, X.category != '00')
    >> group_by('category')
    >> summarize(total=X.Value.sum())
)
# TODO (from a commented-out draft): relabel categories that contribute less
# than 5% of the total as 'Other'.

# Preview the largest categories first.
pie_data.sort_values(by='total', ascending=False).head()

Unnamed: 0,category,total
4,31,1031.1
7,551,669.1
10,554,186.0
5,41,35.0
9,553,12.8


In [36]:
# Display names for the category codes built in the earlier cells.
# Any code without an entry here is reported as 'Other' by `trans_`.
trans = {
    "41": "Natural gas",
    "554": "Biomass",
    "31": "Crude oil",
    "51": "Nuclear",
    "552": "Wind",
    "553": "Solar",
    "551": "Hydro",
    "22": "Lignite and Derivatives"
}


def trans_(x):
    """Return the display name for category code ``x``.

    Args:
        x: Category code, e.g. ``'31'`` or ``'554'``.

    Returns:
        The matching name from ``trans``, or ``'Other'`` when the code has
        no entry in the table.
    """
    return trans.get(x, 'Other')

# Attach the display name of every category as the `cn` column.
pie_data['cn'] = pie_data['category'].map(trans_)

In [38]:
# Donut-style pie trace: one slice per display name, hover text limited to the
# label and its share.
pie_trace = {
    'labels': pie_data['cn'],
    'values': pie_data['total'],
    'type': 'pie',
    'hoverinfo': 'label+percent',
    'hole': 0.4,
}

fig = {
    'data': [pie_trace],
    'layout': {'title': 'Primary production in {}, {}'.format(country, year)},
}

# Render inline, without the "export to plot.ly" link.
iplot(fig, show_link=False)

In [41]:
# Collapse categories that share a display name (e.g. several codes mapped to
# 'Other') into one row each, then write the result next to the notebook.
csv_path = "primary_production_yearly_{}_{}.csv".format(country, year)
csv_data = pie_data >> group_by('cn') >> summarize(total=X.total.sum())
csv_data.to_csv(csv_path, index=False)