In [7]:

import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show, reset_output
pd.set_option('display.float_format', lambda x:'%.2f'%x)
from bokeh.models import NumeralTickFormatter
from bokeh.plotting import figure



In [2]:
df_f = pd.read_csv('final_version.csv', parse_dates= True)

df_f.head()

Unnamed: 0,country_or_area,ISO,commodity_transaction,year,category,new_unit,new_quantity
0,Afghanistan,AFG,Electricity - Gross production,1994,total_electricity,"Kilowatt-hours, thousand",687000.0
1,Afghanistan,AFG,Electricity - Gross production,1995,total_electricity,"Kilowatt-hours, thousand",675000.0
2,Afghanistan,AFG,Electricity - Gross production,1996,total_electricity,"Kilowatt-hours, thousand",675000.0
3,Afghanistan,AFG,Electricity - Gross production,1997,total_electricity,"Kilowatt-hours, thousand",670000.0
4,Afghanistan,AFG,Electricity - Gross production,1998,total_electricity,"Kilowatt-hours, thousand",665000.0


In [3]:
df_r = pd.read_csv('renewable.csv', parse_dates= True)

df_r.head()

Unnamed: 0,country_or_area,ISO,commodity_transaction,year,category,new_unit,new_quantity,renewable,non_renewable,difference
0,Afghanistan,AFG,Electricity - Gross production,1994,total_electricity,"Kilowatt-hours, thousand",687000.0,472000.0,215000.0,0.0
1,Afghanistan,AFG,Electricity - Gross production,1995,total_electricity,"Kilowatt-hours, thousand",675000.0,466000.0,209000.0,0.0
2,Afghanistan,AFG,Electricity - Gross production,1996,total_electricity,"Kilowatt-hours, thousand",675000.0,475000.0,200000.0,0.0
3,Afghanistan,AFG,Electricity - Gross production,1997,total_electricity,"Kilowatt-hours, thousand",670000.0,485000.0,185000.0,0.0
4,Afghanistan,AFG,Electricity - Gross production,1998,total_electricity,"Kilowatt-hours, thousand",665000.0,495000.0,170000.0,0.0


In [4]:
total_electricity=df_f.loc[(df_f['commodity_transaction'] == 'Electricity - Gross production')]
total_electricity.category.unique()


array(['total_electricity'], dtype=object)

In [5]:
total_at_world = total_electricity.groupby('year')['new_quantity'].sum().sort_values(ascending=True).reset_index()
total_at_world


Unnamed: 0,year,new_quantity
0,1994,12886108252.5
1,1995,13324751389.0
2,1996,13753309573.1
3,1997,14042592982.3
4,1998,14370600972.5
5,1999,14801231134.8
6,2000,15496088666.2
7,2001,15597990241.2
8,2002,16215148669.5
9,2003,16812144850.4


In [16]:
output_notebook()
p = figure()
p.line(x='year', y='new_quantity', line_width=2, source = total_at_world)
p.yaxis.formatter=NumeralTickFormatter(format="00")
p.xaxis.axis_label = 'Rok'
p.yaxis.axis_label = 'Zużycie kV/h'
show(p)

In [17]:
def _get_country_per_year_stats(
    data_renewable: pd.DataFrame,
    country: str, 
    year: int
) -> pd.DataFrame:
    """ Gets stats of country per year"""

    yearlist = [year-2,year-1,year]
    frame_for_year = data_renewable.loc[(data_renewable['year'] == year)].sort_values(by= 'new_quantity', ascending=True).reset_index(drop=True)
    contry_frame_3years = data_renewable[ (data_renewable['year'].isin(yearlist)) & (data_renewable['country_or_area'] == country)]
    country_frame = data_renewable[(data_renewable['year'] == year) & (data_renewable['country_or_area'] == country)]
    top3 = frame_for_year.head(3)
    bot3 = frame_for_year.tail(3)
    frameList = [top3,country_frame,bot3]
    ranking = pd.concat(frameList)
    rankinglist = [1,2,3,frame_for_year[frame_for_year['country_or_area'] == country].index[0]+1,bot3.index[0]+1,bot3.index[1]+1,bot3.index[2]+1]
    ranking.insert(loc=0, column= 'Ranking', value = rankinglist )
    
    return ranking

In [18]:
_get_country_per_year_stats(df_r,'Poland',2014)

Unnamed: 0,Ranking,country_or_area,ISO,commodity_transaction,year,category,new_unit,new_quantity,renewable,non_renewable,difference
0,1,Niue,NIU,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",3226.67,66.67,3160.0,0.0
1,2,Tuvalu,TUV,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",5100.0,0.0,5100.0,0.0
2,3,St. Helena and Depend.,SHN,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",10670.0,1080.0,9590.0,0.0
3380,205,Poland,POL,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",159059000.0,10417000.0,148642000.0,0.0
226,227,India,IND,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",1308873000.0,197532000.0,1111341000.0,0.0
227,228,United States,USA,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",4339210000.0,1339316000.0,2999894000.0,0.0
228,229,China,CHN,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",5649583500.0,1368142000.0,4281441500.0,0.0


In [19]:
def _get_country_yearly_stats(
    data: pd.DataFrame,
    country: str, 
) -> pd.DataFrame:
    """ Gets stats of country throught years"""

    import seaborn as sns
    from bokeh.layouts import gridplot
    from bokeh.io import output_notebook, show, reset_output
    from bokeh.plotting import figure
    from bokeh.models import HoverTool, ColumnDataSource, CDSView, BooleanFilter, Legend
    from bokeh.palettes import Spectral
    from bokeh.models import NumeralTickFormatter
    
    source_list = ['Electricity - total nuclear production',
                'Electricity - total geothermal production',
                'Electricity - total tide, wave and hydro production',
                'Electricity - total solar production',
                'Electricity - total wind production','Electricity - total thermal production','Electricity - Gross production']

    df_ele = data.loc[(data.commodity_transaction.isin(source_list))]

    source_list_short = {'Electricity - total nuclear production': 'Nuclear Production',
                'Electricity - total geothermal production': 'Geothermal Production',
                'Electricity - total tide, wave and hydro production': 'Tide, Wave and Hydro Production',
                'Electricity - total solar production': 'Solar Production',
                'Electricity - total wind production': 'Wind Production',
                'Electricity - total thermal production': 'Thermal Production',
                'Electricity - Gross production': 'Gross Production'}

    df_ele ['commodity_transaction'] = df_ele.commodity_transaction.map(source_list_short)         
          
    output_notebook()

    country_for_pivot = df_ele.loc[(df_ele['country_or_area'] == country)]

    names = list(country_for_pivot['commodity_transaction'].unique())
    names.remove('Gross Production')
    Reverse_Spectral= Spectral.copy()
    Reverse_Spectral[len(names)]

    total_by_source = pd.pivot_table(country_for_pivot, values = 'new_quantity', index=['year'], columns =['commodity_transaction'], fill_value=0)
    
    
    for i in names:
        total_by_source[i + ' %'] = (total_by_source[i])*100/(total_by_source[names].sum(axis=1))

    
    cm = sns.light_palette("blue", as_cmap=True)
    a = total_by_source.style.background_gradient(cmap=cm)

    
    
    source = source = ColumnDataSource(total_by_source)
    
    tooltips_source = [
        ('year', '@year'),
        ('By Source', '@$name{0,0} KWh. M'),
        ('Total','$y{0,0} KWh. M')
    ]

    subplot1 = figure(title = 'Electricity Production in {} by Source 1994-2014'.format(country),plot_width=800, plot_height=600,)
    subplot1.varea_stack(names,
                    x='year',color=Reverse_Spectral[len(names)] , legend_label = names, source = source)

    subplot1.vline_stack(names,
                    x='year',color = 'black', source = source)
    
    subplot1.line(x = 'year', y = 'Gross Production',
                line_dash=[4, 4], line_color='gray', line_width=2, legend_label='Gross Production', source = source)
    subplot1.add_layout(subplot1.legend[0], 'right')
    subplot1.legend.location = "bottom_right"
    #subplot1.legend.location ='outside'
    subplot1.add_tools(HoverTool(tooltips = tooltips_source))
    subplot1.yaxis.formatter=NumeralTickFormatter(format="00")
    show(subplot1)
    
    return a 
   

In [20]:
_get_country_yearly_stats(df_f,'Finland')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ele ['commodity_transaction'] = df_ele.commodity_transaction.map(source_list_short)


commodity_transaction,Gross Production,Nuclear Production,Solar Production,Thermal Production,"Tide, Wave and Hydro Production",Wind Production,Nuclear Production %,Solar Production %,Thermal Production %,"Tide, Wave and Hydro Production %",Wind Production %
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1994,65631000,19427000,1000,34409000,11787000,7000,29.600341,0.001524,52.427968,17.959501,0.010666
1995,64035000,19216000,1000,31882000,12925000,11000,30.008589,0.001562,49.788397,20.184274,0.017178
1996,69373000,19476000,1000,38025000,11860000,11000,28.074323,0.001441,54.812391,17.095988,0.015856
1997,69176000,20894000,1000,36022000,12242000,17000,30.204117,0.001446,52.072973,17.696889,0.024575
1998,70167000,21853000,1000,33239000,15051000,23000,31.14427,0.001425,47.371271,21.450254,0.032779
1999,69457000,22974000,1000,33653000,12780000,49000,33.07658,0.00144,48.45156,18.399873,0.070547
2000,69968000,22479000,2000,32558000,14660000,78000,32.215486,0.002866,46.660074,21.009788,0.111785
2001,74483000,22773000,2000,38250000,13205000,70000,30.650067,0.002692,51.480485,17.772544,0.094213
2002,74945000,22295000,2000,41569000,10776000,64000,29.843654,0.002677,55.643456,14.424544,0.085669
2003,84312000,22731000,2000,51646000,9591000,93000,27.040434,0.002379,61.437255,11.4093,0.110631


In [21]:
df_2014 = df_f[(df_f['year'] == 2014) & (df_f['commodity_transaction'] == 'Electricity - Gross production')] 
df_2014

Unnamed: 0,country_or_area,ISO,commodity_transaction,year,category,new_unit,new_quantity
20,Afghanistan,AFG,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",1049300.00
83,Albania,ALB,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",4724430.00
143,Algeria,DZA,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",64242000.00
206,American Samoa,ASM,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",156945.00
251,Andorra,AND,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",126800.00
...,...,...,...,...,...,...,...
16204,Viet Nam,VNM,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",145730000.00
16267,Wallis and Futuna Is.,WLF,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",18556.00
16302,Yemen,YEM,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",7646000.00
16344,Zambia,ZMB,Electricity - Gross production,2014,total_electricity,"Kilowatt-hours, thousand",14452000.00


In [22]:
df_m = df_f[['ISO','year','commodity_transaction','new_quantity','country_or_area']]
df_m = df_m.loc[(df_m['commodity_transaction']=='Electricity - Gross production')] 
df_m.rename({'new_quantity':'Electric_Energy_Production'},axis=1, inplace=True)
df_m=df_m.sort_values("year")
df_m

Unnamed: 0,ISO,year,commodity_transaction,Electric_Energy_Production,country_or_area
0,AFG,1994,Electricity - Gross production,687000.00,Afghanistan
1808,BRA,1994,Electricity - Gross production,261786000.00,Brazil
10984,NOR,1994,Electricity - Gross production,113356000.00,Norway
11137,OMN,1994,Electricity - Gross production,6187000.00,Oman
11179,@@@,1994,Electricity - Gross production,136023000.00,Other Asia
...,...,...,...,...,...
1208,BLR,2014,Electricity - Gross production,34735000.00,Belarus
10091,NAM,2014,Electricity - Gross production,1498000.00,Namibia
12711,STP,2014,Electricity - Gross production,67000.00,Sao Tome and Principe
10028,MMR,2014,Electricity - Gross production,14156300.00,Myanmar


In [23]:
import plotly.express as px 
px.choropleth(df_m,             
              locations="ISO",               
              color=np.log(df_m['Electric_Energy_Production']),
              hover_name="country_or_area",  
              animation_frame="year",
              color_continuous_scale="Jet",    
              height=700  

             )

       