In [1]:

import pandas as pd
# Show every float with exactly two decimals whenever a frame is displayed.
pd.options.display.float_format = lambda number: '%.2f' % number

# Dataset that is handed to every plotting helper in this notebook.
df_f = pd.read_csv('final_version.csv', parse_dates=True)




In [2]:
def _get_world_production_lineplot(
    data: pd.DataFrame
) -> pd.DataFrame:
    """ Gets gross production lineplot for world per year
        Works at final_version.csv
    """
    from bokeh.io import output_notebook, show
    from bokeh.plotting import figure
    from bokeh.models import NumeralTickFormatter

    total_electricity=data.loc[(data['commodity_transaction'] == 'Electricity - Gross production')]
    total_at_world = total_electricity.groupby('year')['new_quantity'].sum().sort_values(ascending=True).reset_index()
    
    output_notebook()
    p = figure()
    p.line(x='year', y='new_quantity', line_width=2, source = total_at_world)
    p.yaxis.formatter=NumeralTickFormatter(format="00")
    p.xaxis.axis_label = 'Year'
    p.yaxis.axis_label = 'Production TW/h'
    p.title = 'World Electricity Production'
    show(p)


In [3]:
# Draw the world gross-production line chart for the loaded dataset.
_get_world_production_lineplot(df_f)

In [4]:
def _get_country_yearly_stats(
    data: pd.DataFrame,
    country: str, 
) -> pd.DataFrame:
    """ Gets stats of selected country throught years
        Working at final_version.csv file 
    """

    import seaborn as sns
    from bokeh.layouts import gridplot
    from bokeh.io import output_notebook, show, reset_output
    from bokeh.plotting import figure
    from bokeh.models import HoverTool, ColumnDataSource, CDSView, BooleanFilter, Legend
    from bokeh.palettes import Spectral
    from bokeh.models import NumeralTickFormatter
    
    source_list = ['Electricity - total nuclear production',
                'Electricity - total geothermal production',
                'Electricity - total tide, wave and hydro production',
                'Electricity - total solar production',
                'Electricity - total wind production','Electricity - total thermal production','Electricity - Gross production']

    df_ele = data.loc[(data.commodity_transaction.isin(source_list))]

    source_list_short = {'Electricity - total nuclear production': 'Nuclear Production',
                'Electricity - total geothermal production': 'Geothermal Production',
                'Electricity - total tide, wave and hydro production': 'Tide, Wave and Hydro Production',
                'Electricity - total solar production': 'Solar Production',
                'Electricity - total wind production': 'Wind Production',
                'Electricity - total thermal production': 'Thermal Production',
                'Electricity - Gross production': 'Gross Production'}

    df_ele ['commodity_transaction'] = df_ele.commodity_transaction.map(source_list_short)         
          
    output_notebook()

    country_for_pivot = df_ele.loc[(df_ele['country_or_area'] == country)]

    names = list(country_for_pivot['commodity_transaction'].unique())
    names.remove('Gross Production')
    Reverse_Spectral= Spectral.copy()
    Reverse_Spectral[len(names)]

    total_by_source = pd.pivot_table(country_for_pivot, values = 'new_quantity', index=['year'], columns =['commodity_transaction'], fill_value=0)
    
    print(names)
    for i in names:
        total_by_source[i + ' %'] = (total_by_source[i])*100/(total_by_source[names].sum(axis=1))

    
    cm = sns.light_palette("blue", as_cmap=True)
    a = total_by_source.style.background_gradient(cmap=cm)

    dict_for_sort = {'Geothermal Production':5, 'Nuclear Production':4, 'Solar Production':3, 'Thermal Production':6, 'Tide, Wave and Hydro Production':1, 'Wind Production':2}
    names = sorted(names, key = dict_for_sort.get)
    source = source = ColumnDataSource(total_by_source)
    
    tooltips_source = [
        ('year', '@year'),
        ('By Source', '@$name{0,0} KWh. M'),
        ('Total','$y{0,0} KWh. M')
    ]

    subplot1 = figure(title = 'Electricity Production in {} by Source 1994-2014'.format(country),plot_width=800, plot_height=600,)
    subplot1.varea_stack(names,
                    x='year',color=Reverse_Spectral[len(names)] , legend_label = names, source = source)

    subplot1.vline_stack(names,
                    x='year',color = 'black', source = source)
    
    subplot1.line(x = 'year', y = 'Gross Production',
                line_dash=[4, 4], line_color='gray', line_width=2, legend_label='Gross Production', source = source)
    subplot1.add_layout(subplot1.legend[0], 'right')
    subplot1.legend.location = "bottom_right"
    subplot1.add_tools(HoverTool(tooltips = tooltips_source))
    subplot1.xaxis.axis_label = 'Year'
    subplot1.yaxis.axis_label = 'Production TW/h'
    subplot1.yaxis.formatter=NumeralTickFormatter(format="00")
    show(subplot1)
    
    return a 
   

In [5]:
# Yearly per-source production table and stacked chart for a single country.
_get_country_yearly_stats(df_f,'Poland') # works on df_final

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ele ['commodity_transaction'] = df_ele.commodity_transaction.map(source_list_short)


['Solar Production', 'Thermal Production', 'Tide, Wave and Hydro Production', 'Wind Production']


commodity_transaction,Gross Production,Solar Production,Thermal Production,"Tide, Wave and Hydro Production",Wind Production,Solar Production %,Thermal Production %,"Tide, Wave and Hydro Production %",Wind Production %
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1994,135.347,0.0,131.561,3.786,0.0,0.0,97.202746,2.797254,0.0
1995,139.006,0.0,135.154,3.851,0.001,0.0,97.228897,2.770384,0.000719
1996,143.173,0.0,139.263,3.91,0.0,0.0,97.269038,2.730962,0.0
1997,142.79,0.0,138.972,3.816,0.002,0.0,97.326143,2.672456,0.001401
1998,142.789,0.0,138.458,4.327,0.004,0.0,96.966853,3.030345,0.002801
1999,142.128,0.0,137.842,4.282,0.004,0.0,96.984408,3.012777,0.002814
2000,145.184,0.0,141.063,4.116,0.005,0.0,97.161533,2.835023,0.003444
2001,145.616,0.0,141.382,4.22,0.014,0.0,97.092352,2.898033,0.009614
2002,144.126,0.0,140.159,3.906,0.061,0.0,97.247547,2.710129,0.042324
2003,151.631,0.0,148.214,3.293,0.124,0.0,97.746503,2.17172,0.081777


In [6]:
def _map_for_world(
    data: pd.DataFrame,
) -> pd.DataFrame:
    """ Print map for world with electic stats 
        Working at final_version.csv file 
    """
    import plotly.express as px 
    df_m = data.loc[(data['commodity_transaction']=='Electricity - Gross production')] 
    df_m.rename({'new_quantity':'Electric Energy Production TWh'},axis=1, inplace=True)
    df_m=df_m.sort_values("year")
    
   
    map = px.choropleth(df_m,             
              locations="ISO",               
              color='Electric Energy Production TWh',
              hover_name="country_or_area",  
              animation_frame="year",
              color_continuous_scale='Reds',
              hover_data=['Electric Energy Production TWh'],
              range_color=tuple(([0.0, 5800.0])),
              height=700  
             )

    map.update_layout(
    coloraxis_colorbar=dict(
    title="TVh",
    tickvals=[100,200, 400, 600,800,100,1200,1400,1600,1800,2000,2400,2800,3200,3600,4000,4400,4800,5200]), 
    title = 'Electricity Production at World')         
    map.show()


In [7]:
# Display the animated world map of gross electricity production.
_map_for_world(df_f)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_m.rename({'new_quantity':'Electric Energy Production TWh'},axis=1, inplace=True)


In [8]:
def _production_structure_for_world(
    data: pd.DataFrame,
    ) -> pd.DataFrame:
    """ Return production structure for world 
        Working at final_version.csv file 
    """
    import seaborn as sns
    from bokeh.layouts import gridplot
    from bokeh.io import output_notebook, show, reset_output
    from bokeh.plotting import figure
    from bokeh.models import HoverTool, ColumnDataSource, CDSView, BooleanFilter, Legend
    from bokeh.palettes import Spectral
    


    source_list = ['Electricity - Gross production',
       'Electricity - total thermal production',
       'Electricity - total tide, wave and hydro production',
       'Electricity - total solar production',
       'Electricity - total nuclear production',
       'Electricity - total wind production',
       'Electricity - total geothermal production']
    df_fp = data.loc[(data.commodity_transaction.isin(source_list))]
    total_by_source = pd.pivot_table(df_fp , values = 'new_quantity', index=['year'], columns =['commodity_transaction'], aggfunc = ('sum'), fill_value=0).reset_index()
    
    names = ['Electricity - total tide, wave and hydro production',
                    'Electricity - total wind production',
                    'Electricity - total solar production',
                    'Electricity - total nuclear production',
                     'Electricity - total geothermal production',
                     'Electricity - total thermal production']
    
    for i in names:
        total_by_source[i + ' %'] = (total_by_source[i])*100/(total_by_source[names].sum(axis=1))
    
    source = source = ColumnDataSource(total_by_source)
    Reverse_Spectral= Spectral.copy()

    tooltips_source = [
    ('year', '@year'),
    ('By Source', '@$name{0,0} TWh'),
    ('Total','$y{0,0} TWh')
    ]
    output_notebook()
    subplot = figure(title = 'Electricity Production at World by Source 1994-2014',plot_width=900, plot_height=500)
    subplot.varea_stack(names,
                x='year',color=Reverse_Spectral[6] , legend_label = names, source = source)

    subplot.vline_stack(names,
                x='year',color = 'black', source = source)
    subplot.line(x = 'year', y = 'Electricity - Gross production',
            line_dash=[4, 4], line_color='gray', line_width=2, legend_label='Gross', source = source)

    subplot.legend.location ='bottom_right'
    subplot.add_tools(HoverTool(tooltips = tooltips_source))
    subplot.add_layout(subplot.legend[0], 'right')
    subplot.xaxis.axis_label = 'Year'
    subplot.yaxis.axis_label = 'Production TW/h'
    show(subplot)
        

In [9]:
# Draw the stacked world production chart, split by source.
_production_structure_for_world(df_f)

In [10]:
def _perc_stats_for_world(
       data: pd.DataFrame,
       ) -> pd.DataFrame:
       """ Return percentage share of each sector in production for the world 
        Working at final_version.csv file 
        """
       import seaborn as sns
       from bokeh.layouts import gridplot
       from bokeh.io import output_notebook, show, reset_output
       from bokeh.plotting import figure
       from bokeh.models import HoverTool, ColumnDataSource, CDSView, BooleanFilter, Legend
       from bokeh.palettes import Spectral
       Reverse_Spectral= Spectral.copy()

       source_list = ['Electricity - Gross production',
              'Electricity - total thermal production',
              'Electricity - total tide, wave and hydro production',
              'Electricity - total solar production',
              'Electricity - total nuclear production',
              'Electricity - total wind production',
              'Electricity - total geothermal production']
       df_fp = data.loc[(data.commodity_transaction.isin(source_list))]

       total_by_source_perc = pd.pivot_table(df_fp , values = 'new_quantity', index=['year'], columns =['commodity_transaction'], aggfunc = ('sum'), fill_value=0).reset_index()

       names = ['Electricity - total thermal production',
              'Electricity - total tide, wave and hydro production',
              'Electricity - total solar production',
              'Electricity - total nuclear production',
              'Electricity - total wind production',
              'Electricity - total geothermal production']
       nm_gross = ['Gross']

       for i in names:
              total_by_source_perc[i + ' %'] = (total_by_source_perc[i])*100/(total_by_source_perc[names].sum(axis=1))
       
       total_by_source_perc = total_by_source_perc.drop(['Electricity - total thermal production',
              'Electricity - total tide, wave and hydro production',
              'Electricity - total solar production',
              'Electricity - total nuclear production',
              'Electricity - total wind production',
              'Electricity - total geothermal production'], axis=1)  

       cm = sns.light_palette("blue", as_cmap=True)
       a = total_by_source_perc.style.background_gradient(cmap=cm)
       
       percentage_names = ['Electricity - total tide, wave and hydro production %',
                    'Electricity - total wind production %',
                    'Electricity - total solar production %',
                    'Electricity - total nuclear production %',
                     'Electricity - total geothermal production %',
                     'Electricity - total thermal production %']

       
       output_notebook()
       source_perc = ColumnDataSource(total_by_source_perc)
       subplot3 = figure( title = 'Percentage share of each sector in production at World',plot_width=900, plot_height=600)
       subplot3.vbar_stack(percentage_names,x='year', width=0.5, line_color='black',color= Reverse_Spectral[6],legend_label = percentage_names, source =source_perc)
       
       tooltips_mix = [
       ('year', '@year'),
       ('By Source', '@$name{0.00} %'),
       ('source','$name')
       ]
       subplot3.add_layout(subplot3.legend[0], 'right')
       subplot3.add_tools(HoverTool(tooltips = tooltips_mix))
       show(subplot3)
       
       return a

In [11]:
# Show each source's yearly percentage share of world production.
_perc_stats_for_world(df_f)

commodity_transaction,year,Electricity - Gross production,Electricity - total thermal production %,"Electricity - total tide, wave and hydro production %",Electricity - total solar production %,Electricity - total nuclear production %,Electricity - total wind production %,Electricity - total geothermal production %
0,1994,12886.108253,63.392133,18.819519,0.007163,17.409457,0.056221,0.315507
1,1995,13324.751389,62.991258,19.138195,0.007288,17.504503,0.058949,0.299808
2,1996,13753.309573,63.208897,18.83091,0.007831,17.577121,0.067821,0.30742
3,1997,14042.592982,63.889427,18.672518,0.008359,17.042867,0.083953,0.302875
4,1998,14370.600973,64.239604,18.310986,0.00812,17.017374,0.109211,0.314705
5,1999,14801.231135,64.564715,17.854132,0.00728,17.101401,0.144344,0.328128
6,2000,15496.088666,65.291325,17.471008,0.009512,16.709148,0.19844,0.320566
7,2001,15597.990241,65.588611,16.920147,0.011323,16.91017,0.239806,0.329943
8,2002,16215.14867,66.213075,16.718968,0.013167,16.413915,0.318939,0.321937
9,2003,16812.14485,67.396505,16.211027,0.015203,15.68277,0.374356,0.32014
