### Load packages

In [1]:
import pandas as pd

from user_setup import *
from gst_tools import constants
from gst_tools import data_manipulation as utils
from gst_tools import plotting

### Load data

In [2]:
# Data selection
#
# Map the `dataset` code chosen in the user setup to the raw input file name.
# Codes 1 and 2 both use the PRIMAP file, 3 the BP world energy file, and
# 4 / 5 the two IPCC AR6 files.

if dataset in (1, 2):
    raw_data_fname = constants.primap_extrap_fname
elif dataset == 3:
    raw_data_fname = constants.bp_world_energy_panel_fname
elif dataset == 4:
    raw_data_fname = constants.ipcc_ar6
elif dataset == 5:
    raw_data_fname = constants.ipcc_ar6_incl_indirect
else:
    # The list of valid codes must match the branches above (1 to 5).
    raise ValueError('Error. Please provide a valid data type (either 1, 2, 3, 4 or 5.)')

In [3]:
# Data loading: read the file selected above from the input folder

raw_data = utils.load_data(
    constants.input_folder,
    raw_data_fname,
)

### Reorganise and filter data

In [4]:
# Reorganise dataset for processing
#
# Each data source has its own renaming helper; datasets 1 and 2 share the
# PRIMAP one.

if dataset in (1, 2):
    renamed_data = utils.rename_primap(raw_data)
elif dataset == 3:
    renamed_data = utils.rename_bp(raw_data)
elif dataset == 4:
    renamed_data = utils.rename_ipcc(raw_data)
elif dataset == 5:
    renamed_data = utils.rename_ipcc_indirect(raw_data)

In [5]:
# Filter dataset
#
# Produces `proc_data`: the renamed dataset restricted to the gas / sector /
# energy variable, countries and baseline year chosen in the user setup.

# PRIMAP scenario code that corresponds to the selected PRIMAP dataset
if dataset == 1:
    primap_scenario = 'HISTCR'
elif dataset == 2:
    primap_scenario = 'HISTTP'

# Stop here on an invalid year pair: merely printing a message would leave
# `proc_data` undefined and make the following cells fail with an unrelated
# NameError.
if baseline_year >= year_of_interest:
    raise ValueError(
        'Error: The baseline year ({}) is not smaller than the year of interest ({}). '
        'The baseline year must be smaller than the year of interest.'.format(baseline_year, year_of_interest)
    )

if dataset == 1 or dataset == 2:
    proc_data = utils.filter_primap(renamed_data, primap_gas, primap_sector, primap_scenario, countries, baseline_year, constants.gas_names_variable)
elif dataset == 3:
    proc_data = utils.filter_bp(renamed_data, energy_variable, countries, baseline_year)
elif dataset == 4:
    proc_data = utils.filter_ipcc(renamed_data, ipcc_gas, ipcc_sector_or_subsector, countries, baseline_year)
elif dataset == 5:
    proc_data = utils.filter_ipcc_indirect(renamed_data, ipcc_gas, ipcc_sector_or_subsector, countries, baseline_year)

  total = sector_aggr.groupby(by=['country', 'year']).sum().reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered['subsector'] = filtered['subsector'].astype(str) + ' [' + filtered['sector'].astype(str) + ']'


In [6]:
# Convert the "absolute" dataset to reasonable units
#
# Both switches are on by default; datasets 1-2 go through the Gg -> Mt helper
# and datasets 4-5 through the t -> Mt helper. Dataset 3 is left untouched.

convert_from_t_to_Mt = True
convert_from_Gg_to_Mt = True

if convert_from_Gg_to_Mt and dataset in (1, 2):
    proc_data = utils.convert_from_Gg_to_Mt(proc_data)
elif convert_from_t_to_Mt and dataset in (4, 5):
    proc_data = utils.convert_from_t_to_Mt(proc_data)

In [7]:
# Define processed data filename
#
# The name is built by a source-specific helper; the two IPCC datasets share
# one helper and differ only in the `indirect` flag.

if dataset in (1, 2):
    proc_fname = utils.define_primap_proc_fname(
        proc_data,
        constants.gas_names_fname,
        constants.sector_names_fname,
    )
elif dataset == 3:
    proc_fname = utils.define_bp_proc_fname(proc_data)
elif dataset == 4:
    proc_fname = utils.define_ipcc_proc_fname(proc_data)
elif dataset == 5:
    proc_fname = utils.define_ipcc_proc_fname(proc_data, indirect=True)

In [8]:
# Write filtered dataset to file (processed-data folder, name defined above)

utils.write_to_file(
    proc_data,
    constants.proc_data_folder,
    proc_fname,
)

### Normalise dataset if necessary

In [9]:
# Normalisation per capita and per USD
#
# Produces `final_data`, the dataset handed to the plotting cells:
#   * data_type == 'absolute'   -> the filtered data unchanged
#   * data_type == 'per capita' -> normalised with the World Bank population file
#   * any other data_type       -> normalised with the World Bank GDP file
# Normalised results are also written to the processed-data folder.

if data_type != 'absolute':
    if dataset == 3 and energy_variable != 3:
        # Raise instead of printing: a printed message would leave `final_data`
        # undefined and the plotting cells would fail with an unrelated NameError.
        raise ValueError('ERROR: The energy variable selected is a share, so it can be no further normalised.')

    if data_type == 'per capita':
        pop_dset = utils.rearrange_wb_data(constants.input_folder, constants.wb_population_fname)
        final_data = utils.normalise(proc_data, pop_dset, data_type)
    else:
        gdp_dset = utils.rearrange_wb_data(constants.input_folder, constants.wb_gdp_fname)
        final_data = utils.normalise(proc_data, gdp_dset, data_type)

    # Switch for the unit conversion of the normalised values (on by default)
    convert_normalised = True

    if convert_normalised:
        final_data = utils.convert_norm(final_data, dataset, data_type)

    # Same name as the filtered file, suffixed with the data type; spaces are
    # replaced so that e.g. 'per capita' yields a space-free file name.
    utils.write_to_file(final_data, constants.proc_data_folder, proc_fname.replace('.csv', '_'+data_type+'.csv').replace(' ', '_'))

else:
    final_data = proc_data

### Plot

In [10]:
# Prepare the data for the chosen plot type. Plot type 4 returns three extra
# values (year_max, start_year, end_year) ahead of the usual triple.
if plot_type == 4:
    year_max, start_year, end_year, dset_to_plot, variable, unit = utils.prepare_for_plotting(final_data, plot_type)
else:
    dset_to_plot, variable, unit = utils.prepare_for_plotting(final_data, plot_type)

# Name of the `constants` attribute holding the source label for each dataset
source_attr_by_dataset = {
    1: 'primap_source_histcr',
    2: 'primap_source_histtp',
    3: 'bp_source',
    4: 'ipcc_source',
    5: 'ipcc_indirect_source',
}

if dataset in source_attr_by_dataset:
    source = getattr(constants, source_attr_by_dataset[dataset])

In [11]:
# Descriptive text and output path for the figure, then the plot name built
# from the variable and the lower-cased plot type text
plot_type_text, plot_fname = plotting.define_plot_name(
    plot_type,
    variable,
    year_of_interest,
    baseline_year,
    constants.output_folder,
    file_type=file_type,
)
plot_name = ' - '.join((variable, plot_type_text.lower()))

In [12]:
# Trends of the dataset to plot, computed with num_years_trend=5
trends, rolling_trends, trends_unit = plotting.calculate_trends(
    dset_to_plot,
    num_years_trend=5,
)

In [13]:
# Render the figure that matches the requested plot type (1-4)
if plot_type not in (1, 2, 3, 4):
    raise ValueError('Error. Please provide a valid plot type (either 1, 2, 3 or 4.)')

# Font settings handed to every plotting call
text_style = dict(
    font=constants.font,
    label_font_size=constants.label_font_size,
    title_font_size=constants.title_font_size,
    stats_font_size=constants.stats_font_size,
)

if plot_type == 4:
    plotting.make_histogram_peaking(
        year_max, variable, start_year, end_year,
        save_plot=True,
        filepath=plot_fname,
        dpi=constants.dpi_type_4,
        **text_style
    )
else:
    # Keyword arguments common to the histogram types 1, 2 and 3
    histogram_options = dict(
        selected_country='',
        xlabel=plot_type_text,
        sourcename=source,
        filepath=plot_fname,
        remove_outliers=True,
        plot_name=plot_name,
        dpi=constants.dpi_types_123,
        **text_style
    )

    if plot_type == 1:
        plotting.make_histogram(
            dset_to_plot, year_of_interest, unit,
            plot_type=1,
            variable_title=variable,
            **histogram_options
        )
    else:
        # Types 2 and 3 additionally receive the x_below / x_above settings
        histogram_options.update(
            x_below_countries=constants.x_below_countries,
            x_below_arrow=constants.x_below_arrow,
            x_above_countries=constants.x_above_countries,
            x_above_arrow=constants.x_above_arrow,
        )

        if plot_type == 2:
            # Only the percentage difference since the baseline year is plotted
            df_abs_diff_baseline, df_perc_diff_baseline = utils.calculate_diff_since_yearX(dset_to_plot, baseline_year)
            plotting.make_histogram(
                df_perc_diff_baseline, year_of_interest, '%',
                plot_type=2,
                variable_title=variable,
                **histogram_options
            )
        else:
            trends, rolling_trends, trends_unit = plotting.calculate_trends(dset_to_plot, num_years_trend=5)

            # Keep the variable's capitalisation when it begins with a gas name;
            # otherwise lower-case its first letter so it reads as part of the title.
            starts_with_gas = any(
                variable.startswith(gas_label)
                for gas_label in constants.gas_names_variable.values()
            )
            if starts_with_gas:
                v_title = '5-year trend in\n' + variable
            else:
                v_title = '5-year trend in\n' + variable[0].lower() + variable[1:]

            plotting.make_histogram(
                rolling_trends, year_of_interest, trends_unit,
                plot_type=3,
                variable_title=v_title,
                ktuk=2,
                **histogram_options
            )


