# Distribution of production to all facilities

Assumptions -> Each facility (within a product and territory group) produces at the same fraction of its capacity

Caveats -> Production for FORMER USSR is allocated to RUSSIAN FEDERATION, FORMER CZECHOSLOVAKIA to CZECH REPUBLIC, and FORMER YUGOSLAVIA to SERBIA/MONTENEGRO/KOSOVO
-> The specific routes stated in the production data are not used, as they do not match up with the capacity routes

In [None]:
import pandas as pd

# Directory holding the raw ICIS exports (capacity.csv and production.csv).
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
filepath = "D:/data/ICIS_data/facility_stats/to_2050/"
# Directory the processed facility-production CSVs are written to
# (relative path, so it is resolved against the current working directory).
output_path = '../data/processed/'

In [None]:
## Data filtering
# Load the ICIS facility capacity table and the production table
capacity = pd.read_csv(filepath + 'capacity.csv', low_memory=False, index_col=0)
production = pd.read_csv(filepath + 'production.csv', low_memory=False, index_col=0)

# Year columns are addressed by their string labels, '1978' through '2050'
years = [str(year) for year in range(1978, 2051)]

# Capacity: discard columns that hold no data at all, read '-' and missing
# cells as zero, then make sure the year columns are numeric
capacity_filt = capacity.dropna(axis=1, how='all')
capacity_filt = capacity_filt.replace('-', 0).fillna(0)
capacity_filt[years] = capacity_filt[years].astype(float)

# Production: first fold the dissolved states into a single present-day
# territory so that their names line up with the capacity table
name_dict = {
    'FORMER USSR': 'RUSSIAN FEDERATION',
    'FORMER CZECHOSLOVAKIA': 'CZECH REPUBLIC',
    'FORMER YUGOSLAVIA': 'SERBIA/MONTENEGRO/KOSOVO',
}
production['COUNTRY/TERRITORY'] = production['COUNTRY/TERRITORY'].replace(name_dict)

# ...then apply the same cleaning as for capacity
production_filt = production.dropna(axis=1, how='all')
production_filt = production_filt.replace('-', 0).fillna(0)
production_filt[years] = production_filt[years].astype(float)

In [None]:
# Distribute each product/territory's production over its facilities in
# proportion to the share of the group's capacity that each facility holds.
grouping_cols = ['PRODUCT', 'COUNTRY/TERRITORY']

# Number of leading columns kept in the outputs below.
# NOTE(review): presumably the first 14 columns are the facility metadata
# columns that precede the year columns - confirm against capacity.csv.
n_meta_cols = 14

# Total capacity per product/territory and year.
# Only the year columns are summed: aggregating every column would also try to
# "add up" the text metadata columns, which is wasted work and can raise a
# TypeError on recent pandas when a column mixes strings with filled-in zeros.
capacity_totals = capacity_filt.groupby(grouping_cols)[years].sum()

# After the merge, '<year>_x' is the facility's own capacity and '<year>_y'
# is the total capacity of its product/territory group.
capacity_props = capacity_filt.merge(capacity_totals, on=grouping_cols, how='left')
x_cols, y_cols = [[year + addition for year in years] for addition in ['_x', '_y']]

# Facility share of group capacity; a group with zero capacity in a year gives
# 0/0 = NaN, which the fillna below turns into a share of 0.
for year, x_col, y_col in zip(years, x_cols, y_cols):
    capacity_props[year] = capacity_props[x_col]/capacity_props[y_col]
capacity_props = capacity_props[list(capacity_props.columns[:n_meta_cols])+years].fillna(0)

# Get production per grouping columns
production_totals = production_filt[grouping_cols+years].groupby(grouping_cols).sum().reset_index()

# Get production per facility.
# Left merge: every facility row is kept; a product/territory with no
# production record yields NaN (filled with 0 below), and production for groups
# without any facility in the capacity table is not allocated anywhere.
# Here '<year>_x' is the capacity share and '<year>_y' the group production.
facility_production = capacity_props.merge(production_totals, on=grouping_cols, how='left')
for year, x_col, y_col in zip(years, x_cols, y_cols):
    facility_production[year] = facility_production[x_col]*facility_production[y_col]
facility_production = facility_production[list(facility_production.columns[:n_meta_cols])+years].fillna(0)

In [None]:
# Persist the per-facility production table (index included) for later steps
facility_production_file = output_path + 'icisFacilityProduction.csv'
facility_production.to_csv(facility_production_file)

In [None]:
## Add uncertainties
# Relative uncertainty applied uniformly to every yearly production value
production_uncertainty = 0.1
facility_production = pd.read_csv(output_path + 'icisFacilityProduction.csv', index_col=0)

years = [str(year) for year in range(1978, 2051)]

# Build all '<year>_sigma' columns in one step (sigma = production * uncertainty)
# and append them, in year order, after the existing columns
sigma_cols = (facility_production[years] * production_uncertainty).add_suffix('_sigma')
facility_production = pd.concat([facility_production, sigma_cols], axis=1)

facility_production.to_csv(output_path + 'icisFacilityProduction_w_uncertainties.csv')