# Processing CSV files for SEEDS database
This notebook contains the source code for processing the CSV files of the SEEDS project and preparing the data in a format that allows populating the SEEDS app DB.

In [55]:
import pandas as pd
import warnings
import geopandas
import pprint as pp
warnings.filterwarnings('ignore')

## Loading CSV files

In [116]:
# Energy result tables, all read relative to the notebook directory.
# The cells below filter nameplate, storage, citizen, acc, imp and bio on
# their 'spores' column (the scenario id).
nameplate = pd.read_csv('./energy/nameplate_capacity.csv')  # also read via 'techs', 'locs', 'nameplate_capacity'
storage = pd.read_csv('./energy/storage_capacity.csv')  # also read via 'techs', 'locs', 'storage_capacity'
citizen = pd.read_csv('./energy/citizen_leadership_degree.csv')
acc = pd.read_csv('./energy/deployment_acceleration_rate.csv')
imp = pd.read_csv('./energy/import_dependency.csv')
bio = pd.read_csv('./energy/biofuel_use_rate.csv')
# The two electrification tables are loaded but not referenced by the visible cells.
ele_heat = pd.read_csv('./energy/electrification_rate_heat_building.csv')
ele_road = pd.read_csv('./energy/electrification_rate_road_transport.csv')

In [117]:
# Distinct technology names found in the nameplate capacity table,
# in order of first appearance
power_tech = nameplate['techs'].unique().tolist()

pp.pprint(power_tech)

['chp_biofuel_extraction',
 'chp_hydrogen',
 'chp_methane_extraction',
 'chp_wte_back_pressure',
 'electrolysis',
 'existing_pv',
 'existing_wind',
 'hydro_reservoir',
 'hydro_run_of_river',
 'open_field_pv',
 'pumped_hydro',
 'wind_onshore',
 'battery',
 'ccgt',
 'roof_mounted_pv',
 'wind_offshore']


## Preparing data for Scenario table
Here, we first structure the data for the Scenario table, which stores one entry per scenario. The schema of the table is given below.

Table: Scenario
Columns: 

    power_capacity 
    storage_capacity
    community_infrastructure
    implementation_pace
    import_dependency
    bio_fuel
    
    ## computed by summing up data across all location for each scenario
    on_shore_wind
    off_shore_wind
    open_field_pv
    roof_mounted_pv
    hydro_run_river
    
    ## computed by summing up data across generation and storage for each scenario
    land_occupation
    marine_toxicity
    human_toxicity
    fossil_depletion
    metal_depletion
    climate_change


In [118]:
# Total installed generation capacity per scenario ('spores').
# Only the capacity column is aggregated, so the text columns of the table
# ('techs', 'locs') are never passed to sum().
pw_nameplate = nameplate.loc[nameplate['techs'].isin(power_tech), :]
total_power = pw_nameplate.groupby('spores', as_index=False)['nameplate_capacity'].sum()

# Total storage capacity per scenario, restricted to the same technology list.
# The aggregation runs on the filtered frame so that the filter above it
# actually takes effect.
pw_storage = storage.loc[storage['techs'].isin(power_tech), :]
total_storage = pw_storage.groupby('spores', as_index=False)['storage_capacity'].sum()

### Preparing partial Scenario table

In [124]:
# Empty frame holding the columns of the partial Scenario table
partial_columns = ['id', 'infra', 'pace', 'import', 'power', 'storage', 'bio']
scenario_partial = pd.DataFrame(columns=partial_columns)

In [125]:
# Scenario ids, taken from the deployment acceleration table
scenario_ids = sorted(acc.spores.unique())

# Scenario column -> (source table, column holding the value).
# Every source table is filtered on its 'spores' column.
scenario_sources = {
    'infra': (citizen, 'citizen_leadership_degree'),
    'pace': (acc, 'deployment_acceleration_rate'),
    'import': (imp, 'import_dependency'),
    'power': (total_power, 'nameplate_capacity'),
    'storage': (total_storage, 'storage_capacity'),
    'bio': (bio, 'biofuel_use_rate'),
}

scenario_records = []
for spore in scenario_ids:
    record = {'id': spore}
    for field, (frame, column) in scenario_sources.items():
        # First matching value; raises IndexError when the scenario is
        # missing from the source table.
        record[field] = frame.loc[frame['spores'] == spore, column].to_list()[0]
    scenario_records.append(record)

# Build the frame in one step: DataFrame.append no longer exists in
# pandas >= 2.0, and row-by-row appends copy the whole frame every time.
scenario_partial = pd.DataFrame(
    scenario_records,
    columns=['id', 'infra', 'pace', 'import', 'power', 'storage', 'bio'])

In [126]:
# Persist the partial Scenario table; the row index is not written.
scenario_partial.to_csv('Scenario_partial.csv',index=False)

### Location objects

In [64]:
# Regions file, read from the parent directory; a later cell prints the
# `index` and `region_name` fields of each row.
geo = geopandas.read_file('../portugal_regions.geojson')

### Preparing technology generation and storage data
Here, we will process CSV files to get data for two tables: TechGeneration and TechStorage

    Table: TechGeneration
    Columns
        location
        scenario
        technology_type
        energy_generation
    
    
    Table: TechStorage
    Columns
        scenario
        location
        technology_type
        energy_storage


In [132]:
# Both technology tables share the same four columns
tech_table_columns = ['scenario', 'location', 'tech_type', 'value']
tech_gen = pd.DataFrame(columns=tech_table_columns)
tech_sto = pd.DataFrame(columns=tech_table_columns)

In [133]:
# Transmission capacities, plus the battery rows of the nameplate table
trans = pd.read_csv('./energy/transmission_capacity.csv')
is_battery = nameplate['techs'] == 'battery'
battery = nameplate.loc[is_battery, :]

In [135]:
def getTechGenerationRecords(scenario_id,tech_type,loc = None):
    """
    Look up nameplate capacities of one technology in one scenario.

    Reads the module-level `nameplate` table.

    Without `loc` (or with a falsy `loc`) returns a pair of parallel lists:
    the locations and their capacities. With `loc` returns the first
    capacity recorded for that location; raises IndexError if there is none.
    """
    selected = (nameplate['spores'] == scenario_id) & (nameplate['techs'] == tech_type)
    rows = nameplate.loc[selected, ['spores', 'locs', 'nameplate_capacity']]

    if not loc:
        return rows['locs'].to_list(), rows['nameplate_capacity'].to_list()

    at_location = rows.loc[rows['locs'] == loc, 'nameplate_capacity']
    return at_location.to_list()[0]

In [136]:
def getTechStorageRecords(scenario_id,tech_type,loc = None):
    """
    Look up storage capacities of one technology in one scenario.

    Reads the module-level `storage` table.

    Without `loc` (or with a falsy `loc`) returns a pair of parallel lists:
    the locations and their capacities. With `loc` returns the first
    capacity recorded for that location; raises IndexError if there is none.
    """
    selected = (storage['spores'] == scenario_id) & (storage['techs'] == tech_type)
    rows = storage.loc[selected, ['spores', 'locs', 'storage_capacity']]

    if not loc:
        return rows['locs'].to_list(), rows['storage_capacity'].to_list()

    at_location = rows.loc[rows['locs'] == loc, 'storage_capacity']
    return at_location.to_list()[0]

In [137]:
# TechGeneration table: one row per (scenario, technology, location).
# The loop runs over the scenario ids themselves: the lookup helper filters
# on the 'spores' value, whereas the row index of scenario_partial is just a
# position and only equals the id when ids happen to be 0..N-1.
tech_gen_records = []
for s in scenario_ids:
    for tech in power_tech:
        # The helper returns locations and capacities as parallel lists, so
        # they are paired directly instead of querying once more per location.
        locs, values = getTechGenerationRecords(s, tech)
        for loc, value in zip(locs, values):
            tech_gen_records.append({'scenario': s, 'location': loc,
                                     'tech_type': tech, 'value': value})

# Build the frame in one step (DataFrame.append no longer exists in
# pandas >= 2.0); re-running this cell rebuilds the table instead of
# appending the same rows a second time.
tech_gen = pd.DataFrame(tech_gen_records,
                        columns=['scenario', 'location', 'tech_type', 'value'])

In [138]:
# Persist the TechGeneration table; the row index is not written.
tech_gen.to_csv('TechGeneration.csv',index=False)

In [139]:
# TechStorage table: one row per (scenario, technology, location).
# The loop runs over the scenario ids themselves: the lookup helper filters
# on the 'spores' value, whereas the row index of scenario_partial is just a
# position and only equals the id when ids happen to be 0..N-1.
tech_sto_records = []
for s in scenario_ids:
    for tech in power_tech:
        # The helper returns locations and capacities as parallel lists, so
        # they are paired directly instead of querying once more per location.
        locs, values = getTechStorageRecords(s, tech)
        for loc, value in zip(locs, values):
            tech_sto_records.append({'scenario': s, 'location': loc,
                                     'tech_type': tech, 'value': value})

# Build the frame in one step (DataFrame.append no longer exists in
# pandas >= 2.0); re-running this cell rebuilds the table instead of
# appending the same rows a second time.
tech_sto = pd.DataFrame(tech_sto_records,
                        columns=['scenario', 'location', 'tech_type', 'value'])

In [74]:
# Persist the TechStorage table; the row index is not written.
tech_sto.to_csv('TechStorage.csv',index=False)

In [140]:
# Inspect which technologies ended up with storage records
tech_sto.tech_type.unique()

array(['hydro_reservoir', 'pumped_hydro', 'battery'], dtype=object)

### Preparing Impact control tables
Here, we will process the relevant CSV files to prepare the data for technology generation and storage impacts.

    Table: Impact
    Columns
        scenario
        location
        technology_type
        land_occupation
        marine_toxicity
        human_toxicity
        fossil_depletion
        metal_depletion
        climate_change
       

In [141]:
# Environmental impact tables, one CSV per indicator. The lookup helpers
# below read their 'Scenario', 'Processor', 'System' and 'Value' columns.
agri = pd.read_csv('./environmental_impact/dfagricultural_land_occupation_ALOP.csv')  # stored as 'land_occupation'
climate = pd.read_csv('./environmental_impact/dfclimate_change_GWP100.csv')  # stored as 'climate_change'
fossil = pd.read_csv('./environmental_impact/dffossil_depletion_FDP.csv')  # stored as 'fossil_depletion'
human = pd.read_csv('./environmental_impact/dfhuman_toxicity_HTPinf.csv')  # stored as 'human_toxicity'
marine = pd.read_csv('./environmental_impact/dfmarine_ecotoxicity_METPinf.csv')  # stored as 'marine_toxicity'
metal = pd.read_csv('./environmental_impact/dfmetal_depletion_MDP.csv')  # stored as 'metal_depletion'

In [142]:
# Processor labels used to pick generation and storage impact records.
# Each label is <prefix> + <technology> + '_electricity'.
generation_prefix = 'energysystem.generation.electricity_generation.'
storage_prefix = 'energysystem.storage.electricity_storage.'

pro_gen_impact = [generation_prefix + tech + '_electricity'
                  for tech in ('ccgt',
                               'chp_biofuel_extraction',
                               'hydro_reservoir',
                               'hydro_run_of_river')]

pro_sto_impact = [storage_prefix + tech + '_electricity'
                  for tech in ('battery', 'pumped_hydro')]

In [143]:
def getImpactGenerationRecords(dataset,scenario_id,tech_type,loc = None):
    """
    Look up the impact values of one generation technology in one scenario.

    `dataset` is an impact table with 'Scenario', 'Processor', 'System' and
    'Value' columns. Rows are matched on the processor label
    'energysystem.generation.electricity_generation.<tech_type>_electricity'.

    Without `loc` (or with a falsy `loc`) returns a pair of parallel lists:
    the systems and their values. With `loc` returns the first value
    recorded for that system; raises IndexError if there is none.
    """
    processor = 'energysystem.generation.electricity_generation.' + tech_type + '_electricity'

    wanted = (dataset['Scenario'] == scenario_id) & (dataset['Processor'] == processor)
    rows = dataset.loc[wanted, ['System', 'Value']]

    if not loc:
        return rows['System'].to_list(), rows['Value'].to_list()

    at_location = rows.loc[rows['System'] == loc, 'Value']
    return at_location.to_list()[0]

In [144]:
# Empty generation impact table: keys, the six indicators, then the technology

impact_gen_columns = ['scenario', 'location',
                      'fossil_depletion', 'human_toxicity', 'land_occupation',
                      'marine_toxicity', 'metal_depletion', 'climate_change',
                      'tech_type']
impact_gen = pd.DataFrame(columns=impact_gen_columns)

In [145]:
# Indicator column -> impact table holding it; built once, outside the loops.
impact_sources = {'land_occupation': agri, 'fossil_depletion': fossil,
                  'human_toxicity': human, 'marine_toxicity': marine,
                  'metal_depletion': metal, 'climate_change': climate}

# One row per (scenario, generation technology, location).
# The loop runs over the scenario ids themselves: the lookup helper filters
# on the 'Scenario' value, whereas the row index of scenario_partial is just
# a position and only equals the id when ids happen to be 0..N-1.
impact_gen_records = []
for s in scenario_ids:
    for tech in ['ccgt','chp_biofuel_extraction','hydro_reservoir','hydro_run_of_river']:
        # The land-occupation table supplies the list of locations; every
        # indicator is then looked up for each of them.
        locs, _ = getImpactGenerationRecords(agri, s, tech)
        for loc in locs:
            record = {'scenario': s, 'location': loc, 'tech_type': tech}
            for key, dataset in impact_sources.items():
                record[key] = getImpactGenerationRecords(dataset, s, tech, loc)
            impact_gen_records.append(record)

# Build the frame in one step (DataFrame.append no longer exists in
# pandas >= 2.0); re-running this cell rebuilds the table instead of
# appending the same rows a second time.
impact_gen = pd.DataFrame(
    impact_gen_records,
    columns=['scenario', 'location', 'fossil_depletion', 'human_toxicity',
             'land_occupation', 'marine_toxicity', 'metal_depletion',
             'climate_change', 'tech_type'])

In [147]:
# Sanity check: shape of the per-scenario sums of the generation impact table
impact_gen.groupby(by='scenario').sum().shape

(261, 6)

In [148]:
# Persist the generation impact table; the row index is not written.
impact_gen.to_csv('ImpactGeneration.csv',index=False)

In [149]:
def getImpactStorageRecords(dataset,scenario_id,tech_type,loc = None):
    """
    Look up the impact values of one storage technology in one scenario.

    `dataset` is an impact table with 'Scenario', 'Processor', 'System' and
    'Value' columns. Rows are matched on the processor label
    'energysystem.storage.electricity_storage.<tech_type>_electricity'.

    Without `loc` (or with a falsy `loc`) returns a pair of parallel lists:
    the systems and their values. With `loc` returns the first value
    recorded for that system; raises IndexError if there is none.
    """
    processor = 'energysystem.storage.electricity_storage.' + tech_type + '_electricity'

    wanted = (dataset['Scenario'] == scenario_id) & (dataset['Processor'] == processor)
    rows = dataset.loc[wanted, ['System', 'Value']]

    if not loc:
        return rows['System'].to_list(), rows['Value'].to_list()

    at_location = rows.loc[rows['System'] == loc, 'Value']
    return at_location.to_list()[0]

In [150]:
# Empty storage impact table: keys, the six indicators, then the technology
impact_sto_columns = ['scenario', 'location',
                      'fossil_depletion', 'human_toxicity', 'land_occupation',
                      'marine_toxicity', 'metal_depletion', 'climate_change',
                      'tech_type']
impact_sto = pd.DataFrame(columns=impact_sto_columns)

In [151]:
# Indicator column -> impact table holding it; built once, outside the loops.
impact_sources = {'land_occupation': agri, 'fossil_depletion': fossil,
                  'human_toxicity': human, 'marine_toxicity': marine,
                  'metal_depletion': metal, 'climate_change': climate}

# One row per (scenario, storage technology, location).
# The loop runs over the scenario ids themselves: the lookup helper filters
# on the 'Scenario' value, whereas the row index of scenario_partial is just
# a position and only equals the id when ids happen to be 0..N-1.
impact_sto_records = []
for s in scenario_ids:
    for tech in ['battery','pumped_hydro']:
        # The land-occupation table supplies the list of locations; every
        # indicator is then looked up for each of them.
        locs, _ = getImpactStorageRecords(agri, s, tech)
        for loc in locs:
            record = {'scenario': s, 'location': loc, 'tech_type': tech}
            for key, dataset in impact_sources.items():
                record[key] = getImpactStorageRecords(dataset, s, tech, loc)
            impact_sto_records.append(record)

# Build the frame in one step (DataFrame.append no longer exists in
# pandas >= 2.0); re-running this cell rebuilds the table instead of
# appending the same rows a second time.
impact_sto = pd.DataFrame(
    impact_sto_records,
    columns=['scenario', 'location', 'fossil_depletion', 'human_toxicity',
             'land_occupation', 'marine_toxicity', 'metal_depletion',
             'climate_change', 'tech_type'])

In [152]:
# Persist the storage impact table; the row index is not written.
impact_sto.to_csv('ImpactStorage.csv',index=False)

In [153]:
# Stack the generation and storage impacts into one table and persist it.
# reset_index() keeps the previous row labels as an 'index' column.
impact = pd.concat([impact_gen, impact_sto], axis=0).reset_index()
impact.to_csv('Impact.csv', index=False)

In [162]:
# Inspect which technologies ended up with generation records
tech_gen.tech_type.unique()

array(['chp_biofuel_extraction', 'chp_hydrogen', 'chp_methane_extraction',
       'chp_wte_back_pressure', 'electrolysis', 'existing_pv',
       'existing_wind', 'hydro_reservoir', 'hydro_run_of_river',
       'open_field_pv', 'pumped_hydro', 'wind_onshore', 'battery', 'ccgt',
       'roof_mounted_pv', 'wind_offshore'], dtype=object)

### Extending partial Scenario table by including remaining columns
We will now use the other dataframes to extend Scenario table which was previously created partially.

In [203]:
# Technologies whose capacity is reported per scenario in the Scenario table
results_columns = ['wind_onshore',
                   'wind_offshore',
                   'open_field_pv',
                   'roof_mounted_pv',
                   'hydro_run_of_river']

# Capacity summed over all locations, per scenario and technology.
# Only the 'value' column is aggregated: letting sum() run over the text
# column 'location' gives version-dependent results (silently dropped by
# older pandas, string-concatenated by newer releases).
results_df = tech_gen.loc[tech_gen['tech_type'].isin(results_columns), :]
results_ag = results_df.groupby(by=['scenario', 'tech_type'])[['value']].sum()

# Impact indicators summed over locations and technologies, per scenario.
# The six indicator columns are selected explicitly for the same reason.
impact_ag = impact.groupby(by='scenario')[
    ['fossil_depletion', 'human_toxicity', 'land_occupation',
     'marine_toxicity', 'metal_depletion', 'climate_change']
].sum()

In [207]:
# Impact indicators carried over to the Scenario table
impact_columns = ['fossil_depletion',
                  'human_toxicity',
                  'land_occupation',
                  'marine_toxicity',
                  'metal_depletion',
                  'climate_change']

# Scenario id, per-technology capacities, indicators, then battery storage
remaining_scenario_columns = ['scenario', *results_columns, *impact_columns, 'battery']

remaining_scenario = pd.DataFrame(columns=remaining_scenario_columns)

In [202]:
# Battery storage capacity summed over all locations, per scenario.
# Only the 'value' column is aggregated so that the text column 'location'
# is never passed to sum().
battery_sto = (tech_sto.loc[tech_sto.tech_type == 'battery', :]
               .groupby(by=['scenario', 'tech_type'])[['value']]
               .sum())
# Peek at the entry of scenario 0 to check the structure of the result
battery_sto.loc[0,:].to_dict()

{'value': {'battery': 4.675065200126717e-07}}

In [208]:

# Add one row per scenario: capacities of the selected technologies,
# the summed impact indicators and the battery storage capacity.
for scenario in scenario_ids:
    tech_values = results_ag.loc[scenario, :].to_dict()['value']
    impact_values = impact_ag.loc[scenario, :][impact_columns].to_dict()
    battery_values = battery_sto.loc[scenario, :].to_dict()['value']

    # Later groups overwrite earlier ones on a key clash
    record = {'scenario': scenario, **tech_values, **impact_values, **battery_values}

    scenario_row = pd.DataFrame(record, index=[0])
    remaining_scenario = pd.concat([remaining_scenario, scenario_row])

# Every row was added with label 0; move those labels into an 'index'
# column and number the rows consecutively
remaining_scenario = remaining_scenario.reset_index()

In [209]:
# Check the row index after reset_index
remaining_scenario.index

RangeIndex(start=0, stop=261, step=1)

In [210]:
# Index-aligned join: rows are paired by row label, not by scenario id.
# Both frames were filled by iterating scenario_ids in the same order.
# NOTE(review): joining on the id columns would not depend on row order.
scenario = scenario_partial.join(remaining_scenario)

In [223]:
import numpy as np
# Write the extended Scenario table first; the NaN -> None replacement below
# only affects the in-memory frame, not the CSV already written.
scenario.to_csv('Scenario_extended.csv',index=False)
scenario = scenario.replace({np.nan:None})

# Script to populate dataset

In [212]:
# Reload the tables written by the cells above; this rebinds the in-memory
# frames of the same names. impact_gen and impact_sto are not reloaded.
scenario = pd.read_csv('Scenario_extended.csv')
impact = pd.read_csv('Impact.csv')
tech_gen = pd.read_csv('TechGeneration.csv')
tech_sto = pd.read_csv('TechStorage.csv')

In [215]:
# 'import' is a Python keyword and cannot be used for attribute access on
# itertuples() rows, so the column is copied under a usable name
# (read as s.import_dep in the population loop).
scenario['import_dep'] = scenario['import']

In [193]:
# Drop fully identical rows, modifying tech_gen in place
tech_gen.drop_duplicates(inplace=True)

In [195]:
# Print the location code and region name of every row in the regions table.
# NOTE(review): g.index is the value of a column named 'index' only if the
# file provides one (the frame's own row label is exposed as g.Index) - confirm.
for g in geo.itertuples():
    print({'location':g.index,'region':g.region_name})


{'location': 'PRT-1_1', 'region': 'Aveiro'}
{'location': 'PRT-3_1', 'region': 'Beja'}
{'location': 'PRT-4_1', 'region': 'Braga'}
{'location': 'PRT-6_1', 'region': 'Castelo Branco'}
{'location': 'PRT-7_1', 'region': 'Coimbra'}
{'location': 'PRT-10_1', 'region': 'Guarda'}
{'location': 'PRT-11_1', 'region': 'Leiria'}
{'location': 'PRT-12_1', 'region': 'Lisboa'}
{'location': 'PRT-14_1', 'region': 'Portalegre'}
{'location': 'PRT-15_1', 'region': 'Porto'}
{'location': 'PRT-18_1', 'region': 'Viana do Castelo'}
{'location': 'PRT-19_1', 'region': 'Vila Real'}
{'location': 'PRT-20_1', 'region': 'Viseu'}
{'location': 'PRT-5_1', 'region': 'Bragança'}
{'location': 'PRT-8_1', 'region': 'Évora'}
{'location': 'PRT-16_1', 'region': 'Santarém'}
{'location': 'PRT-17_1', 'region': 'Setúbal'}
{'location': 'PRT-9_1', 'region': 'Faro'}
{'location': 'PRT_1', 'region': 'Norte'}
{'location': 'PRT_2', 'region': 'Sul'}


In [213]:
# List the columns available on the Scenario table
scenario.columns

Index(['id', 'infra', 'pace', 'import', 'power', 'storage', 'bio', 'index',
       'scenario', 'wind_onshore', 'wind_offshore', 'open_field_pv',
       'roof_mounted_pv', 'hydro_run_of_river', 'fossil_depletion',
       'human_toxicity', 'land_occupation', 'marine_toxicity',
       'metal_depletion', 'climate_change', 'battery'],
      dtype='object')

In [None]:
# Walk through every scenario and print the records that would populate the
# Scenario, TechGeneration, TechStorage and Impact tables.
for s in scenario.itertuples():
    print('Scenario id:',int(s.id))
    s_record = dict(power_capacity = s.power,
            storage_capacity = s.storage,
            community_infrastructure = s.infra,
            import_dependency = s.import_dep,
            implementation_pace = s.pace,
            bio_fuel = s.bio,
            wind_onshore = s.wind_onshore,
            wind_offshore = s.wind_offshore,
            open_field_pv = s.open_field_pv,
            roof_mounted_pv = s.roof_mounted_pv,
            hydro_run_of_river = s.hydro_run_of_river,
            climate_change = s.climate_change,
            land_occupation = s.land_occupation,
            marine_toxicity = s.marine_toxicity,
            human_toxicity = s.human_toxicity,
            metal_depletion = s.metal_depletion,
            # listed in the Scenario schema alongside the other indicators
            fossil_depletion = s.fossil_depletion,
            battery = s.battery
            )

    pp.pprint(s_record)

    # Child records of this scenario. Each table is filtered with a mask
    # built from that same table, so mask and frame always share one index.
    tech_gen_s = tech_gen[tech_gen['scenario'] == s.id]
    tech_sto_s = tech_sto[tech_sto['scenario'] == s.id]
    impact_s = impact[impact['scenario'] == s.id]

    for tech_gen_record in tech_gen_s.itertuples():
        print({'scenario':s.id,
              'location':tech_gen_record.location,
              'technology_type':tech_gen_record.tech_type,
              # generation rows carry 'energy_generation' (TechGeneration schema)
              'energy_generation':tech_gen_record.value})

    for tech_sto_record in tech_sto_s.itertuples():
        print({'scenario':s.id,
              'location':tech_sto_record.location,
              'technology_type':tech_sto_record.tech_type,
              'energy_storage':tech_sto_record.value})

    # Only the impact rows of the current scenario are printed; iterating the
    # whole table here would attribute every row to every scenario.
    for impact_record in impact_s.itertuples():
        print({'scenario':s.id,
               'location':impact_record.location,
               'technology_type':impact_record.tech_type,
               'land_occupation':impact_record.land_occupation,
               'marine_toxicity':impact_record.marine_toxicity,
               'human_toxicity':impact_record.human_toxicity,
               'metal_depletion':impact_record.metal_depletion,
               'fossil_depletion':impact_record.fossil_depletion,
               'climate_change':impact_record.climate_change
              })
    
    