In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load the per-project build-cost table from the working directory.
# NOTE(review): the rendered preview shows an "Unnamed: 0" column, which
# suggests the CSV was written together with its row index; passing
# index_col=0 would avoid that column -- confirm nothing downstream uses it.
df = pd.read_csv("gen_build_cost.csv")
# Preview the first ten rows.
df.head(10)

Unnamed: 0,GENERATION_PROJECT,build_year,gen_overnight_cost,gen_fixed_om
0,Anhui-Central_PV-2022,2022,642662.46,9639.9369
1,Anhui-Coal-2022,2022,526267.805,9999.088295
2,Anhui-Gas-2022,2022,669180.897,16060.341528
3,Anhui-Hydro_Nonpumped-2022,2022,1133648.446,17004.72669
4,Anhui-Nuclear-2022,2022,1954433.096,39088.66192
5,Anhui-PV-2022,2022,642662.46,9639.9369
6,Anhui-Wind-2022,2022,993632.241,14904.48362
7,Beijing-Central_PV-2022,2022,642662.46,9639.9369
8,Beijing-Coal-2022,2022,526267.805,9999.088295
9,Beijing-Gas-2022,2022,669180.897,16060.341528


In [10]:
# Start the generation-project table as an empty-column frame indexed by the
# project identifier. Plain column selection is used instead of
# reindex(columns=...) because reindex silently fabricates an all-NaN column
# when the label is missing, whereas selection raises KeyError right here.
generation_df = df[["GENERATION_PROJECT"]].set_index("GENERATION_PROJECT")
# Preview the first ten index entries.
generation_df.head(10)

Anhui-Central_PV-2022
Anhui-Coal-2022
Anhui-Gas-2022
Anhui-Hydro_Nonpumped-2022
Anhui-Nuclear-2022
Anhui-PV-2022
Anhui-Wind-2022
Beijing-Central_PV-2022
Beijing-Coal-2022
Beijing-Gas-2022


In [54]:
# Project identifiers, in the same order as the rows of generation_df.
gen_list = generation_df.index.tolist()
# The load zone is the text before the first '-' of each identifier.
gen_zone_list = [project_name.split('-')[0] for project_name in gen_list]
# Unique zone names in alphabetical order.
zone_list = sorted(set(gen_zone_list))
zone_list[:3]

['Anhui', 'Beijing', 'Chongqing']

In [33]:
# Give every zone a 1-based numeric id following its position in zone_list.
zone_id_dict = {zone_name: zone_id for zone_id, zone_name in enumerate(zone_list, start=1)}
# Show the first three (zone, id) pairs.
list(zone_id_dict.items())[:3]

[('Anhui', 1), ('Beijing', 2), ('Chongqing', 3)]

In [47]:
# The technology label is the second '-'-separated field of each identifier;
# collect the distinct labels and sort them alphabetically.
energy_list = sorted({project_name.split('-')[1] for project_name in gen_list})
energy_list

['Central_PV', 'Coal', 'Gas', 'Hydro_Nonpumped', 'Nuclear', 'PV', 'Wind']

In [36]:
# Give every technology a 1-based numeric id following its position in energy_list.
energy_id_dict = {energy_name: energy_id for energy_id, energy_name in enumerate(energy_list, start=1)}
# Show the first three (technology, id) pairs.
list(energy_id_dict.items())[:3]

[('Central_PV', 1), ('Coal', 2), ('Gas', 3)]

In [40]:
# Build one database id per project: "<existing|new>.<zone id>-<technology id>".
# Projects whose year field is '2022' are labelled "existing"; every other
# year is labelled "new".
gen_dbid_list = []
for project_name in gen_list:
    # Identifiers are expected to have exactly three '-'-separated fields;
    # anything else makes this unpacking raise ValueError.
    zone, energy, year = project_name.split('-')
    dbid = (
        f"existing.{zone_id_dict[zone]}-{energy_id_dict[energy]}"
        if year == '2022'
        else f"new.{zone_id_dict[zone]}-{energy_id_dict[energy]}"
    )
    gen_dbid_list.append(dbid)
# gen_dbid_list follows gen_list order, so positional assignment lines up
# with the frame as long as gen_list still mirrors its index.
generation_df["gen_dbid"] = gen_dbid_list
generation_df.head(5)

Unnamed: 0_level_0,gen_dbid
GENERATION_PROJECT,Unnamed: 1_level_1
Anhui-Central_PV-2022,existing.1-1
Anhui-Coal-2022,existing.1-2
Anhui-Gas-2022,existing.1-3
Anhui-Hydro_Nonpumped-2022,existing.1-4
Anhui-Nuclear-2022,existing.1-5


In [46]:
# Technology label (second '-'-separated field) for every project, kept in
# gen_list order.
gen_energy_type_list = [project_name.split('-')[1] for project_name in gen_list]
# Peek at the first five labels.
gen_energy_type_list[:5]

['Central_PV', 'Coal', 'Gas', 'Hydro_Nonpumped', 'Nuclear']

In [61]:
# Store the raw technology label of each project as its energy source; the
# list is assigned positionally, so it must be in the same order as the rows.
# NOTE(review): the saved output of this cell already shows a gen_load_zone
# column, which is only assigned in a later cell -- the notebook appears to
# have been executed out of order; re-check with Restart & Run All.
generation_df["gen_energy_source"] = gen_energy_type_list
generation_df.head(5)

Unnamed: 0_level_0,gen_dbid,gen_energy_source,gen_load_zone
GENERATION_PROJECT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Anhui-Central_PV-2022,existing.1-1,Central_PV,Anhui
Anhui-Coal-2022,existing.1-2,Coal,Anhui
Anhui-Gas-2022,existing.1-3,Gas,Anhui
Anhui-Hydro_Nonpumped-2022,existing.1-4,Hydro_Nonpumped,Anhui
Anhui-Nuclear-2022,existing.1-5,Nuclear,Anhui


In [62]:
# Translate technology labels into the resource names used for
# gen_energy_source. The lookup matches the technology value exactly rather
# than searching for a substring in the full project identifier, so a zone
# name (or any other part of the identifier) that happens to contain "PV" or
# "Nuclear" can never be relabelled by accident. Labels without an entry
# here (Coal, Gas, Wind) are left unchanged, and re-running the cell is a no-op.
energy_source_by_technology = {
    "Central_PV": "Solar",
    "PV": "Solar",
    "Nuclear": "Uranium",
    "Hydro_Nonpumped": "Water",
}
generation_df["gen_energy_source"] = generation_df["gen_energy_source"].replace(
    energy_source_by_technology
)
# Show both ends of the table to check the relabelling.
pd.concat([generation_df.head(5), generation_df.tail(5)])

Unnamed: 0_level_0,gen_dbid,gen_energy_source,gen_load_zone
GENERATION_PROJECT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Anhui-Central_PV-2022,existing.1-1,Solar,Anhui
Anhui-Coal-2022,existing.1-2,Coal,Anhui
Anhui-Gas-2022,existing.1-3,Gas,Anhui
Anhui-Hydro_Nonpumped-2022,existing.1-4,Water,Anhui
Anhui-Nuclear-2022,existing.1-5,Uranium,Anhui
Zhejiang-Gas-2023,new.32-3,Gas,Zhejiang
Zhejiang-Hydro_Nonpumped-2023,new.32-4,Water,Zhejiang
Zhejiang-Nuclear-2023,new.32-5,Uranium,Zhejiang
Zhejiang-PV-2023,new.32-6,Solar,Zhejiang
Zhejiang-Wind-2023,new.32-7,Wind,Zhejiang


In [63]:
# Attach the load zone of each project. gen_zone_list was derived from the
# frame's index, so positional assignment lines up row-for-row
# (assuming the index has not been reordered since -- TODO confirm).
generation_df["gen_load_zone"] = gen_zone_list
generation_df.head(5)

Unnamed: 0_level_0,gen_dbid,gen_energy_source,gen_load_zone
GENERATION_PROJECT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Anhui-Central_PV-2022,existing.1-1,Solar,Anhui
Anhui-Coal-2022,existing.1-2,Coal,Anhui
Anhui-Gas-2022,existing.1-3,Gas,Anhui
Anhui-Hydro_Nonpumped-2022,existing.1-4,Water,Anhui
Anhui-Nuclear-2022,existing.1-5,Uranium,Anhui


In [64]:
# Maximum age assigned to each energy source.
# NOTE(review): the unit is not stated anywhere in the notebook (presumably years).
gen_max_age_dict = {
    "Coal": 35,
    "Gas": 20,
    "Uranium": 40,
    "Water": 100,
    "Wind": 30,
    "Solar": 20
}
# Look up the maximum age row by row; an energy source missing from the
# dictionary raises KeyError instead of being silently skipped.
source_list = generation_df["gen_energy_source"].tolist()
gen_max_age_list = [gen_max_age_dict[source_name] for source_name in source_list]
generation_df["gen_max_age"] = gen_max_age_list
generation_df.head(7)

Unnamed: 0_level_0,gen_dbid,gen_energy_source,gen_load_zone,gen_max_age
GENERATION_PROJECT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Anhui-Central_PV-2022,existing.1-1,Solar,Anhui,20
Anhui-Coal-2022,existing.1-2,Coal,Anhui,35
Anhui-Gas-2022,existing.1-3,Gas,Anhui,20
Anhui-Hydro_Nonpumped-2022,existing.1-4,Water,Anhui,100
Anhui-Nuclear-2022,existing.1-5,Uranium,Anhui,40
Anhui-PV-2022,existing.1-6,Solar,Anhui,20
Anhui-Wind-2022,existing.1-7,Wind,Anhui,30
