## **1 - Importing Libraries & Utilities**

In [None]:
from google.colab import files
import pandas as pd
import numpy as np
from IPython.display import clear_output
from time import sleep, perf_counter
from datetime import timedelta
from google.colab import files
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Environment setup cell: installs Selenium via pip and Chromium plus its
# driver via apt, using IPython shell escapes (`!`).
from IPython.display import clear_output
!pip install selenium
!apt-get update
!apt-get install chromium chromium-driver
# Remove the installer log printed under this cell.
clear_output()

In [None]:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import time
def web_driver():
    """Create a Chrome WebDriver configured with the command-line switches below.

    Returns
    -------
    selenium.webdriver.Chrome
        A new driver instance built from the configured ``ChromeOptions``.
    """
    # Switches are applied in the order listed.
    chrome_switches = (
        "--verbose",
        '--no-sandbox',
        '--headless',
        '--disable-gpu',
        "--window-size=1920, 1200",
        '--disable-dev-shm-usage',
    )
    options = webdriver.ChromeOptions()
    for switch in chrome_switches:
        options.add_argument(switch)
    return webdriver.Chrome(options=options)
#
# Single browser session shared by every scraping cell below.
driver = web_driver()

In [None]:
# Load the target page in the shared browser session.
driver.get('https://apps.cer-rec.gc.ca/ftrppndc/dflt.aspx?GoCTemplateCulture=en-CA')
# Fixed 4-second pause after navigation before the page is queried.
time.sleep(4)

#### **1.1 - Get List of Reports**

In [None]:
# The report dropdown is located by id; its <option> children are the reports on offer.
Reports_obj = driver.find_elements(by = By.ID, value = "ctl00_ctl00_MainContent_MainContent_ddlIteration")
reports_obj = Reports_obj[0].find_elements(by = By.TAG_NAME, value = 'option')
# Print the visible text of every option, one per line.
for report_option in reports_obj:
  print(report_option.text)

Canada’s Energy Future 2016
Canada’s Energy Future 2016: Update
Canada’s Energy Future 2017
Canada’s Energy Future 2018
Canada’s Energy Future 2019
Canada’s Energy Future 2020
Canada’s Energy Future 2021
Canada’s Energy Future 2023


In [None]:
# Let's focus on the "Canada’s Energy Future 2023" report
# The last <option> of the report dropdown is taken as that report.
Report_ = reports_obj[-1].text
print(Report_)

Canada’s Energy Future 2023


#### **1.2 - Get List of Appendices**

In [None]:
# Locate the appendix dropdown by id and collect its <option> elements.
Appendices_obj = driver.find_element(by = By.ID, value = "ctl00_ctl00_MainContent_MainContent_ddlAppendices")
appendices_obj = Appendices_obj.find_elements(by = By.TAG_NAME, value = 'option')
# Uncomment to print the visible text of every appendix option.
#for i in range(len(appendices_obj)):
#  print(appendices_obj[i].text)

In [None]:
# Visible dropdown labels of the appendices to scrape.
# Later cells address entries by position (Appendices_list[j]), so the order
# of this list is significant.
Appendices_list = ['Macro Indicators', 'Benchmark Prices',
                   'End - Use Prices', 'End - Use Demand',
                   'Primary Energy Demand',
                   'Electricity Generation', 'Electricity Capacity',
                   'Electricity Interchange',
                   #'Hydrogen Production',
                   'Greenhouse Gas Emissions']

## **2 - Scraping Data & Generating DataFrames**

### **2.1 - Macro Indicators - Benchmark Prices**

#### **2.1.1 - Appendices of Interest**

In [None]:
# Appendices handled in this section: the first two entries of the list.
for appendix_name in Appendices_list[:2]:
  print(appendix_name)

Macro Indicators
Benchmark Prices


#### **2.1.2 - List of Scenarios**

In [None]:
# Read the visible text of every <option> in the scenario dropdown.
Scenarios_obj = driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlCase")
scenarios_ = Scenarios_obj.find_elements(by = By.TAG_NAME, value = 'option')
scenarios_list = [option.text for option in scenarios_]
scenarios_list

['Global Net-zero', 'Canada Net-zero', 'Current Measures']

**Remark:**

The list of ***Scenarios*** is the same whether the appendix is "Macro Indicators" or "Benchmark Prices".

#### **2.1.3 - Function - Scrape Tables**

In [None]:
def scrape_tables(appendice_, scenario_):
  """Scrape the grid shown for one appendix / scenario into a wide DataFrame.

  Selects the 2023 report, the given appendix and the given scenario in the
  page's dropdowns, then reads the elements with class ``myGrid``: the first
  one supplies the row labels, the second one supplies the header (its first
  text line) and the values (its remaining lines).

  Parameters
  ----------
  appendice_ : str
      Visible text of the appendix option to select.
  scenario_ : str
      Visible text of the scenario option to select.

  Returns
  -------
  pandas.DataFrame
      Columns ``Scenario``, ``Metric`` and one column per space-separated
      token of the header line. All cells are the raw text tokens.
  """
  selectReport = Select(driver.find_element(by = By.ID, value = "ctl00_ctl00_MainContent_MainContent_ddlIteration"))
  selectReport.select_by_visible_text('Canada’s Energy Future 2023')
  #
  selectAppendice = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlAppendices"))
  selectAppendice.select_by_visible_text(appendice_)
  #
  selectSenario = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlCase"))
  selectSenario.select_by_visible_text(scenario_)
  #
  driver.implicitly_wait(10)
  #
  # NOTE(review): the result of this lookup is not used; the call is kept so
  # the sequence of element lookups is the same as before.
  driver.find_elements(by = By.TAG_NAME, value = 'tbody')
  field = driver.find_elements(by = By.CLASS_NAME, value = 'myGrid')
  #
  # Read each element's text exactly once instead of re-reading it for every
  # row inside the loop.
  value_lines = field[1].text.split('\n')
  label_lines = field[0].text.split('\n')
  #
  header_ = ['Metric'] + value_lines[0].split(' ')
  n_rows = len(value_lines) - 1
  rows_ = []
  for pos in range(n_rows):
    # Label line `pos` pairs with value line `pos + 1` (line 0 is the header).
    rows_.append([label_lines[pos].strip()] + value_lines[pos + 1].split(' '))
  df1 = pd.DataFrame([scenario_] * n_rows, columns = ['Scenario'])
  df2 = pd.DataFrame(rows_, columns = header_)
  df = pd.concat([df1, df2], axis = 1)
  ###################
  return df

#### **2.1.4 - Building Dataframes**

In [None]:
# Reference time; a later cell uses it to report the total scraping duration.
start = perf_counter()
######################################
             ## df_0 - df_1 ##
######################################
# For each of the first two appendices: stack the wide table of every
# scenario, reshape it to long format and publish it as global df_<j>.
for j in range(len(Appendices_list[:2])):
  frame_name = f"df_{j}"
  stacked = pd.DataFrame()
  for i in range(len(scenarios_list)):
    scraped = scrape_tables(Appendices_list[j], scenarios_list[i])
    stacked = pd.concat([stacked, scraped], axis = 0, ignore_index = True)
  globals()[frame_name] = stacked.melt(['Scenario', 'Metric'], var_name = 'Year', value_name = 'Value')

### **2.2 - End - Use Prices / End - Use Demand**

#### **2.2.1 - Appendices of Interest and Scenarios**

In [None]:
# Appendices : End - Use Prices and End - Use Demand
# Scenarios are going to be the same: Global Net-zero, Canada Net-zero, Current Measures

# Map each of the two appendices to the element id of its extra dropdown:
# the "Prices" appendix uses ddlSector, the other one uses ddlRegion.
sector_opt = {
    appendix: ("ctl00_ctl00_MainContent_MainContent_ddlSector"
               if 'Prices' in appendix
               else "ctl00_ctl00_MainContent_MainContent_ddlRegion")
    for appendix in Appendices_list[2:4]
}

**Remark:**

List of ***Scenarios*** is going to be the same: Global Net-zero, Canada Net-zero and Current Measures.

#### **2.2.2 - Optional Lists - 4th Dropdown Menu**

In [None]:
# For appendices 2 and 3: select the appendix, then read the labels of its
# extra dropdown. The last label is dropped for index 2 and the first label is
# dropped for index 3.
opt_list = {}
for j in range(2,4):
  appendix = Appendices_list[j]
  Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlAppendices")).select_by_visible_text(appendix)
  dropdown = Select(driver.find_element(by = By.ID, value = sector_opt[appendix]))
  labels = [option.text for option in dropdown.options]
  if j == 2:
    sector_list = opt_list[j] = labels[:-1]
  else:
    region_list = opt_list[j] = labels[1:]

#### **2.2.3 - Functions - Scrape Tables**

##### ***2.2.3.1 - Function - get_driver_2_3***

In [None]:
def get_driver_2_3(appendice_, scenario_, opt_):
  """Set the four dropdowns for an End-Use appendix and return the driver.

  Selects, in order: the 2023 report, ``appendice_``, ``scenario_`` and then
  ``opt_`` in the dropdown whose id is ``sector_opt[appendice_]``. Afterwards
  sets a 10 s implicit wait and sleeps 10 s.
  """
  fixed_dropdowns = (
      (By.ID, "ctl00_ctl00_MainContent_MainContent_ddlIteration", 'Canada’s Energy Future 2023'),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlAppendices", appendice_),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlCase", scenario_),
  )
  for strategy, locator, label in fixed_dropdowns:
    Select(driver.find_element(by = strategy, value = locator)).select_by_visible_text(label)
  # The fourth dropdown depends on the appendix (looked up only now, after the
  # first three selections have been made).
  extra_dropdown = Select(driver.find_element(by = By.ID, value = sector_opt[appendice_]))
  extra_dropdown.select_by_visible_text(opt_)
  #
  driver.implicitly_wait(10)
  sleep(10)
  return driver

##### **2.2.3.2 - Function - conditions**

In [None]:
def conditions_(j):
  """Return the page-layout constants used to parse appendix ``j``.

  Parameters
  ----------
  j : int
      Appendix index; only 2 and 3 are supported.

  Returns
  -------
  tuple
      ``(indices_, yr, lvl, head_, tag_, inc_, value_name_)`` where
      ``indices_`` is the list of table indices to read, ``yr`` and ``lvl``
      are the text-line positions of the header and of the values inside a
      table, ``head_`` is the name of the label column, ``tag_`` is the HTML
      tag holding the labels, ``inc_`` is the offset added to a table index
      to find its label element, and ``value_name_`` is the name of the value
      column.

  Raises
  ------
  ValueError
      If ``j`` is neither 2 nor 3 (previously this fell through to an
      ``UnboundLocalError`` on the return statement).
  """
  if j == 3:
    indices_ = list(range(1,4))
    yr = 7
    lvl = yr + 2
    head_ = 'Sector'
    tag_ = "h2"
    inc_ = 6
    value_name_ = 'End-Use Demand (PetaJoules)'
  elif j == 2:
    indices_ = list(range(13))
    yr = 3
    lvl = yr + 1
    head_ = 'Region'
    tag_ = "span"
    inc_ = 25
    value_name_ = 'Price (2022 CAD $ per GJ)'
  else:
    raise ValueError(f"conditions_ supports appendix index 2 or 3, got {j!r}")
  return indices_, yr, lvl, head_, tag_, inc_, value_name_

##### ***2.2.3.3 - Function - scrape_tables_2_3***

In [None]:
def scrape_tables_2_3(appendice_, scenario_, opt_):
  """Scrape one End-Use page (appendix / scenario / extra option) to long format.

  The appendix index is derived from ``appendice_`` via ``Appendices_list``
  instead of being read from the notebook-global loop variable ``j``, so the
  function no longer depends on which cell ran last.

  Parameters
  ----------
  appendice_ : str
      Appendix label; must be an entry of ``Appendices_list`` supported by
      ``conditions_`` (index 2 or 3).
  scenario_ : str
      Scenario label to select.
  opt_ : str
      Label to select in the appendix-specific fourth dropdown.

  Returns
  -------
  pandas.DataFrame
      Long-format frame: the option column (named after the dropdown id
      suffix), the label column, ``Year`` and the value column.
  """
  driver = get_driver_2_3(appendice_, scenario_, opt_)
  #
  tables_ = driver.find_elements(by = By.TAG_NAME, value = 'table')
  ################
  appendix_idx = Appendices_list.index(appendice_)
  indices_, yr, lvl, head_, tag_, inc_, value_name_ = conditions_(appendix_idx)
  ################
  # The label elements do not change between iterations: look them up once.
  field_ = driver.find_elements(by = By.TAG_NAME, value = tag_)
  header_ = [head_]
  rows_ = []
  for ind in indices_:
    # Only every third <table> is read; split its text once per table.
    table_lines = tables_[3*ind].text.split('\n')
    header_ = [head_] + table_lines[yr].split(' ')
    #
    label = field_[ind + inc_].text.split('-')[0]
    if appendix_idx == 3:
      # For appendix 3 only the first word of the heading is kept.
      label = label.split(' ')[0]
    rows_.append([label] + table_lines[lvl].split(' '))
  # Column name is the dropdown id suffix without its "ddl" prefix.
  option_column = sector_opt[appendice_].split('_')[-1][3:]
  df1 = pd.DataFrame([opt_] * len(rows_), columns = [option_column])
  df2 = pd.DataFrame(rows_, columns = header_)
  df = pd.concat([df1, df2], axis = 1)
  df = df.melt(list(df.columns)[:2], var_name = 'Year', value_name = value_name_)
  #############
  return df

#### **2.2.4 - Building Dataframes**


In [None]:
#start = perf_counter()
# For appendices 2 and 3: scrape every (scenario, option) page, prefix the
# rows with a Scenario column and publish the result as global df_<j>.
for j in range(2,4):
  print(Appendices_list[j])
  ########################
  df_ = pd.DataFrame()
  for i, scenario_name in enumerate(scenarios_list):
    print(i)
    per_scenario = pd.DataFrame()
    for option_label in opt_list[j]:
      scraped = scrape_tables_2_3(Appendices_list[j], scenario_name, option_label)
      per_scenario = pd.concat([per_scenario, scraped], axis = 0, ignore_index = True)
    scenario_column = pd.DataFrame([scenario_name] * int(per_scenario.shape[0]), columns = ['Scenario'])
    per_scenario = pd.concat([scenario_column, per_scenario], axis = 1)
    #
    df_ = pd.concat([df_, per_scenario], axis = 0, ignore_index = True)
  ######################################
              ## df_2 - df_3 ##
  ######################################
  globals()[f"df_{j}"] = df_
#
#duration_ = (perf_counter()-start)/60

### **2.3 - Primary Energy Demand**

In [None]:
# Appendix index for this section; printed to confirm which label it maps to.
j = 4
print(Appendices_list[j])

#### **2.3.1 - Functions - Scrape Tables**

##### ***2.3.1.1 - Function - get_driver_4***

In [None]:
def get_driver_4(appendice_, scenario_, opt_):
  """Set the four dropdowns for the given appendix page and return the driver.

  Selects, in order: the 2023 report, ``appendice_``, ``scenario_`` and then
  ``opt_`` in the dropdown registered in ``sector_opt`` for
  ``Appendices_list[3]``. Afterwards sets a 10 s implicit wait and sleeps 10 s.
  """
  fixed_dropdowns = (
      (By.ID, "ctl00_ctl00_MainContent_MainContent_ddlIteration", 'Canada’s Energy Future 2023'),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlAppendices", appendice_),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlCase", scenario_),
  )
  for strategy, locator, label in fixed_dropdowns:
    Select(driver.find_element(by = strategy, value = locator)).select_by_visible_text(label)
  # The fourth dropdown id is the one registered for Appendices_list[3],
  # whatever appendice_ is.
  extra_dropdown = Select(driver.find_element(by = By.ID, value = sector_opt[Appendices_list[3]]))
  extra_dropdown.select_by_visible_text(opt_)
  #
  driver.implicitly_wait(10)
  sleep(10)
  return driver

##### ***2.3.1.2 - Function - scrape_tables_4***

In [None]:
def scrape_tables_4(appendice_, scenario_, opt_):
  """Scrape the first table of the page for one scenario / option to long format.

  The value column is named after the text of the ninth ``<h2>`` element.
  In the first ``<table>``'s text, line 7 is used as the header, lines 9
  onward as the value rows, and the label of each value row is the line
  eight positions above it.

  Returns
  -------
  pandas.DataFrame
      Columns ``Scenario``, ``Region``, ``Sources``, ``Year`` and the metric.
  """
  driver = get_driver_4(appendice_, scenario_, opt_)
  #
  tables_ = driver.find_elements(by = By.TAG_NAME, value = 'table')
  headings = driver.find_elements(by = By.TAG_NAME, value = 'h2')
  metric_ = headings[8].text
  #####
  lines = tables_[0].text.split('\n')
  header_ = ['Sources'] + lines[7].split(' ')
  #
  rows_ = [[lines[pos - 8]] + lines[pos].split(' ') for pos in range(9, len(lines))]
  long_df = pd.DataFrame(rows_, columns = header_).melt('Sources', var_name = 'Year', value_name = metric_)
  # Prepend the Region column first, then the Scenario column.
  region_column = pd.DataFrame([opt_] * long_df.shape[0], columns = ['Region'])
  long_df = pd.concat([region_column, long_df], axis = 1)
  scenario_column = pd.DataFrame([scenario_] * long_df.shape[0], columns = ['Scenario'])
  long_df = pd.concat([scenario_column, long_df], axis = 1)
  ##
  return long_df

#### **2.3.2 - Building Dataframe**

In [None]:
#start = perf_counter()
# Pin the appendix index in this cell: the result is stored as df_{j} below,
# so a stale j left by an earlier cell would overwrite another frame.
j = 4
print(Appendices_list[j])
########################
# Scrape every (scenario, region) page of this appendix and stack the results.
df_ = pd.DataFrame()
for i in range(len(scenarios_list)):
  print(i)
  temp = pd.DataFrame()
  for k in range(len(region_list)):          # opt_list[3] => region_list
    temp_1 = scrape_tables_4(Appendices_list[j], scenarios_list[i], region_list[k])
    #
    temp = pd.concat([temp, temp_1], axis = 0, ignore_index = True)
  df_ = pd.concat([df_, temp], axis = 0, ignore_index = True)
######################################
              ## df_4 ##
######################################
globals()[f"df_{j}"] = df_
#
#duration_ = (perf_counter()-start)/60
clear_output()
#
#print('Duration =', duration_, ' mins')

### **2.4 - Electricity Generation**

In [None]:
# Appendix index for this section; printed to confirm which label it maps to.
j = 5
print(Appendices_list[j])

#### **2.4.1 - Functions - Scrape Tables**

##### ***2.4.1.1 - Function - get_driver_5***

In [None]:
def get_driver_5(appendice_, scenario_):
  """Set the report, appendix, scenario and type dropdowns; return the driver.

  Selects, in order: the 2023 report, ``appendice_``, ``scenario_`` and the
  'Primary Fuel' entry of the ``ddlType`` dropdown. Afterwards sets a 10 s
  implicit wait and sleeps 10 s.
  """
  dropdown_choices = (
      (By.ID, "ctl00_ctl00_MainContent_MainContent_ddlIteration", 'Canada’s Energy Future 2023'),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlAppendices", appendice_),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlCase", scenario_),
      (By.NAME, "ctl00$ctl00$MainContent$MainContent$ddlType", 'Primary Fuel'),
  )
  for strategy, locator, label in dropdown_choices:
    Select(driver.find_element(by = strategy, value = locator)).select_by_visible_text(label)
  #
  driver.implicitly_wait(10)
  sleep(10)
  return driver

##### ***2.4.1.2 - Function - scrape_tables_5***

In [None]:
def scrape_tables_5(appendice_, scenario_, k):
  """Scrape the table of region number ``k`` for one appendix / scenario.

  Parameters
  ----------
  appendice_ : str
      Appendix label to select.
  scenario_ : str
      Scenario label to select; also written to the ``Scenario`` column
      (previously the column was filled from the notebook globals
      ``scenarios_list[i]``).
  k : int
      Zero-based region position: table ``3*(k+1)`` is parsed and the region
      name is read from the ``<strong>`` element at offset ``58 + k``.

  Returns
  -------
  pandas.DataFrame
      Columns ``Scenario``, ``Region``, ``Sources``, ``Year`` and the metric
      column; empty-string values in the metric column are replaced by 0.
  """
  driver = get_driver_5(appendice_, scenario_)
  #
  tables_ = driver.find_elements(by = By.TAG_NAME, value = 'table')
  field_ = driver.find_elements(by = By.TAG_NAME, value = 'h2')
  # NOTE(review): the ' - (GWh)' suffix is appended for every appendix passed
  # in, including 'Electricity Capacity' — confirm the unit is right there.
  metric_ = field_[5].text.split(':')[1].strip().split('-')[0].strip() + ' - (GWh)'
  regions_ = driver.find_elements(by = By.TAG_NAME, value = 'strong')
  #####
  # The header is always taken from the first region's table (index 3).
  header_ = ['Sources'] + tables_[3*(0+1)].text.split('\n')[8].split(' ')
  #
  # Read the region table text once instead of once per row.
  region_lines = tables_[3*(k+1)].text.split('\n')
  rows_ = []
  for ind in range(8):
    # Labels are lines 0-7; the values of label `ind` are on line `ind + 9`.
    row_ = [region_lines[ind]] + region_lines[ind + 9].split(' ')
    rows_.append(row_[:47])
  temp = pd.DataFrame(rows_, columns = header_)
  temp = temp.melt('Sources', var_name = 'Year', value_name = metric_)
  #
  temp = pd.concat([pd.DataFrame([regions_[58 + k].text] * temp.shape[0], columns = ['Region']), temp], axis = 1)
  temp = pd.concat([pd.DataFrame([scenario_] * temp.shape[0], columns = ['Scenario']), temp], axis = 1)
  # Assign the result back: an in-place replace on a column selection is not
  # guaranteed to update the frame itself.
  temp[metric_] = temp[metric_].replace('', 0)
  ##
  return temp

# Print the text of the 13 <strong> elements at offsets 58..70 of the page the
# driver currently shows (the same offsets are used for the Region column).
# NOTE(review): this runs against whatever page is loaded when the cell is
# executed — confirm an Electricity appendix page is displayed at that point.
regions_ = driver.find_elements(by = By.TAG_NAME, value = 'strong')
for k in range(13):
  print(regions_[58 + k].text)

#### **2.4.2 - Building Dataframe**

In [None]:
#start = perf_counter()
j = 5
print(Appendices_list[j])
########################
# Scrape every (scenario, region) table of this appendix and stack the results.
df_ = pd.DataFrame()
# Iterate over all scenarios; the loop was previously limited to the first
# scenario only (range(1)), leaving df_5 without the other scenarios.
for i in range(len(scenarios_list)):
  print(i)
  temp = pd.DataFrame()
  for k in range(len(region_list)):
    print(region_list[k])
    temp_1 = scrape_tables_5(Appendices_list[j], scenarios_list[i], k)
    #
    temp = pd.concat([temp, temp_1], axis = 0, ignore_index = True)
  df_ = pd.concat([df_, temp], axis = 0, ignore_index = True)
######################################
              ## df_5 ##
######################################
globals()[f"df_{j}"] = df_
#
#duration_ = (perf_counter()-start)/60
clear_output()
#
#print('Duration =', duration_, ' mins')

### **2.5 - Electricity Capacity**

In [None]:
# Appendix index for this section; printed to confirm which label it maps to.
j = 6
print(Appendices_list[j])

#### **2.5.1 - Building Dataframes**

In [None]:
#start = perf_counter()
j = 6
print(Appendices_list[j])
########################
# Scrape every (scenario, region) table of this appendix with scrape_tables_5
# and stack the results into global df_<j>.
df_ = pd.DataFrame()
for i in range(len(scenarios_list)):
  print(i)
  region_frames = [scrape_tables_5(Appendices_list[j], scenarios_list[i], k) for k in range(len(region_list))]
  # The leading empty frame keeps the result an empty DataFrame when there
  # are no regions.
  temp = pd.concat([pd.DataFrame()] + region_frames, axis = 0, ignore_index = True)
  df_ = pd.concat([df_, temp], axis = 0, ignore_index = True)
######################################
              ## df_6 ##
######################################
globals()[f"df_{j}"] = df_
#
#duration_ = (perf_counter()-start)/60
clear_output()
#
#print('Duration =', duration_, ' mins')

### **2.6 - Electricity Interchange**

In [None]:
# Appendix index for this section; printed to confirm which label it maps to.
j = 7
print(Appendices_list[j])

#### **2.6.1 - Functions - Scrape Tables**

##### ***2.6.1.1 - Function - get_driver_7***

In [None]:
def get_driver_7(appendice_, scenario_):
  """Select the 2023 report, the given appendix and scenario; return the driver.

  The selections now use the ``appendice_`` and ``scenario_`` arguments; the
  function previously ignored them and read the notebook globals
  ``Appendices_list[j]`` and ``scenarios_list[i]`` instead.

  Parameters
  ----------
  appendice_ : str
      Visible text of the appendix option to select.
  scenario_ : str
      Visible text of the scenario option to select.

  Returns
  -------
  The shared ``driver`` object, after a 10 s implicit wait has been set and a
  10 s sleep.
  """
  selectReport = Select(driver.find_element(by = By.ID, value = "ctl00_ctl00_MainContent_MainContent_ddlIteration"))
  selectReport.select_by_visible_text('Canada’s Energy Future 2023')
  #
  selectAppendice = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlAppendices"))
  selectAppendice.select_by_visible_text(appendice_)
  #
  selectSenario = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlCase"))
  selectSenario.select_by_visible_text(scenario_)
  #
  driver.implicitly_wait(10)
  sleep(10)
  return driver

##### ***2.6.1.2 - Function - scrape_tables_7***

In [None]:
def scrape_tables_7(appendice_, scenario_, k):
  """Scrape the interchange table of region number ``k`` for one scenario.

  Parameters
  ----------
  appendice_ : str
      Appendix label to select.
  scenario_ : str
      Scenario label to select; also written to the ``Scenario`` column
      (previously the column was filled from the notebook globals
      ``scenarios_list[i]``).
  k : int
      Zero-based region position: table ``3*(k+1)`` is parsed and the region
      name is read from the ``<strong>`` element at offset ``58 + k``.

  Returns
  -------
  pandas.DataFrame
      Columns ``Scenario``, ``Region``, ``Interchange Flows``, ``Year`` and
      the metric column (named after the text following the last '-' of the
      sixth ``<h2>``); empty-string metric values are replaced by 0.
  """
  driver = get_driver_7(appendice_, scenario_)
  #
  tables_ = driver.find_elements(by = By.TAG_NAME, value = 'table')
  field_ = driver.find_elements(by = By.TAG_NAME, value = 'h2')
  metric_ = field_[5].text.split('-')[-1].strip()
  regions_ = driver.find_elements(by = By.TAG_NAME, value = 'strong')
  #####
  # The header is the first text line of the table at index 2.
  header_ = ['Interchange Flows'] + tables_[3*0 + 2].text.split('\n')[0].split(' ')
  #
  # Read the region table text once instead of once per row.
  region_lines = tables_[3*(k+1)].text.split('\n')
  rows_ = []
  for ind in range(6):
    # Labels are lines 0-5; the values of label `ind` are on line `ind + 7`.
    row_ = [region_lines[ind]] + region_lines[ind + 7].split(' ')
    rows_.append(row_[:47])
  temp = pd.DataFrame(rows_, columns = header_)
  temp = temp.melt(header_[0], var_name = 'Year', value_name = metric_)
  #
  temp = pd.concat([pd.DataFrame([regions_[58 + k].text] * temp.shape[0], columns = ['Region']), temp], axis = 1)
  temp = pd.concat([pd.DataFrame([scenario_] * temp.shape[0], columns = ['Scenario']), temp], axis = 1)
  # Assign the result back: an in-place replace on a column selection is not
  # guaranteed to update the frame itself.
  temp[metric_] = temp[metric_].replace('', 0)
  ##
  return temp

#### **2.6.2 - Building Dataframes**

In [None]:
#start = perf_counter()
j = 7
print(Appendices_list[j])
########################
# Scrape every (scenario, region) table of this appendix and stack the
# results into global df_<j>.
df_ = pd.DataFrame()
for i in range(len(scenarios_list)):
  print(i)
  # Yukon, Northwest Territories & Nunavut not included in the url
  region_frames = [scrape_tables_7(Appendices_list[j], scenarios_list[i], k) for k in range(len(region_list) - 3)]
  # The leading empty frame keeps the result an empty DataFrame when there
  # are no regions.
  temp = pd.concat([pd.DataFrame()] + region_frames, axis = 0, ignore_index = True)
  df_ = pd.concat([df_, temp], axis = 0, ignore_index = True)
######################################
              ## df_7 ##
######################################
globals()[f"df_{j}"] = df_
#
# Minutes elapsed since `start` was set in an earlier cell.
duration_ = (perf_counter()-start)/60
clear_output()
#
print('Duration =', duration_, ' mins')

### **2.7 - GHG Emissions - Electricity**

In [None]:
# Appendix index for this section; printed to confirm which label it maps to.
j = 8
print(Appendices_list[j])

#### **2.7.1 - Functions - Scrape Tables**

##### ***2.7.1.1 - Function - get_driver_8***

In [None]:
def get_driver_8(appendice_, scenario_):
  """Select report, appendix, scenario and the 'Megatonnes' unit; return the driver.

  The selections now use the ``appendice_`` and ``scenario_`` arguments; the
  function previously ignored them and read the notebook globals
  ``Appendices_list[j]`` and ``scenarios_list[i]`` instead.

  Parameters
  ----------
  appendice_ : str
      Visible text of the appendix option to select.
  scenario_ : str
      Visible text of the scenario option to select.

  Returns
  -------
  The shared ``driver`` object, after a 10 s implicit wait has been set and a
  10 s sleep.
  """
  selectReport = Select(driver.find_element(by = By.ID, value = "ctl00_ctl00_MainContent_MainContent_ddlIteration"))
  selectReport.select_by_visible_text('Canada’s Energy Future 2023')
  #
  selectAppendice = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlAppendices"))
  selectAppendice.select_by_visible_text(appendice_)
  #
  selectSenario = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlCase"))
  selectSenario.select_by_visible_text(scenario_) #
  #
  optional_ = Select(driver.find_element(by = By.NAME, value = "ctl00$ctl00$MainContent$MainContent$ddlUnits"))
  optional_.select_by_visible_text('Megatonnes') #
  #
  driver.implicitly_wait(10)
  sleep(10)
  return driver

##### ***2.7.1.2 - Function - scrape_tables_8***

In [None]:
def scrape_tables_8(appendice_, scenario_):
  """Scrape one row of the emissions table for a scenario into long format.

  Parameters
  ----------
  appendice_ : str
      Appendix label to select.
  scenario_ : str
      Scenario label to select; also written to the ``Scenario`` column
      (previously the column was filled from the notebook globals
      ``scenarios_list[i]``).

  Returns
  -------
  pandas.DataFrame
      Columns ``Scenario``, ``Year`` and the metric column; empty-string
      metric values are replaced by 0.
  """
  driver = get_driver_8(appendice_, scenario_)
  #
  tables_ = driver.find_elements(by = By.TAG_NAME, value = 'table')
  field_ = driver.find_elements(by = By.TAG_NAME, value = 'h2')
  # The unit word is the first word after the last '-' of the sixth <h2>.
  metric_ = 'Electricity_GHG Emmisions - (' +field_[5].text.split('-')[-1].strip().split(' ')[0] + ' CO2 Equivalent)'
  #####
  # The header is the first text line of the table at index 2.
  header_ = tables_[3*0 + 2].text.split('\n')[0].split(' ')
  #
  # Only text line 16 of the first table is kept.
  # NOTE(review): presumably the electricity row — confirm against the page.
  row_ = [tables_[0].text.split('\n')[4 + 12].split(' ')]
  temp = pd.DataFrame(row_, columns = header_)
  temp = temp.melt(var_name = 'Year', value_name = metric_)
  #
  temp = pd.concat([pd.DataFrame([scenario_] * temp.shape[0], columns = ['Scenario']), temp], axis = 1)
  # Assign the result back: an in-place replace on a column selection is not
  # guaranteed to update the frame itself.
  temp[metric_] = temp[metric_].replace('', 0)
  ##
  return temp

#### **2.7.2 - Building Dataframes**

In [None]:
j = 8
print(Appendices_list[j])
########################
# Scrape the emissions row of every scenario and stack the results into
# global df_<j>.
df_ = pd.DataFrame()
for i in range(len(scenarios_list)):
  scenario_frame = scrape_tables_8(Appendices_list[j], scenarios_list[i])
  df_ = pd.concat([df_, scenario_frame], axis = 0, ignore_index = True)
######################################
              ## df_8 ##
######################################
globals()[f"df_{j}"] = df_

#### **2.7.3 - Inferring GHG Data by Sector**

In [None]:
# Convert the scraped text values of the two value columns to floats.
for frame, value_column in (
    (df_3, 'End-Use Demand (PetaJoules)'),
    (df_8, 'Electricity_GHG Emmisions - (Megatonnes CO2 Equivalent)'),
):
  frame[value_column] = frame[value_column].astype(float)

In [None]:
# Fill a per-row GHG column in df_3 from each scenario/year total in df_8.
# Values are written with .loc: the former chained form df_3[col][index] = ...
# assigns through an intermediate object and is not guaranteed to update df_3.
ghg_col = 'GHG Emmisions (KTons_CO2_Equivalent)'
demand_col = 'End-Use Demand (PetaJoules)'
co2_col = 'Electricity_GHG Emmisions - (Megatonnes CO2 Equivalent)'
df_3[ghg_col] = ''
#
for scenario_ in scenarios_list:
  for year_ in df_3['Year'].unique().tolist():
    # Masks are built once per level and reused by the inner loops.
    scenario_year = (df_3['Scenario'] == scenario_) & (df_3['Year'] == year_)
    Sum_tot = df_3.loc[scenario_year, demand_col].sum()
    tot_CO2 = df_8.loc[(df_8['Scenario'] == scenario_) & (df_8['Year'] == year_), co2_col].values[0]
    #
    for region_ in region_list:
      in_region = scenario_year & (df_3['Region'] == region_)
      Sum_region = df_3.loc[in_region, demand_col].sum()
      for sector_ in sector_list:
        cell = df_3.loc[in_region & (df_3['Sector'] == sector_), demand_col]
        # As before, only the first matching row is used and updated.
        index = cell.index[0]
        value_ = cell.values[0]
        #
        # NOTE(review): the value is divided by both the regional and the
        # overall total; confirm this is the intended allocation formula.
        df_3.loc[index, ghg_col] = (value_ / Sum_region / Sum_tot)*tot_CO2*1000