In [1]:
import datetime
import os

import pandas as pd

In [2]:
# Base directories for the notebook's input and output files.
# The defaults are the original machine-specific locations; set the
# AER_ONEDRIVE_PATH / AER_EXTERNAL_PATH environment variables to run the
# notebook on another machine without editing this cell.
file_path_onedrive = os.environ.get('AER_ONEDRIVE_PATH',
                                    '/Volumes/System/Users/weisun/OneDrive/AER work')
file_path_external = os.environ.get('AER_EXTERNAL_PATH',
                                    '/Volumes/EnergyData/AER')

In [3]:
# Lookup from the FUELTYPE label found in the source CSV to the coarser
# fuel category stored in the FUEL_CAT column.
fueltype_mapping_dict = {
    # renewables
    'Wind': 'Renewable',
    'Solar': 'Renewable',
    # categories kept under their own name
    'Hydro': 'Hydro',
    'Gas': 'Gas',
    'Black Coal': 'Black coal',
    'Brown Coal': 'Brown coal',
    # everything else is pooled
    'Coal Seam Methane': 'Other',
    'Bagasse': 'Other',
    'Waste Coal Mine Gas': 'Other',
    'Landfill Gas': 'Other',
    'Battery': 'Other',
    'Diesel': 'Other',
    'Kerosene': 'Other',
    'Biofuel': 'Other',
    'Biogas': 'Other',
    'Sludge Biogas': 'Other',
}


In [4]:
# Lookup from the State label in the source CSV to the REGIONID code used in
# the output table. ACT shares the NSW1 region code.
state_regionid_mapping_dict = {
    'NSW': 'NSW1',
    'ACT': 'NSW1',
    'VIC': 'VIC1',
    'QLD': 'QLD1',
    'SA': 'SA1',
    'TAS': 'TAS1',
}

In [5]:
def parse_string_number(string):
    """Convert a thousands-separated integer string such as '1,234,567' to a number.

    Parameters
    ----------
    string : str, int or float
        Text made of comma-separated groups of digits, optionally preceded by
        a '+' or '-' sign and surrounded by whitespace. A value that is
        already numeric (for example because pandas inferred a numeric dtype
        for the column, or the cell is NaN) is returned unchanged.

    Returns
    -------
    int
        The parsed integer for string input; the input itself for numeric
        input.

    Raises
    ------
    ValueError
        If any comma-separated group is not a valid integer literal.
    """
    # Already-numeric cells carry no separators to strip; calling .split on
    # them would raise AttributeError.
    if isinstance(string, (int, float)):
        return string

    text = string.strip()
    # Peel the sign off first so it applies to the whole number rather than
    # only to the leading group ('-1,234' must be -1234, not -1000 + 234).
    negative = text.startswith('-')
    if negative or text.startswith('+'):
        text = text[1:]

    value = 0
    for chunk in text.split(','):
        # Each further group shifts the running total by three decimal places.
        value = value * 1000 + int(chunk)
    return -value if negative else value

In [12]:
def read_and_process_cer_gen_emission(file_path_onedrive,
                                      fueltype_mapping_dict,
                                      state_regionid_mapping_dict,
                                      FY):
    """Load one financial year's CER facility file and aggregate it by region and fuel.

    Parameters
    ----------
    file_path_onedrive : str
        Base directory; the CSV is read from
        ``<base>/Electricity emission/CER_generation_and_emission/``.
    fueltype_mapping_dict : dict
        Maps each FUELTYPE label in the file to the value stored in FUEL_CAT.
    state_regionid_mapping_dict : dict
        Maps each State label in the file to the value stored in REGIONID.
    FY : str
        Financial-year tag used as the file-name suffix (e.g. 'FY2022_2023')
        and written unchanged into the output's FY column.

    Returns
    -------
    pandas.DataFrame
        One row per (REGIONID, FUEL_CAT, FUELTYPE) with summed ACTUAL_GEN and
        EMISSION, plus a constant FY column.

    Raises
    ------
    KeyError
        If a retained row has a FUELTYPE or State missing from its mapping.
    """
    raw_df = pd.read_csv(f'{file_path_onedrive}/Electricity emission/CER_generation_and_emission/greenhouse-and-energy-information-designated-generation-facility-{FY}.csv',
                         usecols=['State','ACTUAL_GEN','EMISSION','Grid','FUELTYPE'],encoding='utf-8',encoding_errors='ignore')

    # Keep only rows on the NEM grid located in one of the listed states.
    on_nem = raw_df['Grid']=='NEM'
    in_listed_state = raw_df['State'].isin(['NSW','QLD','VIC','SA','TAS','ACT'])
    # .copy() detaches the filtered rows from raw_df so the column assignments
    # below write to an independent frame instead of a slice
    # (avoids SettingWithCopyWarning).
    temp_df = raw_df[on_nem & in_listed_state].copy()

    # Both numeric columns go through the same thousands-separated parser.
    for column in ('ACTUAL_GEN', 'EMISSION'):
        temp_df[column] = temp_df[column].apply(parse_string_number)

    # Indexing the dict (rather than Series.map(dict)) keeps the fail-fast
    # KeyError for labels that have no mapping.
    temp_df['FUEL_CAT'] = temp_df['FUELTYPE'].apply(lambda x: fueltype_mapping_dict[x])

    temp_df = temp_df.rename(columns={'State':'REGIONID'})
    temp_df['REGIONID'] = temp_df['REGIONID'].apply(lambda x: state_regionid_mapping_dict[x])

    temp_df = temp_df.groupby(by=['REGIONID','FUEL_CAT','FUELTYPE'],as_index=False)[['ACTUAL_GEN','EMISSION']].sum()
    temp_df['FY'] = FY
    return temp_df

In [7]:
# temp_df = read_and_process_cer_gen_emission(file_path_onedrive,
#                                             fueltype_mapping_dict,state_regionid_mapping_dict,
#                                             'FY2022_2023')

In [13]:
# Financial years to load; each tag is the suffix of one source CSV file.
FY_list = ['FY2017_2018',
           'FY2018_2019',
           'FY2019_2020',
           'FY2020_2021',
           'FY2021_2022',
           'FY2022_2023']

# Collect the per-year tables and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on every
# iteration, and seeding it with an empty DataFrame() relies on pandas'
# deprecated handling of empty entries in concat.
yearly_frames = []
for FY in FY_list:
    temp_df = read_and_process_cer_gen_emission(file_path_onedrive,
                                                fueltype_mapping_dict,
                                                state_regionid_mapping_dict,
                                                FY)
    yearly_frames.append(temp_df)

# Each year's own row index is kept as-is (no ignore_index).
cer_emission_df = pd.concat(yearly_frames)

In [14]:
# Write the combined multi-year table to disk, leaving out the row index.
cer_emission_df.to_csv(
    path_or_buf=f'{file_path_onedrive}/Electricity emission/cer_generation_emission_WS.csv',
    index=False,
)