# Challenge 2
##### A short series of python scripts to implement a calculator for  Adjusted CO2 Total emissions for a company from a given data.json

# Enums for constants

Step 1.

Define Enum for formula constants, to avoid hardcoding. In practice these may be saved into a database and invoked from there as well. 

This examples attempts multiple runs using different values. 

In [249]:
# 
"""
Enum class allowing constants mu_purch, mu_max_purch, phi_prod to be adjusted for a given run 
meaning that they would be the same for each company, but can be edited in between runs.

The cversion and cauthor fields are annotations 
for version control e.g. as diffs are submitted between versions. 

This can be also materialized using a database table in a prod environment.
"""
# 
from enum import Enum
class Carbonconstants (Enum):
  RUN1 = (0.5, 0.8, 0.05, '1.1', 'Employee 1')
  RUN2 = (0.5, 0.8, 0.06, '1.2', 'Employee 1')
  RUN3 = (0.6, 0.8, 0.05, '1.3', 'Employee 1')
  RUN4 = (0.5, 0.81, 0.05, '1.3', 'Employee 1')
  def __init__ (self, mu_purch, mu_max_purch, phi_prod, cversion, cauthor):
    self.mu_purch = mu_purch
    self.mu_max_purch = mu_max_purch
    self.phi_prod = phi_prod
    self.cversion = cversion
    self.cauthor = cauthor



In [2]:
Carbonconstants.RUN1

<Carbonconstants.RUN1: (0.5, 0.8, 0.05, '1.1', 'Employee 1')>

In [3]:
Carbonconstants.RUN1.mu_purch

0.5

In [4]:
assert Carbonconstants.RUN1.mu_purch == 0.5


In [5]:
type(Carbonconstants.RUN1.mu_max_purch)

float

##### Step 2.

Read the data and perform some cleanup


In [176]:
import json, re

In [177]:
RE_TRAILING_COMMA = re.compile(r',(?=\s*?[\}\]])')

In [178]:
with open("data.json", "r") as read_content:
  data = RE_TRAILING_COMMA.sub('', read_content.read())

In [179]:
data

'[\n  {\n    "ISIN": "US0000000000",\n    "Total Energy Use": 7000000,\n    "Total CO2 Equivalents Emissions": 94972.49198,\n    "Renewable Energy Purchased": 10576.00479,\n    "Renewable Energy Produced": 96652.16115,\n    "Carbon Credit Value": 8171.323352\n  },\n  {\n    "ISIN": "US0000000001",\n    "Total Energy Use": 80000000,\n    "Total CO2 Equivalents Emissions": 306900.6192,\n    "Renewable Energy Purchased": 31961.79405,\n    "Renewable Energy Produced": 38372.1921\n  },\n  {\n    "ISIN": "US0000000002",\n    "Total Energy Use": 153000000,\n    "Total CO2 Equivalents Emissions": 171320.1651,\n    "Renewable Energy Purchased": 62884.91047,\n    "Renewable Energy Produced": 10689.83474,\n    "Carbon Credit Value": 84303.7259\n  },\n  {\n    "ISIN": "US0000000003",\n    "Total Energy Use": 226000000,\n    "Total CO2 Equivalents Emissions": 2250.272892,\n    "Renewable Energy Purchased": 12949.74785,\n    "Renewable Energy Produced": null,\n    "Carbon Credit Value": null\n  },\n

In [181]:
carbondata=json.loads(data)

In [182]:
carbondata[0]

{'ISIN': 'US0000000000',
 'Total Energy Use': 7000000,
 'Total CO2 Equivalents Emissions': 94972.49198,
 'Renewable Energy Purchased': 10576.00479,
 'Renewable Energy Produced': 96652.16115,
 'Carbon Credit Value': 8171.323352}

In [183]:
def removew(d):
    clean_d = {k.replace(' ', '_'):v for k, v in d.items()}
    return clean_d

##### For each item in list 

Run the removew function to replace white spaces with underscores - to align them with a dict we will be building





In [187]:
carbondata = [removew(s) for s in carbondata]

In [202]:
carbondata

[{'ISIN': 'US0000000000',
  'Total_Energy_Use': 7000000,
  'Total_CO2_Equivalents_Emissions': 94972.49198,
  'Renewable_Energy_Purchased': 10576.00479,
  'Renewable_Energy_Produced': 96652.16115,
  'Carbon_Credit_Value': 8171.323352},
 {'ISIN': 'US0000000001',
  'Total_Energy_Use': 80000000,
  'Total_CO2_Equivalents_Emissions': 306900.6192,
  'Renewable_Energy_Purchased': 31961.79405,
  'Renewable_Energy_Produced': 38372.1921},
 {'ISIN': 'US0000000002',
  'Total_Energy_Use': 153000000,
  'Total_CO2_Equivalents_Emissions': 171320.1651,
  'Renewable_Energy_Purchased': 62884.91047,
  'Renewable_Energy_Produced': 10689.83474,
  'Carbon_Credit_Value': 84303.7259},
 {'ISIN': 'US0000000003',
  'Total_Energy_Use': 226000000,
  'Total_CO2_Equivalents_Emissions': 2250.272892,
  'Renewable_Energy_Purchased': 12949.74785,
  'Renewable_Energy_Produced': None,
  'Carbon_Credit_Value': None},
 {'ISIN': 'US0000000004',
  'Total_Energy_Use': 299000000,
  'Total_CO2_Equivalents_Emissions': 132696.7376,


In [201]:
pd.DataFrame([dict(s) for s in carbondata])

Unnamed: 0,ISIN,Total_Energy_Use,Total_CO2_Equivalents_Emissions,Renewable_Energy_Purchased,Renewable_Energy_Produced,Carbon_Credit_Value,CO2_Analytic
0,US0000000000,7000000,94972.49198,10576.00479,96652.16115,8171.323352,
1,US0000000001,80000000,306900.6192,31961.79405,38372.1921,,
2,US0000000002,153000000,171320.1651,62884.91047,10689.83474,84303.7259,
3,US0000000003,226000000,2250.272892,12949.74785,,,
4,US0000000004,299000000,132696.7376,42559.65263,69567.95366,17515.5,
5,US0000000005,0,204269.3867,9991.766644,55829.34503,26647.856,
6,US0000000006,445000000,64963.62994,9515.906056,24720.73308,10556.11359,53170.89797
7,US0000000007,518000000,89288.41927,32904.92748,4051.865565,8902.593156,80180.67966
8,US0000000008,591000000,59887.44837,4408.314004,31232.14481,8838.602818,
9,US0000000009,664000000,270793.8773,6147.617183,9418.298827,2135.478871,


# Validate the data

Step 3.

Using Pydantic for type checking and any missing but expected data needed by the formula

In [212]:
from pydantic import BaseModel, validator
import pandas as pd 

class CarbonCO2Data(BaseModel):
    ISIN: str
    Total_Energy_Use: float
    Total_CO2_Equivalents_Emissions: float
    Renewable_Energy_Purchased: float
    Renewable_Energy_Produced: float
    Carbon_Credit_Value: float
    CO2_Analytic: float

In [215]:
carbondata[6]

{'ISIN': 'US0000000006',
 'Total_Energy_Use': 445000000,
 'Total_CO2_Equivalents_Emissions': 64963.62994,
 'Renewable_Energy_Purchased': 9515.906056,
 'Renewable_Energy_Produced': 24720.73308,
 'Carbon_Credit_Value': 10556.11359,
 'CO2_Analytic': 53170.89797}

In [226]:
CarbonCO2Data(**carbondata[6])

CarbonCO2Data(ISIN='US0000000006', Total_Energy_Use=445000000.0, Total_CO2_Equivalents_Emissions=64963.62994, Renewable_Energy_Purchased=9515.906056, Renewable_Energy_Produced=24720.73308, Carbon_Credit_Value=10556.11359, CO2_Analytic=53170.89797)

In [240]:
def validate_df_data(data, model: BaseModel): 

    good_data = []
    bad_data = []
    for index,row in enumerate(data): 
        try:
            model(**row)  # unpacks our dictionary 
            good_data.append(row)  # appends valid data to a new 'good' dictionary
            print(f"Validation good for row {index}") #debug log
        except ValidationError as exc:
            row['Errors'] = [error_message['msg'] for error_message in exc.errors()]
            row['Error_row_num'] = index 
            bad_data.append(row)  # appends valid data to a new 'bad' dictionary
            print(f"Encountered validation error when parsing row {index} with {exc} Skipping...\n") #debug log 
          
    return (good_data, bad_data)

In [241]:
good_carbon_data, bad_carbon_data = validate_df_data(carbondata,CarbonCO2Data)

Encountered validation error when parsing row 0 with 1 validation error for CarbonCO2Data
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 1 with 2 validation errors for CarbonCO2Data
Carbon_Credit_Value
  field required (type=value_error.missing)
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 2 with 1 validation error for CarbonCO2Data
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 3 with 3 validation errors for CarbonCO2Data
Renewable_Energy_Produced
  none is not an allowed value (type=type_error.none.not_allowed)
Carbon_Credit_Value
  none is not an allowed value (type=type_error.none.not_allowed)
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 4 with 1 validation error for CarbonCO2Data
CO2_Analytic
  field requi

# Calculate 

Step 4

Using elements from the Enum constants and detected values in the data

In [269]:
good_carbon_data

[{'ISIN': 'US0000000006',
  'Total_Energy_Use': 445000000,
  'Total_CO2_Equivalents_Emissions': 64963.62994,
  'Renewable_Energy_Purchased': 9515.906056,
  'Renewable_Energy_Produced': 24720.73308,
  'Carbon_Credit_Value': 10556.11359,
  'CO2_Analytic': 53170.89797},
 {'ISIN': 'US0000000007',
  'Total_Energy_Use': 518000000,
  'Total_CO2_Equivalents_Emissions': 89288.41927,
  'Renewable_Energy_Purchased': 32904.92748,
  'Renewable_Energy_Produced': 4051.865565,
  'Carbon_Credit_Value': 8902.593156,
  'CO2_Analytic': 80180.67966},
 {'ISIN': 'US0000000011',
  'Total_Energy_Use': 810000000,
  'Total_CO2_Equivalents_Emissions': 219384.6241,
  'Renewable_Energy_Purchased': 18766.81362,
  'Renewable_Energy_Produced': 13746.23147,
  'Carbon_Credit_Value': 47047.32813,
  'CO2_Analytic': 171647.988}]

In [262]:
c02data = good_carbon_data[0]
c02data['Total_CO2_Equivalents_Emissions']
c02data['CO2_Analytic']
c02data['Renewable_Energy_Purchased']
c02data['Total_Energy_Use']

# adjusted = ((c02data['Total_CO2_Equivalents_Emissions'] - co2data['CO2_Analytic'])*(1 - numpy.minimum((Carbonconstants.RUN1.mu_purch*(c02data['Renewable_Energy_Purchased']/c02data['Total_Energy_Use'])),Carbonconstants.RUN1.mu_max_purch))) - (c02data['Renewable_Energy_Produced']Carbonconstants.RUN1.phi_prod)



445000000

In [270]:
adjusted = ((c02data['Total_CO2_Equivalents_Emissions'] - c02data['CO2_Analytic'])*(1 - numpy.minimum((Carbonconstants.RUN1.mu_purch*(c02data['Renewable_Energy_Purchased']/c02data['Total_Energy_Use'])),Carbonconstants.RUN1.mu_max_purch))) - (c02data['Renewable_Energy_Produced']*Carbonconstants.RUN1.phi_prod)


In [271]:
adjusted

10556.569227764528

In [289]:
import numpy
def calc_adjtotal_co2_adjusted_emissions(c02data):
  adjusted = ((c02data['Total_CO2_Equivalents_Emissions'] - c02data['CO2_Analytic'])*(1 - numpy.minimum((Carbonconstants.RUN1.mu_purch*(c02data['Renewable_Energy_Purchased']/c02data['Total_Energy_Use'])),Carbonconstants.RUN1.mu_max_purch))) - (c02data['Renewable_Energy_Produced']*Carbonconstants.RUN1.phi_prod)
  c02data['Adjusted_Total_CO2_Equivalents_Emissions'] = adjusted  
  return c02data

def validate_df_data(data, model: BaseModel): 

    good_data = []
    bad_data = []
    for index,row in enumerate(data): 
        try:
            model(**row)  # unpacks our dictionary 
            calc_adjtotal_co2_adjusted_emissions(row) # call the adjustment calculation
            good_data.append(row)  # appends valid data to a new 'good' dictionary
            print(f"***** Validation good for row {index} ***** \n") #debug log
            #print(f"{calc_total_co2_adjusted_emissions(row)}") debug log
        except ValidationError as exc:
            row['Errors'] = [error_message['msg'] for error_message in exc.errors()]
            row['Error_row_num'] = index 
            bad_data.append(row)  # appends valid data to a new 'bad' dictionary
            print(f"Encountered validation error when parsing row {index} with {exc} Skipping...\n") #debug log 
          
    return (good_data, bad_data)

In [290]:
validate_df_data(carbondata,CarbonCO2Data)

Encountered validation error when parsing row 0 with 1 validation error for CarbonCO2Data
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 1 with 2 validation errors for CarbonCO2Data
Carbon_Credit_Value
  field required (type=value_error.missing)
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 2 with 1 validation error for CarbonCO2Data
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 3 with 3 validation errors for CarbonCO2Data
Renewable_Energy_Produced
  none is not an allowed value (type=type_error.none.not_allowed)
Carbon_Credit_Value
  none is not an allowed value (type=type_error.none.not_allowed)
CO2_Analytic
  field required (type=value_error.missing) Skipping...

Encountered validation error when parsing row 4 with 1 validation error for CarbonCO2Data
CO2_Analytic
  field requi

([{'ISIN': 'US0000000006',
   'Total_Energy_Use': 445000000,
   'Total_CO2_Equivalents_Emissions': 64963.62994,
   'Renewable_Energy_Purchased': 9515.906056,
   'Renewable_Energy_Produced': 24720.73308,
   'Carbon_Credit_Value': 10556.11359,
   'CO2_Analytic': 53170.89797,
   'Adjusted_Total_CO2_Equivalents_Emissions': 10556.569227764528},
  {'ISIN': 'US0000000007',
   'Total_Energy_Use': 518000000,
   'Total_CO2_Equivalents_Emissions': 89288.41927,
   'Renewable_Energy_Purchased': 32904.92748,
   'Renewable_Energy_Produced': 4051.865565,
   'Carbon_Credit_Value': 8902.593156,
   'CO2_Analytic': 80180.67966,
   'Adjusted_Total_CO2_Equivalents_Emissions': 8904.857056159875},
  {'ISIN': 'US0000000011',
   'Total_Energy_Use': 810000000,
   'Total_CO2_Equivalents_Emissions': 219384.6241,
   'Renewable_Energy_Purchased': 18766.81362,
   'Renewable_Energy_Produced': 13746.23147,
   'Carbon_Credit_Value': 47047.32813,
   'CO2_Analytic': 171647.988,
   'Adjusted_Total_CO2_Equivalents_Emissions

# Step 5. SQL into DB

Insert commands into Postgres

In [292]:
columns = good_carbon_data[0].keys()
columns

dict_keys(['ISIN', 'Total_Energy_Use', 'Total_CO2_Equivalents_Emissions', 'Renewable_Energy_Purchased', 'Renewable_Energy_Produced', 'Carbon_Credit_Value', 'CO2_Analytic', 'Adjusted_Total_CO2_Equivalents_Emissions'])

In [294]:
query = "INSERT INTO projects ({}) VALUES %s".format(','.join(columns))
query

'INSERT INTO projects (ISIN,Total_Energy_Use,Total_CO2_Equivalents_Emissions,Renewable_Energy_Purchased,Renewable_Energy_Produced,Carbon_Credit_Value,CO2_Analytic,Adjusted_Total_CO2_Equivalents_Emissions) VALUES %s'

In [296]:
values = [[value for value in each.values()] for each in good_carbon_data]
values

[['US0000000006',
  445000000,
  64963.62994,
  9515.906056,
  24720.73308,
  10556.11359,
  53170.89797,
  10556.569227764528],
 ['US0000000007',
  518000000,
  89288.41927,
  32904.92748,
  4051.865565,
  8902.593156,
  80180.67966,
  8904.857056159875],
 ['US0000000011',
  810000000,
  219384.6241,
  18766.81362,
  13746.23147,
  47047.32813,
  171647.988,
  47048.771523689764]]

In [300]:
import psycopg2
from psycopg2.extras import execute_values

columns = good_carbon_data[0].keys()
query = "INSERT INTO projects ({}) VALUES %s".format(','.join(columns))

# convert projects values to list of lists
values = [[value for value in each.values()] for each in good_carbon_data]

# demo cursor only with code to show how to upsert 
# use execute_values fast exec helper in postgres https://www.psycopg.org/docs/extras.html#psycopg2.extras.execute_values
try:
  cursor = conn.cursor()
  execute_values(cursor, query, values)
  cursor.close()
  conn.commit()
except Exception:
  print(f"=> Exception upserting - check postgres settings ***** \n") #debug log 


=> Exception upserting - check postgres settings ***** 



# Concluding Notes

The decimal places could need adjusting e.g. two decimal places.

There are other ways to validate e.g. pytest.

Example TDD patterns if developed into prod code -> https://testdriven.io/blog/modern-tdd/
