# Optimizing for CO2 storage from CO2 Emitters

## Setup

In [1]:
import numpy as np
import pandas as pd
from pulp import *
from geopy.distance import distance
from geopy.distance import great_circle
import random
import pickle

In [16]:
def facility_cleaning(fac_df, emiss_df):
    facilitiesB = fac_df.rename(inplace=False,
                                    columns={'Facility Name':'facility_name',
                                             'City':'city',
                                             'Primary NAICS Code':'naics_code',
                                             'Industry Type (subparts)':'industry_subparts',
                                             'Industry Type (sectors)':'industry_sectors'})

    emsCA2019loc = pd.merge(emiss_df, facilitiesB[['Facility Id',
                                                    'facility_name',
    #                                               'Facility Name2',
                                                   'city',
                                                   'Zip Code',
                                                   'Address',
                                                   'County',
                                                   'Latitude',
                                                   'Longitude',
    #                                               'Primary NAICS Code2',
                                                   'industry_subparts',
                                                   'industry_sectors']],
                            how='left', on='Facility Id')
    emsCA2019locagg = emsCA2019loc[['Facility Id','facility_name','Latitude','Longitude',
                                    'Unit CO2 emissions (non-biogenic)']].dropna().groupby(
                                        ['Facility Id','facility_name']).agg({'Latitude':'min',
                                                            'Longitude':'min',
                                                            'Unit CO2 emissions (non-biogenic)':'sum'})
    emsCA2019locagg.reset_index(inplace=True)
    emsCA2019locagg = emsCA2019locagg.rename(columns={'Facility Id':'facility_id',
                                                      'facility_name':'facility_name',
                                                      'Latitude':'lat',
                                                      'Longitude':'lon',
                                                      'Unit CO2 emissions (non-biogenic)':'annual_emissions'})
    emsCA2019locagg = emsCA2019locagg[emsCA2019locagg['annual_emissions'] > 0]
    facility_latlon = emsCA2019locagg[['lat', 'lon']].to_records(index=False)
    facility_ids = emsCA2019locagg['facility_id'].tolist()
    facility_names = emsCA2019locagg['facility_name'].tolist()
    supply_volume = emsCA2019locagg['annual_emissions'].tolist()
    supply_trucks = [i/25 for i in supply_volume]
    return facility_latlon, facility_ids, facility_names, supply_volume, supply_trucks


def get_distances(facility_latlon, pools_latlon):
    pools_distance_matrix = []
    for i in range(len(facility_latlon)):
        distances = []
        for j in range(len(pools_latlon)):
            dist = great_circle(facility_latlon[i], pools_latlon[j]).miles
            distances.append(dist)
        pools_distance_matrix.append(distances)
    return pools_distance_matrix


def get_complex_costs(distance_matrix, pool_trucks):
    complex_costs = []
    for i in range(len(distance_matrix)):
        new = []
        for j in range(len(distance_matrix[i])):
            opening_cost = 2000000/pool_trucks[j]
            # opening cost = $2,000,000/number of trucks for the pool
            # cost / truck = 2*$2*distance + opening cost
            cost = 4 * distance_matrix[i][j] + opening_cost
            new.append(cost)
        complex_costs.append(new)
    return complex_costs


def run_optimization(n_receivers, n_suppliers, supply_trucks, demand_trucks, costs, distances, facility_names, facility_latlon, pools_ids, pools_latlon, industry_str, year):
    print("Running optimization for {} industry and {} year timeframe".format(industry_str, year))
    supply_trucks = [i*year for i in supply_trucks]
    
    p_prob = LpProblem('Unbalaced_Transportation_Problem', LpMinimize)
    #costs = pools_distance_matrix_complexcost
    routes = [(i, j) for i in range(n_suppliers) for j in range(n_receivers)]

    x = LpVariable.dicts('X', routes, lowBound=0)
    p_prob += lpSum([x[i, j] * costs[i][j] for i in range(n_suppliers) for j in range(n_receivers)])

    for i in range(n_suppliers):
        p_prob += lpSum([x[i, j] for j in range(n_receivers)]) == supply_trucks[i]

    for j in range(n_receivers):
        p_prob += lpSum([x[i, j] for i in range(n_suppliers)]) <= demand_trucks[j]

    # Solving problem
    p_prob.solve()
    print("Problem successfully solved")
    
    volumes_moved = [i.varValue for i in p_prob.variables()]
    volume_matrix = []
    k=0
    while k < n_suppliers:
        vols = []
        for i in range(n_receivers):
            j = i + k*n_receivers
            vols.append(volumes_moved[j])
        volume_matrix.append(vols)
        k+=1
    print("Volume matrix created")
        
    results = []
    for i in range(n_suppliers):
        f_name = facility_names[i]
        f_loc = facility_latlon[i]
        for j in range(n_receivers):
            vol = volume_matrix[i][j]
            if vol > 0:
                w_id = pools_ids[j]
                w_loc = pools_latlon[j]
                dist = distances[i][j]
                cost = costs[i][j]
                r = [f_name, f_loc, w_id, w_loc, dist, cost, vol, industry_str, year]
                results.append(r)
    results_df = pd.DataFrame(results, columns = ['facility_name', 'facility_location','pool_id','pool_location','distance','cost','n_trucks', 'industry', 'timeframe'])
    print("Results created")
    print()
    print("--------------------------------------------")
    
    return results_df


## Data import and cleaning

In [17]:
wells = pd.read_csv("data/wells/AllWells_20210915.csv")

# Oil & Gas, Dry Gas (no liquids), Gas and "Liquefied Gas"
wells_oilgas = wells[ (wells['WellType'] == 'OG')  |
                      (wells['WellType'] == 'DG')  |
                      (wells['WellType'] == 'GAS') |
                      (wells['WellType'] == 'LG') ]
wells_latlon = wells_oilgas[['Latitude', 'Longitude']].to_records(index=False)
wells_ids = wells_oilgas['API'].tolist()
pooled_wells = pd.read_csv("data/pool_volumes.csv")
pools_latlon = pooled_wells[['Latitude', 'Longitude']].to_records(index=False)
pools_ids = pooled_wells['pool_id'].tolist()
pools_volumes = pooled_wells['totalco2'].tolist()
pools_trucks = [i/25 for i in pools_volumes]
n_pools = len(pools_ids)

In [18]:
emissions = pd.read_csv(r'./data/Emissions by Unit and Fuel Type - csv/UNIT_DATA-Table 1.csv')
emsCA2019 = emissions[emissions['Reporting Year'] == 2019]
facilities = pd.read_excel(r'./data/ghgp_data_2019.xlsx',
                          sheet_name='Direct Emitters', skiprows=3)
# One Michegan emitter lists its CA headquarters. Retag it as Michigan
facilities.loc[facilities['Facility Id'] == 1000594, 'State'] = 'MI'

## Prepare data for optimization

In [19]:
facilities = facilities[(facilities.State == 'CA')]
facilities_cement = facilities[facilities['Cement Production'] > 0]
facilities_oil = facilities[facilities['Petroleum Refining'] > 0]
facilities_energy = facilities[facilities['Electricity Generation'] > 0]
facilities_steel = facilities[facilities['Iron and Steel Production'] > 0]

facility_latlon, facility_ids, facility_names, supply_volume, supply_trucks = facility_cleaning(facilities, emsCA2019)
n_facilities = len(facility_ids)

facility_c_latlon, facility_c_ids, facility_c_names, supply_c_volume, supply_c_trucks = facility_cleaning(facilities_cement, emsCA2019)
n_facilities_c = len(facility_c_ids)

facility_o_latlon, facility_o_ids, facility_o_names, supply_o_volume, supply_o_trucks = facility_cleaning(facilities_oil, emsCA2019)
n_facilities_o = len(facility_o_ids)

facility_e_latlon, facility_e_ids, facility_e_names, supply_e_volume, supply_e_trucks = facility_cleaning(facilities_energy, emsCA2019)
n_facilities_e = len(facility_e_ids)

facility_s_latlon, facility_s_ids, facility_s_names, supply_s_volume, supply_s_trucks = facility_cleaning(facilities_steel, emsCA2019)
n_facilities_s = len(facility_s_ids)

facility_total_latlon = np.concatenate((facility_c_latlon, facility_o_latlon, facility_e_latlon, facility_s_latlon))
facility_total_ids = np.concatenate((facility_c_ids, facility_o_ids, facility_e_ids, facility_s_ids))
facility_total_names = np.concatenate((facility_c_names, facility_o_names, facility_e_names, facility_s_names))
supply_volume_total = np.concatenate((supply_c_volume, supply_o_volume, supply_e_volume, supply_s_volume))
supply_trucks_total = np.concatenate((supply_c_trucks, supply_o_trucks, supply_e_trucks, supply_s_trucks))
n_facilities_total = len(facility_total_ids)

In [20]:
facilitiesB = facilities_cement.rename(inplace=False,
                                    columns={'Facility Name':'facility_name',
                                             'City':'city',
                                             'Primary NAICS Code':'naics_code',
                                             'Industry Type (subparts)':'industry_subparts',
                                             'Industry Type (sectors)':'industry_sectors'})
emsCA2019loc = pd.merge(emsCA2019, facilitiesB[['Facility Id',
                                                    'facility_name',
    #                                               'Facility Name2',
                                                   'city',
                                                   'Zip Code',
                                                   'Address',
                                                   'County',
                                                   'Latitude',
                                                   'Longitude',
    #                                               'Primary NAICS Code2',
                                                   'industry_subparts',
                                                   'industry_sectors']],
                            how='left', on='Facility Id')
emsCA2019locagg = emsCA2019loc[['Facility Id','facility_name','Latitude','Longitude',
                                    'Unit CO2 emissions (non-biogenic)']].dropna().groupby(
                                        ['Facility Id','facility_name']).agg({'Latitude':'min',
                                                            'Longitude':'min',
                                                            'Unit CO2 emissions (non-biogenic)':'sum'})
emsCA2019locagg.reset_index(inplace=True)
emsCA2019locagg = emsCA2019locagg.rename(columns={'Facility Id':'facility_id',
                                                      'facility_name':'facility_name',
                                                      'Latitude':'lat',
                                                      'Longitude':'lon',
                                                      'Unit CO2 emissions (non-biogenic)':'annual_emissions'})
emsCA2019locagg = emsCA2019locagg[emsCA2019locagg['annual_emissions'] > 0]
emsCA2019locagg

Unnamed: 0,facility_id,facility_name,lat,lon,annual_emissions
0,1002308,CEMEX Construction Materials Pacific LLC,34.6222,-117.1001,12834.3
1,1002431,HANSON PERMANENTE CEMENT,37.3181,-122.091,464.9
2,1004612,LEHIGH SOUTHWEST CEMENT CO.,40.7369,-122.3223,1072.2
3,1005662,Mitsubishi Cement Corp Cushenbury Cement Plant,34.437557,-116.891034,421.7
4,1006642,NATIONAL CEMENT CO OF CALIFORNIA INC,34.819863,-118.748732,2476.8
5,1006842,CalPortland Company Mojave Plant,35.029298,-118.316236,712.0
6,1007927,CalPortland Company Oro Grande Plant,34.6045,-117.3382,270.5


In [21]:
distances_c = get_distances(facility_c_latlon, pools_latlon)
complex_costs_c = get_complex_costs(distances_c, pools_trucks)

distances_o = get_distances(facility_o_latlon, pools_latlon)
complex_costs_o = get_complex_costs(distances_o, pools_trucks)

distances_e = get_distances(facility_e_latlon, pools_latlon)
complex_costs_e = get_complex_costs(distances_e, pools_trucks)

distances_s = get_distances(facility_s_latlon, pools_latlon)
complex_costs_s = get_complex_costs(distances_s, pools_trucks)

distances_total = get_distances(facility_total_latlon, pools_latlon)
complex_costs_total = get_complex_costs(distances_total, pools_trucks)

In [22]:
industry_list = ['cement', 'oil_refining', 'steel', 'energy', 'total']
year_list = [1, 5, 10, 20, 30]

inputs_list = []
for i in year_list:
    cement = [n_facilities_c, supply_c_trucks, complex_costs_c, distances_c, facility_c_names, facility_c_latlon, 'cement', i]
    oil = [n_facilities_o, supply_o_trucks, complex_costs_o, distances_o, facility_o_names, facility_o_latlon, 'oil_refining', i]
    energy = [n_facilities_e, supply_e_trucks, complex_costs_e, distances_e, facility_e_names, facility_e_latlon, 'energy', i]
    steel = [n_facilities_s, supply_s_trucks, complex_costs_s, distances_s, facility_s_names, facility_s_latlon, 'steel', i]
    total = [n_facilities_s, supply_trucks_total, complex_costs_total, distances_total, facility_total_names, facility_total_latlon, 'total', i]
    
    inputs_list.append(cement)
    inputs_list.append(oil)
    inputs_list.append(steel)
    inputs_list.append(energy)
    inputs_list.append(total)

    

## Run optimization and write results

In [23]:
detailed_results = pd.DataFrame()

for i in inputs_list:
    results = run_optimization(n_pools,
                               i[0],
                               i[1],
                               pools_trucks,
                               i[2],
                               i[3],
                               i[4],
                               i[5],
                               pools_ids,
                               pools_latlon,
                               i[6],
                               i[7])
    detailed_results = detailed_results.append(results)


Running optimization for cement industry and 1 year timeframe
Problem successfully solved
Volume matrix created
Results created

--------------------------------------------
Running optimization for oil_refining industry and 1 year timeframe
Problem successfully solved
Volume matrix created
Results created

--------------------------------------------
Running optimization for steel industry and 1 year timeframe
Problem successfully solved
Volume matrix created
Results created

--------------------------------------------
Running optimization for energy industry and 1 year timeframe
Problem successfully solved
Volume matrix created
Results created

--------------------------------------------
Running optimization for total industry and 1 year timeframe
Problem successfully solved
Volume matrix created
Results created

--------------------------------------------
Running optimization for cement industry and 5 year timeframe
Problem successfully solved
Volume matrix created
Results create

In [24]:
# re calculate cost as 2 * ($2*miles)*(volume/25) + 2,000,000
detailed_results['volume'] = detailed_results['n_trucks']*25
detailed_results['cost'] = 4*detailed_results['distance']*(detailed_results['volume']/25) + 2000000
detailed_results['cost_per_ton'] = detailed_results['cost']/detailed_results['volume']
detailed_results.sort_values(by=['cost_per_ton'])

Unnamed: 0,facility_name,facility_location,pool_id,pool_location,distance,cost,n_trucks,industry,timeframe,volume,cost_per_ton
75,AES Alamitos,"[33.7688, -118.1009]",Wilmington:Fault Block V-B:Lower Terminal,"[33.759059905, -118.21071625]",6.343615,9.813351e+06,307921.850,energy,10,7698046.25,1.274785
58,AES Alamitos,"[33.7688, -118.1009]",Wilmington:Fault Block V-B:Lower Terminal,"[33.759059905, -118.21071625]",6.343615,8.166919e+06,243036.440,energy,5,6075911.00,1.344147
132,Valero Wilmington Asphalt Plant,"[33.79699, -118.23971]",Wilmington:Fault Block I:Schist,"[33.765217, -118.233666]",2.222575,2.567409e+06,63823.370,oil_refining,30,1595584.25,1.609071
74,AES Alamitos,"[33.7688, -118.1009]",Old Wilmington (ABD):Fault Block VIII Offshore...,"[33.741771, -118.162185]",3.985213,3.311183e+06,82252.994,energy,10,2056324.85,1.610243
229,BEAR MOUNTAIN LIMITED,"[35.44735, -119.08652]",Canfield Ranch:Old River (ABD):Stevens,"[35.261257, -119.136154]",13.158499,4.390326e+07,796125.430,energy,30,19903135.75,2.205846
...,...,...,...,...,...,...,...,...,...,...,...
2,LEHIGH SOUTHWEST CEMENT CO.,"[40.7369, -122.3223]",Tejon:Western:Transition,"[34.9871215, -118.927774]",438.208754,2.075176e+06,42.888,cement,1,1072.20,1935.437034
5,CalPortland Company Mojave Plant,"[35.029298, -118.316236]",McKittrick:Northeast:Carneros,"[35.318584, -119.622457]",76.431114,2.008707e+06,28.480,cement,1,712.00,2821.217742
1,HANSON PERMANENTE CEMENT,"[37.3181, -122.091]",Orcutt:Main:,"[34.82379913, -120.4094315]",196.256199,2.014598e+06,18.596,cement,1,464.90,4333.401422
3,Mitsubishi Cement Corp Cushenbury Cement Plant,"[34.437557, -116.891034]",Wilmington:Fault Block V-B:Lower Terminal,"[33.759059905, -118.21071625]",88.873503,2.005996e+06,16.868,cement,1,421.70,4756.927847


In [25]:
summary_results = detailed_results.groupby(by=['industry','timeframe']).agg(
    volume_transferred=('volume', 'sum'),
    total_cost=('cost', 'sum'),
    total_distance=('distance', 'sum')  
)
summary_results['cost_per_ton'] = summary_results['total_cost']/summary_results['volume_transferred']
summary_results



Unnamed: 0_level_0,Unnamed: 1_level_0,volume_transferred,total_cost,total_distance,cost_per_ton
industry,timeframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cement,1,18252.4,14437360.0,1343.970661,790.984005
cement,5,91262.0,16186780.0,1343.970661,177.366069
cement,10,182524.0,18373560.0,1343.970661,100.663828
cement,20,365048.0,22747130.0,1343.970661,62.312707
cement,30,547572.0,27120690.0,1343.970661,49.529
energy,1,27951620.0,991670800.0,16156.576702,35.478119
energy,5,139758100.0,4216909000.0,18861.65051,30.17292
energy,10,279516200.0,8708564000.0,24872.893488,31.155851
energy,20,559032300.0,16664180000.0,32792.606566,29.808967
energy,30,838548500.0,23928420000.0,51978.944864,28.535525


In [26]:
detailed_results['volume'] = detailed_results['volume']/1000
detailed_results['cost'] = detailed_results['cost']/1000000

In [27]:
summary_results['volume_transferred'] = summary_results['volume_transferred']/1000
summary_results['total_cost'] = summary_results['total_cost']/1000000

In [28]:
detailed_results.sort_values(by=['cost_per_ton'])

Unnamed: 0,facility_name,facility_location,pool_id,pool_location,distance,cost,n_trucks,industry,timeframe,volume,cost_per_ton
75,AES Alamitos,"[33.7688, -118.1009]",Wilmington:Fault Block V-B:Lower Terminal,"[33.759059905, -118.21071625]",6.343615,9.813351,307921.850,energy,10,7698.04625,1.274785
58,AES Alamitos,"[33.7688, -118.1009]",Wilmington:Fault Block V-B:Lower Terminal,"[33.759059905, -118.21071625]",6.343615,8.166919,243036.440,energy,5,6075.91100,1.344147
132,Valero Wilmington Asphalt Plant,"[33.79699, -118.23971]",Wilmington:Fault Block I:Schist,"[33.765217, -118.233666]",2.222575,2.567409,63823.370,oil_refining,30,1595.58425,1.609071
74,AES Alamitos,"[33.7688, -118.1009]",Old Wilmington (ABD):Fault Block VIII Offshore...,"[33.741771, -118.162185]",3.985213,3.311183,82252.994,energy,10,2056.32485,1.610243
229,BEAR MOUNTAIN LIMITED,"[35.44735, -119.08652]",Canfield Ranch:Old River (ABD):Stevens,"[35.261257, -119.136154]",13.158499,43.903262,796125.430,energy,30,19903.13575,2.205846
...,...,...,...,...,...,...,...,...,...,...,...
2,LEHIGH SOUTHWEST CEMENT CO.,"[40.7369, -122.3223]",Tejon:Western:Transition,"[34.9871215, -118.927774]",438.208754,2.075176,42.888,cement,1,1.07220,1935.437034
5,CalPortland Company Mojave Plant,"[35.029298, -118.316236]",McKittrick:Northeast:Carneros,"[35.318584, -119.622457]",76.431114,2.008707,28.480,cement,1,0.71200,2821.217742
1,HANSON PERMANENTE CEMENT,"[37.3181, -122.091]",Orcutt:Main:,"[34.82379913, -120.4094315]",196.256199,2.014598,18.596,cement,1,0.46490,4333.401422
3,Mitsubishi Cement Corp Cushenbury Cement Plant,"[34.437557, -116.891034]",Wilmington:Fault Block V-B:Lower Terminal,"[33.759059905, -118.21071625]",88.873503,2.005996,16.868,cement,1,0.42170,4756.927847


In [29]:
list_results = detailed_results.values.tolist()
file_name = 'data/results/full_detailed_results.pkl'
open_file = open(file_name, "wb")
pickle.dump(list_results, open_file)
open_file.close()

html_results = detailed_results.to_html()
html_file = open("data/results/full_detailed_results.html", "w")
html_file.write(html_results)
html_file.close()

In [30]:
total_pool_volume = sum(pools_volumes)/1000

summary_results['percent_pool_volume_used'] = summary_results['volume_transferred'] / total_pool_volume
summary_results

Unnamed: 0_level_0,Unnamed: 1_level_0,volume_transferred,total_cost,total_distance,cost_per_ton,percent_pool_volume_used
industry,timeframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cement,1,18.2524,14.437356,1343.970661,790.984005,1.1e-05
cement,5,91.262,16.186782,1343.970661,177.366069,5.3e-05
cement,10,182.524,18.373564,1343.970661,100.663828,0.000105
cement,20,365.048,22.747129,1343.970661,62.312707,0.00021
cement,30,547.572,27.120693,1343.970661,49.529,0.000315
energy,1,27951.615605,991.670757,16156.576702,35.478119,0.016088
energy,5,139758.0781,4216.909352,18861.65051,30.17292,0.080441
energy,10,279516.156315,8708.56364,24872.893488,31.155851,0.160881
energy,20,559032.312795,16664.175893,32792.606566,29.808967,0.321762
energy,30,838548.468008,23928.42114,51978.944864,28.535525,0.482643


In [31]:
list_summary_results = summary_results.values.tolist()
file_name = 'data/results/summary_results.pkl'
open_file = open(file_name, "wb")
pickle.dump(list_summary_results, open_file)
open_file.close()

html_summary_results = detailed_results.to_html()
html_file = open("data/results/summary_results.html", "w")
html_file.write(html_summary_results)
html_file.close()