In [1]:
import pandas as pd
import requests, zipfile, io
!pip install pyxlsb
import pyxlsb
from IPython.core.display import HTML
import yfinance as yf

import warnings
warnings.filterwarnings('ignore')

# Set up some default parameters for graphing
import matplotlib.pyplot as plt
from matplotlib import cycler
colour = "#00C2AB" # The default colour for the barcharts
colors = cycler('color', ['#4FBBA9', '#E56D13', '#D43A69','#25539f', '#88BB44', '#FFBBBB'])
plt.rc('axes', facecolor='#E6E6E6', edgecolor='none',
       axisbelow=True, grid=True, prop_cycle=colors)
plt.rc('grid', color='w', linestyle='solid')
plt.rc('xtick', direction='out', color='gray')
plt.rc('ytick', direction='out', color='gray')
plt.rc('patch', edgecolor='#E6E6E6')
plt.rc('lines', linewidth=2)
font = {'family' : 'DejaVu Sans',
        'weight' : 'normal',
        'size'   : 16}
plt.rc('font', **font)
plt.rc('legend', fancybox = True, framealpha=1, shadow=True, borderpad=1)



In [107]:
# We will load in the sheets in the unzipped XLSX file that contain various kinds of direct emissions, from individual facilities and sectors/activities
ghg_fac = pd.read_excel("ghgp_data_2020.xlsx",
  skiprows = 1, # skip the first two rows, which are irrelevant
  header = 2, # the third row represents the header
  converters={'Primary NAICS Code':str}, # read the NAICS codes as strings
  sheet_name = ["Direct Emitters", "Onshore Oil & Gas Prod.", "Gathering & Boosting", 
    "Transmission Pipelines", "LDC - Direct Emissions", "SF6 from Elec. Equip.", "Suppliers"]) 

# We will also load in a file that tells us about the type of industry each facility is classified as
naics = pd.read_excel("https://www.census.gov/naics/2017NAICS/6-digit_2017_Codes.xlsx",
  header = 0, # the row that represents the header
  converters={'2017 NAICS Code':str,'2017 NAICS Title':str}
)
naics=naics[["2017 NAICS Code", "2017 NAICS Title"]]

In [187]:
ghg_fac["Suppliers"]["Emissions (tCO2e)"] = pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with Coal-based liquid fuel production"], errors='coerce').fillna(0) + \
  pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with Petroleum Products Produced"], errors='coerce').fillna(0) + \
  pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with Petroleum Products Imported"], errors='coerce').fillna(0) + \
  pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with Petroleum Products Exported"], errors='coerce').fillna(0) + \
  pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with Natural Gas Supply"], errors='coerce').fillna(0) + \
  pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with Natural Gas Liquids Supply"], errors='coerce').fillna(0) + \
  pd.to_numeric(ghg_fac["Suppliers"]["GHG Quantity Associated with CO2 Supply "], errors='coerce').fillna(0)

emissions_field_names = {
    "Direct Emitters": "Total reported direct emissions",
    "Onshore Oil & Gas Prod.": "Total reported emissions from Onshore Oil & Gas Production ",
    "Gathering & Boosting": "Total reported emissions from Gathering & Boosting",
    "Transmission Pipelines": "Total reported direct emissions from Transmission Pipelines",
    "LDC - Direct Emissions": "Total reported direct emissions from Local Distribution Companies",
    "SF6 from Elec. Equip.": "Total reported direct emissions from Electrical Equipment Use",
}

fac_directemitters_data = pd.DataFrame([], columns=["Facility", "Name", "Primary NAICS Code", "Emissions (tCO2e)", "Longitude", "Latitude"]) 
fac_suppliers_data = pd.DataFrame(ghg_fac["Suppliers"]) 

for key in emissions_field_names.keys():
  dump = ghg_fac[key]
  dump = dump[["Facility Id", "Facility Name", "Primary NAICS Code", emissions_field_names[key]]]
  dump = dump.rename(columns={"Facility Id": "Facility", "Facility Name": "Name", "Primary NAICS Code": "2017 NAICS Code", emissions_field_names[key]:"Emissions (tCO2e)"})
  dump["Emissions (tCO2e)"] = pd.to_numeric(dump["Emissions (tCO2e)"])
  fac_directemitters_data= fac_directemitters_data.append(dump)

fac_directemitters_data = fac_directemitters_data.groupby(by=["Facility", "Name", "2017 NAICS Code"]).sum().reset_index() #For presentation purposes, we'll add in the facility name
fac_suppliers_data=fac_suppliers_data.rename(columns={"Facility Id": "Facility", "Facility Name": "Name", "Primary NAICS Code": "2017 NAICS Code"})
fac_suppliers_data = fac_suppliers_data[["Facility", "Name", "2017 NAICS Code", "Emissions (tCO2e)"]]
fac_directemitters_data['2017 NAICS Code']=fac_directemitters_data['2017 NAICS Code'].astype(str)

# Look up industry information
fac_directemitters_data = fac_directemitters_data.merge(naics, on="2017 NAICS Code")
fac_suppliers_data = fac_suppliers_data.merge(naics, on="2017 NAICS Code")

sectors = ghg_fac['Direct Emitters'][["Facility Id","Industry Type (sectors)", "Longitude", "Latitude", "Methane (CH4) emissions ", "Nitrous Oxide (N2O) emissions "]]

fac_directemitters_data = fac_directemitters_data.set_index("Facility").join(sectors.set_index('Facility Id'))
fac_suppliers_data = fac_suppliers_data.set_index("Facility").join(sectors.set_index('Facility Id'))


In [188]:
url = "https://www.epa.gov/system/files/other-files/2021-10/ghgp_data_parent_company_10_2021.xlsb"
parents = pd.read_excel(url, engine='pyxlsb', sheet_name="2020", nrows=9144) # 9144 non-header rows in this file as of Nov 2021.
parents["PARENT COMPANY NAME"] = parents["PARENT COMPANY NAME"].str.upper() # Standardize company names to uppercase - sometimes EPA allows company names to be reported in upper and lower case, which intereferes with our ability to aggregate them
parents.columns

Index(['GHGRP FACILITY ID', 'FRS ID (FACILITY)', 'REPORTING YEAR',
       'FACILITY NAME', 'FACILITY ADDRESS', 'FACILITY CITY', 'FACILITY STATE',
       'FACILITY ZIP', 'FACILITY COUNTY', 'PARENT COMPANY NAME',
       'PARENT CO. STREET ADDRESS', 'PARENT CO. CITY', 'PARENT CO. STATE',
       'PARENT CO. ZIP', 'PARENT CO. PERCENT OWNERSHIP',
       'FACILITY NAICS CODE'],
      dtype='object')

In [189]:
join_emitters = fac_directemitters_data.join(parents.set_index('GHGRP FACILITY ID'))
join_suppliers = fac_suppliers_data.join(parents.set_index('GHGRP FACILITY ID'))

In [191]:
# plt.hist(join_suppliers['Emissions (tCO2e)'])
join_emitters['Longitude']

1000001   -122.685533
1000002    -85.198134
1000003    -78.393500
1000005   -120.104120
1000007   -106.527500
              ...    
1014015    -81.933190
1014021    -90.958167
1014023    -94.900224
1014024           NaN
1014028    -95.089810
Name: Longitude, Length: 8487, dtype: float64

In [192]:
nas_s = join_suppliers.loc[join_suppliers["Emissions (tCO2e)"]  == 0]

In [193]:
nas_s.columns

Index(['Name', '2017 NAICS Code', 'Emissions (tCO2e)', '2017 NAICS Title',
       'Industry Type (sectors)', 'Longitude', 'Latitude',
       'Methane (CH4) emissions ', 'Nitrous Oxide (N2O) emissions ',
       'FRS ID (FACILITY)', 'REPORTING YEAR', 'FACILITY NAME',
       'FACILITY ADDRESS', 'FACILITY CITY', 'FACILITY STATE', 'FACILITY ZIP',
       'FACILITY COUNTY', 'PARENT COMPANY NAME', 'PARENT CO. STREET ADDRESS',
       'PARENT CO. CITY', 'PARENT CO. STATE', 'PARENT CO. ZIP',
       'PARENT CO. PERCENT OWNERSHIP', 'FACILITY NAICS CODE'],
      dtype='object')

In [194]:
nas_s['PARENT COMPANY NAME']

1000035                             THE ANDERSONS INC
1000035                       MARATHON PETROLEUM CORP
1000035                       MARATHON PETROLEUM CORP
1000039                           GENERAL ELECTRIC CO
1000158     ASCEND PERFORMANCE MATERIALS HOLDINGS INC
                              ...                    
1014033                           AW DISTRIBUTING INC
1014041                                     BLUON INC
1014043                          RAMJ ENTERPRISES INC
1014044                      WILHELMSEN SHIPS SERVICE
1014063    TRANSOCEAN OFFSHORE DEEPWATER DRILLING INC
Name: PARENT COMPANY NAME, Length: 364, dtype: object

In [195]:
join_emitters["PARENT EMISSIONS AMT"] = join_emitters['Emissions (tCO2e)'] * join_emitters["PARENT CO. PERCENT OWNERSHIP"] / 100 
join_suppliers["PARENT EMISSIONS AMT"] = join_suppliers['Emissions (tCO2e)'] * join_suppliers["PARENT CO. PERCENT OWNERSHIP"] / 100 
summary_e = join_emitters.groupby("PARENT COMPANY NAME")[["PARENT EMISSIONS AMT"]].agg(['sum', 'count'])
summary_s = join_suppliers.groupby("PARENT COMPANY NAME")[["PARENT EMISSIONS AMT"]].agg(['sum', 'count'])

In [196]:
len(set(join_emitters["PARENT COMPANY NAME"]))

2941

In [197]:
join_emitters

Unnamed: 0,Name,2017 NAICS Code,Emissions (tCO2e),2017 NAICS Title,Industry Type (sectors),Longitude,Latitude,Methane (CH4) emissions,Nitrous Oxide (N2O) emissions,FRS ID (FACILITY),...,FACILITY ZIP,FACILITY COUNTY,PARENT COMPANY NAME,PARENT CO. STREET ADDRESS,PARENT CO. CITY,PARENT CO. STATE,PARENT CO. ZIP,PARENT CO. PERCENT OWNERSHIP,FACILITY NAICS CODE,PARENT EMISSIONS AMT
1000001,PSE Ferndale Generating Station,221112,509154.612,Fossil Fuel Electric Power Generation,Power Plants,-122.685533,48.828707,236.00,281.312,1.100005e+11,...,98248,WHATCOM COUNTY,PUGET HOLDINGS LLC,PO Box 97034,Bellevue,WA,98009,100.0,221112,509154.612
1000002,Ardagh Glass Inc. (Dunkirk),327213,113519.804,Glass Container Manufacturing,Minerals,-85.198134,40.371053,37.00,44.104,1.100412e+11,...,47336,JAY COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,113519.804
1000003,Ardagh Glass Inc. (Henderson),327213,81110.872,Glass Container Manufacturing,Minerals,-78.393500,36.290700,27.00,33.972,1.100015e+11,...,27537,VANCE COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,81110.872
1000005,Ardagh Glass Inc. (Madera),327213,79854.076,Glass Container Manufacturing,Minerals,-120.104120,36.923750,28.00,33.376,1.100005e+11,...,93637,MADERA COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,79854.076
1000007,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,327420,14084.496,Gypsum Product Manufacturing,Minerals,-106.527500,35.330083,6.75,8.046,1.100070e+11,...,87004,SANDOVAL COUNTY,EAGLE MATERIALS INC,5960 Berkshire Lane Suite 800,Dallas,TX,75225,100.0,327420,14084.496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014015,Cedar Grove #2 Mine,212112,19219.500,Bituminous Coal Underground Mining,Other,-81.933190,37.817746,19219.50,,,...,25654,,ARACOMA COAL CO LLC,PO Box 1098,Holden,WV,25625,100.0,212112,19219.500
1014021,"Peco Foods, Inc.",311615,87368.842,Poultry Processing,"Other,Waste",-90.958167,36.215944,85081.75,1.192,1.100674e+11,...,72455,RANDOLPH COUNTY,PECO FOODS INC,1101 Greensboro Ave,Tuscaloosa,AL,35401,100.0,311615,87368.842
1014023,JSW Steel (USA) Inc.,331110,43100.388,Iron and Steel Mills and Ferroalloy Manufactur...,Metals,-94.900224,29.698855,20.25,24.138,1.100007e+11,...,77523,CHAMBERS COUNTY,JSW STEEL USA INC,5200 East McKinney Road,Baytown,TX,77520,100.0,331110,43100.388
1014024,"TITUS OIL & GAS PRODUCTION, LLC",211120,53440.912,Crude Petroleum Extraction,,,,,,,...,76102,,TITUS OIL & GAS,"420 Throckmorton St, Suite 1150",Fort Worth,TX,76102,100.0,211120,53440.912


In [169]:
uniqueEmitters = list(set(join_emitters["PARENT COMPANY NAME"]))

In [83]:
def getStock(search_term):
    results = []
    query = requests.get(f'https://yfapi.net/v6/finance/autocomplete?region=IN&lang=en&query={search_term}', 
    headers={
        'accept': 'application/json',
        'X-API-KEY': '67OTHABAMI9EcpD7FKoMbHerxF8yNY35fPHdU5mc'
    })
    response = query.json()
    for i in response['ResultSet']['Result']:
        final = i['symbol']
        results.append(final)
    
    symbol = None
        
    try:
        stock = yf.Ticker(results[0])
        price = stock.info["regularMarketPrice"]
        full_name = stock.info['longName']
        curreny = stock.info["currency"]
        symbol = stock.info["symbol"]
    except Exception as e:
        pass
        # print('Something went wrong')
        # print(e)
        
    return stock.info

In [198]:
ticker = {}

In [199]:
iex = pd.read_json('etf-conversions/iex_symbols.json')
iex['name'] = iex['name'].str.upper()

In [200]:
iex['name']

0                                AGILENT TECHNOLOGIES INC.
1                                               ALCOA CORP
2        LISTED FUNDS TRUST - AAF FIRST PRIORITY CLO BO...
3        GOLDMAN SACHS PHYSICAL GOLD ETF TRUST - GOLDMA...
4                   ARES ACQUISITION CORPORATION - CLASS A
                               ...                        
11883                            ZURN WATER SOLUTIONS CORP
11884                                         ZYMERGEN INC
11885                                        ZYMEWORKS INC
11886                          ZYNERBA PHARMACEUTICALS INC
11887                                            ZYNEX INC
Name: name, Length: 11888, dtype: object

In [83]:
iexNames = iex['name']
matches = []
for name in uniqueEmitters:
    for i, iexName in enumerate(iexNames):
        if name in iexName:
            iex['name'][i] = name
            
            matches.append(name)
            break
matches, len(set(matches))

(['ALPHABET INC',
  'CHS INC',
  'CVR ENERGY INC',
  'EVERSOURCE ENERGY',
  'PBF ENERGY INC',
  'MARTIN MIDSTREAM PARTNERS LP',
  'PAR PACIFIC HOLDINGS INC',
  'RANGE RESOURCES CORP',
  'NRG ENERGY INC',
  'TREDEGAR CORP',
  'ABBVIE INC',
  'INGEVITY CORP',
  'COMPASS MINERALS INTERNATIONAL INC',
  'AEMETIS INC',
  'ORANGE COUNTY',
  'FORD MOTOR CO',
  'NEWMONT CORP',
  'TEXAS INSTRUMENTS INC',
  'VISTRA CORP',
  'MARATHON OIL CORP',
  'NEXTERA ENERGY INC',
  'KELLOGG CO',
  'NEW YORK CITY',
  'PPL CORP',
  'PARK-OHIO HOLDINGS CORP',
  'EAGLE MATERIALS INC',
  'SUMMIT MATERIALS INC',
  'CLEARWAY ENERGY INC',
  'OASIS PETROLEUM INC',
  'MOLSON COORS BEVERAGE CO',
  'NATURAL RESOURCE PARTNERS LP',
  'HERITAGE-CRYSTAL CLEAN INC',
  'NEENAH INC',
  'ALPHA METALLURGICAL RESOURCES',
  'BIOGEN INC',
  'ARCH RESOURCES INC',
  'AMPCO-PITTSBURGH CORP',
  'MURPHY OIL CORP',
  '3M CO',
  'CONAGRA BRANDS INC',
  'HUNTINGTON INGALLS INDUSTRIES INC',
  'WASTE CONNECTIONS INC',
  'ENTERGY CORP',
  'PI

In [84]:
sorted(matches)

['3M CO',
 'ABB LTD',
 'ABBOTT LABORATORIES',
 'ABBVIE INC',
 'ADVANSIX INC',
 'AEMETIS INC',
 'AES CORP',
 'AIR PRODUCTS & CHEMICALS INC',
 'ALBEMARLE CORP',
 'ALCOA CORP',
 'ALEXANDER & BALDWIN INC',
 'ALLEGHENY TECHNOLOGIES INC',
 'ALLIANT ENERGY CORP',
 'ALLISON TRANSMISSION HOLDINGS INC',
 'ALPHA & OMEGA SEMICONDUCTOR',
 'ALPHA METALLURGICAL RESOURCES',
 'ALPHABET INC',
 'ALTO INGREDIENTS INC',
 'ALTRIA GROUP INC',
 'AMEREN CORP',
 'AMERICAN AIRLINES GROUP INC',
 'AMGEN INC',
 'AMPCO-PITTSBURGH CORP',
 'AMPLIFY ENERGY CORP',
 'ANALOG DEVICES INC',
 'ANTERO RESOURCES CORP',
 'ARCH RESOURCES INC',
 'ARCHER DANIELS MIDLAND CO',
 'ARCHROCK INC',
 'ARCONIC CORP',
 'ARCOSA INC',
 'ARES MANAGEMENT CORP',
 'ARMSTRONG WORLD INDUSTRIES INC',
 'ASHLAND GLOBAL HOLDINGS INC',
 'ATMOS ENERGY CORP',
 'AVANGRID INC',
 'AVISTA CORP',
 'BARRICK GOLD CORP',
 'BATTALION OIL CORP',
 'BERKSHIRE HATHAWAY INC',
 'BERRY CORP',
 'BERRY GLOBAL GROUP INC',
 'BIOGEN INC',
 'BLACK HILLS CORP',
 'BOISE CASCADE 

In [201]:
stock = join_emitters.join(iex.set_index('name'))

In [55]:
a = join_emitters[join_emitters['PARENT COMPANY NAME'] == 'ONEOK INC']

Unnamed: 0,Name,2017 NAICS Code,Emissions (tCO2e),2017 NAICS Title,FRS ID (FACILITY),REPORTING YEAR,FACILITY NAME,FACILITY ADDRESS,FACILITY CITY,FACILITY STATE,FACILITY ZIP,FACILITY COUNTY,PARENT COMPANY NAME,PARENT CO. STREET ADDRESS,PARENT CO. CITY,PARENT CO. STATE,PARENT CO. ZIP,PARENT CO. PERCENT OWNERSHIP,FACILITY NAICS CODE,PARENT EMISSIONS AMT
1003671,VGT CS 2207,486210,7434.578,Pipeline Transportation of Natural Gas,110006000000.0,2020,VGT CS 2207,1611 COUNTY HWY 142,ADA,MN,56510,NORMAN COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,486210,7434.578
1003681,WTX - CS Red River CS No. 1 (Wheeler),486210,45020.824,Pipeline Transportation of Natural Gas,110070700000.0,2020,WTX - CS Red River CS No. 1 (Wheeler),,WHEELER,TX,79096,WHEELER COUNTY,ONEOK INC,"100 West Fifth Street, P.O. Box 871",Tulsa,OK,74103,100.0,486210,45020.824
1003682,WTX - CS Red River CS No. 2 (Lamesa),486210,41595.03,Pipeline Transportation of Natural Gas,110070700000.0,2020,WTX - CS Red River CS No. 2 (Lamesa),,LAMESA,TX,79331,DAWSON COUNTY,ONEOK INC,"100 West Fifth Street, P.O. Box 871",Tulsa,OK,74103,100.0,486210,41595.03
1003727,OFS GPRP Panther Creek Gas Plant,211130,4641.832,Natural Gas Extraction,110070100000.0,2020,OFS GPRP Panther Creek Gas Plant,,Canute,OK,73626,CUSTER COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,211130,4641.832
1003736,OFS GPRP Custer Gas Plant,211130,65831.226,Natural Gas Extraction,110013900000.0,2020,OFS GPRP Custer Gas Plant,,CUSTER CITY,OK,73639,CUSTER COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,211130,65831.226
1003955,OFS GPRP Antelope Hills Gas Plant,211130,26518.436,Natural Gas Extraction,110007400000.0,2020,OFS GPRP Antelope Hills Gas Plant,,Crawford,OK,73638,ROGER MILLS COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,211130,26518.436
1003995,GPLC JOLIET SALES,486210,19259.394,Pipeline Transportation of Natural Gas,110055500000.0,2020,GPLC JOLIET SALES,,Joliet,IL,60410,WILL COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,486210,19259.394
1004002,MGT Potomac 2118,486210,15310.56,Pipeline Transportation of Natural Gas,110055500000.0,2020,MGT Potomac 2118,27385 N. Road 620 East,Potomac,IL,61865,VERMILION COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,486210,15310.56
1004012,VGT CS 2204,486210,6347.582,Pipeline Transportation of Natural Gas,,2020,VGT CS 2204,County Road 20,Angus,MN,56712,,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,486210,6347.582
1004418,OHL NGLP Medford Plant,211130,200108.898,Natural Gas Extraction,110000700000.0,2020,OHL NGLP Medford Plant,25923 US Highway 81,MEDFORD,OK,73759,GRANT COUNTY,ONEOK INC,100 West Fifth Street,Tulsa,OK,74103,100.0,211130,200108.898


In [56]:
b = iex[iex['name'] == 'ONEOK INC']

Unnamed: 0,symbol,exchange,exchangeSuffix,exchangeName,exchangeSegment,exchangeSegmentName,name,date,type,iexId,region,currency,isEnabled,figi,cik,lei
7855,OKE,XNYS,,New York Stock Exchange Inc,XNYS,New York Stock Exchange Inc,ONEOK INC,2022-04-16,cs,IEX_4E47573937502D52,US,USD,True,BBG000BQHGR6,1039684.0,2T3D6M0JSY48PSZI1Q41


In [202]:
p = join_emitters.set_index('PARENT COMPANY NAME').join(iex.set_index('name'))
p = p[~p['symbol'].isnull()]

In [173]:
p

Unnamed: 0,Name,2017 NAICS Code,Emissions (tCO2e),2017 NAICS Title,Industry Type (sectors),Longitude,Latitude,FRS ID (FACILITY),REPORTING YEAR,FACILITY NAME,...,exchangeSegmentName,date,type,iexId,region,currency,isEnabled,figi,cik,lei
ABBOTT LABORATORIES,Abbott Laboratories,311514,27938.244,"Dry, Condensed, and Evaporated Dairy Product M...",Other,-111.783120,32.894000,1.100005e+11,2020,Abbott Laboratories,...,New York Stock Exchange Inc,2022-04-16,cs,IEX_53484E444C542D52,US,USD,True,BBG000B9ZXB4,1800.0,HQD377W2YR662HK5JX27
ABBOTT LABORATORIES,ABBOTT PARK FACILITY,424210,55009.392,Drugs and Druggists' Sundries Merchant Wholesa...,Other,-87.890650,42.302270,1.100180e+11,2020,ABBOTT PARK FACILITY,...,New York Stock Exchange Inc,2022-04-16,cs,IEX_53484E444C542D52,US,USD,True,BBG000B9ZXB4,1800.0,HQD377W2YR662HK5JX27
ABBOTT LABORATORIES,Abbott Nutrition - A Division of Abbott Labora...,311514,28943.892,"Dry, Condensed, and Evaporated Dairy Product M...",Other,-79.267778,37.130556,1.100207e+11,2020,Abbott Nutrition - A Division of Abbott Labora...,...,New York Stock Exchange Inc,2022-04-16,cs,IEX_53484E444C542D52,US,USD,True,BBG000B9ZXB4,1800.0,HQD377W2YR662HK5JX27
ABBVIE INC,North Chicago Facility,325411,64032.708,Medicinal and Botanical Manufacturing,Chemicals,-87.838550,42.333970,1.100701e+11,2020,North Chicago Facility,...,New York Stock Exchange Inc,2022-04-16,cs,IEX_4334484252542D52,US,USD,True,BBG0025Y4RY4,1551152.0,FR5LCKFTG8054YNNRU85
ABBVIE INC,ABBVIE LTD.,325412,64383.468,Pharmaceutical Preparation Manufacturing,Chemicals,-66.570509,18.433237,1.100078e+11,2020,ABBVIE LTD.,...,New York Stock Exchange Inc,2022-04-16,cs,IEX_4334484252542D52,US,USD,True,BBG0025Y4RY4,1551152.0,FR5LCKFTG8054YNNRU85
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WESTROCK CO,MW Custom Paper LLC- Holston Landfill,322121,22786.000,Paper (except Newsprint) Mills,Waste,-82.392890,36.507018,1.100555e+11,2020,MW Custom Paper LLC- Holston Landfill,...,New York Stock Exchange Inc,2022-04-16,cs,IEX_485031335A442D52,US,USD,True,BBG008NXC572,1732845.0,549300JEB576INN13W07
WESTROCK CO,"WestRock Container, LLC",322130,52713.752,Paperboard Mills,Pulp and Paper,-81.780540,35.019210,1.100020e+11,2020,"WestRock Container, LLC",...,New York Stock Exchange Inc,2022-04-16,cs,IEX_485031335A442D52,US,USD,True,BBG008NXC572,1732845.0,549300JEB576INN13W07
WESTROCK CO,"WestRock - Solvay, LLC",322130,174068.314,Paperboard Mills,Pulp and Paper,-76.204630,43.063850,1.100098e+11,2020,"WestRock - Solvay, LLC",...,New York Stock Exchange Inc,2022-04-16,cs,IEX_485031335A442D52,US,USD,True,BBG008NXC572,1732845.0,549300JEB576INN13W07
WESTROCK CO,"Seven Hills Paperboard, LLC",322130,30234.588,Paperboard Mills,Pulp and Paper,-79.128076,37.403741,1.100582e+11,2020,"Seven Hills Paperboard, LLC",...,New York Stock Exchange Inc,2022-04-16,cs,IEX_485031335A442D52,US,USD,True,BBG008NXC572,1732845.0,549300JEB576INN13W07


In [79]:
p[['Name', 'symbol', 'Emissions (tCO2e)', 'PARENT CO. PERCENT OWNERSHIP']].to_csv('happy.csv')

In [203]:
spy = pd.read_json('etf-conversions/full_carbon_stock_data.json')
spy['companyName'] = spy['companyName'].str.upper()
spy

Unnamed: 0,companyName,marketcap,week52high,week52low,week52highSplitAdjustOnly,week52lowSplitAdjustOnly,week52change,sharesOutstanding,float,avg10Volume,...,maxChangePercent,year5ChangePercent,year2ChangePercent,year1ChangePercent,ytdChangePercent,month6ChangePercent,month3ChangePercent,month1ChangePercent,day30ChangePercent,day5ChangePercent
0,NORTHWESTERN CORP.,3239315309,67.94,52.51,70.80,53.66,-0.103191,54132943,0,298844,...,0.787378,0.210515,0.056592,-0.085435,0.057954,0.069819,0.042632,0.006729,0.015098,-0.018051
1,TENARIS S.A.,19325387907,33.10,18.57,33.10,18.80,0.527152,590268415,0,2507400,...,-0.072647,0.131779,1.716336,0.538353,0.569511,0.441643,0.375052,0.236405,0.233145,0.042343
2,ALEXANDER & BALDWIN INC.,1635308190,26.16,16.67,26.57,17.21,0.318295,72680364,0,416010,...,-0.044963,0.090169,1.034045,0.325822,-0.096095,-0.045538,-0.064787,-0.032882,-0.058183,0.018100
3,BOISE CASCADE CO,2808263396,84.94,46.28,85.06,46.51,0.090916,39447442,0,315662,...,2.030251,1.751421,1.882957,0.110347,0.001329,0.215988,0.013428,-0.084726,-0.096803,0.044607
4,MDU RESOURCES GROUP INC,5525039606,34.02,25.18,35.02,25.18,-0.148484,203350740,0,1881791,...,0.253634,0.253634,0.282996,-0.140372,-0.111569,-0.108253,-0.100486,0.055146,0.049035,0.013806
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,EDISON INTERNATIONAL,27356398002,72.64,52.35,72.64,54.14,0.222900,380796186,0,1838118,...,1.471820,0.081089,0.357031,0.221308,0.063274,0.278437,0.138330,0.097362,0.094383,-0.001529
245,CMS ENERGY CORPORATION,20759290187,73.64,56.70,73.64,57.92,0.148370,290136830,0,2027747,...,5.836749,0.822949,0.246752,0.160686,0.107784,0.203753,0.130375,0.063941,0.076425,-0.013240
246,SPIRE INC.,3906623881,76.55,58.37,77.87,59.60,0.016621,51750217,0,490105,...,1.951791,0.297815,0.098046,0.014781,0.169141,0.198404,0.142685,0.113586,0.125205,-0.007364
247,VISTRA CORP,10597061646,24.65,14.99,24.65,15.47,0.393092,440992994,0,5649584,...,0.734818,0.734818,0.577413,0.383762,0.063401,0.239318,0.071878,0.084357,0.071878,-0.019584


In [87]:
spyNames = spy['companyName']
matches = []
for name in uniqueEmitters:
    for i, spyName in enumerate(spyNames):
        if name in spyName:
            spy['companyName'][i] = name
            matches.append((name, spyName))
            break
matches, len(matches)

([('ALPHABET INC', 'ALPHABET INC'),
  ('CHS INC', 'CHS INC.'),
  ('CVR ENERGY INC', 'CVR ENERGY INC'),
  ('EVERSOURCE ENERGY', 'EVERSOURCE ENERGY'),
  ('PBF ENERGY INC', 'PBF ENERGY INC'),
  ('MARTIN MIDSTREAM PARTNERS LP', 'MARTIN MIDSTREAM PARTNERS LP'),
  ('PAR PACIFIC HOLDINGS INC', 'PAR PACIFIC HOLDINGS INC'),
  ('RANGE RESOURCES CORP', 'RANGE RESOURCES CORP'),
  ('NRG ENERGY INC', 'NRG ENERGY INC.'),
  ('TREDEGAR CORP', 'TREDEGAR CORP.'),
  ('ABBVIE INC', 'ABBVIE INC'),
  ('INGEVITY CORP', 'INGEVITY CORP'),
  ('COMPASS MINERALS INTERNATIONAL INC', 'COMPASS MINERALS INTERNATIONAL INC'),
  ('AEMETIS INC', 'AEMETIS INC'),
  ('ORANGE COUNTY', 'ORANGE COUNTY BANCORP INC'),
  ('FORD MOTOR CO', 'FORD MOTOR CO.'),
  ('NEWMONT CORP', 'NEWMONT CORP'),
  ('TEXAS INSTRUMENTS INC', 'TEXAS INSTRUMENTS INC.'),
  ('VISTRA CORP', 'VISTRA CORP'),
  ('MARATHON OIL CORP', 'MARATHON OIL CORPORATION'),
  ('NEXTERA ENERGY INC', 'NEXTERA ENERGY INC'),
  ('KELLOGG CO', 'KELLOGG CO'),
  ('NEW YORK CITY', 

In [204]:
fullFrame = p.join(spy.set_index('companyName'))

ABBOTT LABORATORIES   -111.783120
ABBOTT LABORATORIES    -87.890650
ABBOTT LABORATORIES    -79.267778
ABBVIE INC             -87.838550
ABBVIE INC             -66.570509
                          ...    
WESTROCK CO            -82.392890
WESTROCK CO            -81.780540
WESTROCK CO            -76.204630
WESTROCK CO            -79.128076
WESTROCK CO            -82.910480
Name: Longitude, Length: 719, dtype: float64

In [182]:
fullFrame['Industry Type (sectors)']

ABBOTT LABORATORIES             Other
ABBOTT LABORATORIES             Other
ABBOTT LABORATORIES             Other
ABBVIE INC                  Chemicals
ABBVIE INC                  Chemicals
                            ...      
WESTROCK CO                     Waste
WESTROCK CO            Pulp and Paper
WESTROCK CO            Pulp and Paper
WESTROCK CO            Pulp and Paper
WESTROCK CO              Power Plants
Name: Industry Type (sectors), Length: 719, dtype: object

In [205]:
fullFrame[['Name', 'symbol', 'Emissions (tCO2e)', 'Methane (CH4) emissions ', 'Nitrous Oxide (N2O) emissions ',  'marketcap', 'sharesOutstanding', 'Longitude', 'Latitude', 'Industry Type (sectors)', 'PARENT CO. PERCENT OWNERSHIP']].to_csv('imsofuckinghappy.csv')




