In [206]:
import pandas as pd
import requests, zipfile, io
!pip install pyxlsb
import pyxlsb
from IPython.core.display import HTML
import yfinance as yf

import warnings
warnings.filterwarnings('ignore')

# Set up some default parameters for graphing
import matplotlib.pyplot as plt
from matplotlib import cycler
# Default fill colour for bar charts.
colour = "#00C2AB"

# Colour cycle applied to successive series drawn on the same axes.
colors = cycler(
    'color',
    ['#4FBBA9', '#E56D13', '#D43A69','#25539f', '#88BB44', '#FFBBBB'],
)

# Font used for all figure text.
font = {'family': 'DejaVu Sans', 'weight': 'normal', 'size': 16}

# Matplotlib rc group -> settings for that group, applied in this order.
_rc_settings = {
    'axes': dict(facecolor='#E6E6E6', edgecolor='none',
                 axisbelow=True, grid=True, prop_cycle=colors),
    'grid': dict(color='w', linestyle='solid'),
    'xtick': dict(direction='out', color='gray'),
    'ytick': dict(direction='out', color='gray'),
    'patch': dict(edgecolor='#E6E6E6'),
    'lines': dict(linewidth=2),
    'font': font,
    'legend': dict(fancybox=True, framealpha=1, shadow=True, borderpad=1),
}
for _rc_group, _rc_kwargs in _rc_settings.items():
    plt.rc(_rc_group, **_rc_kwargs)



In [207]:
# Load the sheets of the ghgp_data_2020.xlsx workbook (read from the working directory) that
# contain various kinds of direct emissions, from individual facilities and sectors/activities.
# Because sheet_name is a list, read_excel returns a dict of DataFrames keyed by sheet name.
ghg_fac = pd.read_excel("ghgp_data_2020.xlsx",
  skiprows = 1, # skip the first row of each sheet
  header = 2, # 0-indexed header row; presumably counted after the skipped row, i.e. the sheet's 4th row - TODO confirm
  converters={'Primary NAICS Code':str}, # read the NAICS codes as strings
  sheet_name = ["Direct Emitters", "Onshore Oil & Gas Prod.", "Gathering & Boosting", 
    "Transmission Pipelines", "LDC - Direct Emissions", "SF6 from Elec. Equip.", "Suppliers"]) 

# Also load the 2017 NAICS code list, which gives the industry title for each 6-digit code
naics = pd.read_excel("https://www.census.gov/naics/2017NAICS/6-digit_2017_Codes.xlsx",
  header = 0, # the first row is the header
  converters={'2017 NAICS Code':str,'2017 NAICS Title':str}
)
# Keep only the two columns used for the code -> title lookup
naics=naics[["2017 NAICS Code", "2017 NAICS Title"]]

In [233]:
# Quantity columns of the "Suppliers" sheet that are added together into a single
# emissions figure per supplier. The last header really does end with a space.
_supplier_quantity_columns = [
    "GHG Quantity Associated with Coal-based liquid fuel production",
    "GHG Quantity Associated with Petroleum Products Produced",
    "GHG Quantity Associated with Petroleum Products Imported",
    "GHG Quantity Associated with Petroleum Products Exported",
    "GHG Quantity Associated with Natural Gas Supply",
    "GHG Quantity Associated with Natural Gas Liquids Supply",
    "GHG Quantity Associated with CO2 Supply ",
]

_suppliers_sheet = ghg_fac["Suppliers"]

# Cells that are not numeric are coerced to NaN and then counted as 0.
_supplier_quantities = [
    pd.to_numeric(_suppliers_sheet[_column], errors='coerce').fillna(0)
    for _column in _supplier_quantity_columns
]

# Add the columns left to right, in the order listed above.
_supplier_total = _supplier_quantities[0]
for _quantity in _supplier_quantities[1:]:
    _supplier_total = _supplier_total + _quantity

_suppliers_sheet["Emissions (tCO2e)"] = _supplier_total

# Maps each direct-emissions sheet name in ghg_fac to the column of that sheet holding
# the total reported emissions. The values must match the sheet headers exactly.
emissions_field_names = {
    "Direct Emitters": "Total reported direct emissions",
    "Onshore Oil & Gas Prod.": "Total reported emissions from Onshore Oil & Gas Production ",  # trailing space is part of the column name
    "Gathering & Boosting": "Total reported emissions from Gathering & Boosting",
    "Transmission Pipelines": "Total reported direct emissions from Transmission Pipelines",
    "LDC - Direct Emissions": "Total reported direct emissions from Local Distribution Companies",
    "SF6 from Elec. Equip.": "Total reported direct emissions from Electrical Equipment Use",
}

# Build one long table of direct emissions: from every direct-emissions sheet take the
# facility id, facility name, NAICS code and that sheet's emissions total, under common
# column names.
_emitter_frames = []
for _sheet_name, _emissions_column in emissions_field_names.items():
    _sheet = ghg_fac[_sheet_name][
        ["Facility Id", "Facility Name", "Primary NAICS Code", _emissions_column]
    ]
    _sheet = _sheet.rename(columns={
        "Facility Id": "Facility",
        "Facility Name": "Name",
        "Primary NAICS Code": "2017 NAICS Code",
        _emissions_column: "Emissions (tCO2e)",
    })
    _sheet["Emissions (tCO2e)"] = pd.to_numeric(_sheet["Emissions (tCO2e)"])
    _emitter_frames.append(_sheet)

# Concatenate once: DataFrame.append was removed in pandas 2.0, and growing a frame
# inside a loop copies it on every iteration. Starting from the renamed frames (rather
# than an empty frame with a "Primary NAICS Code" column) also avoids a stray all-NaN column.
fac_directemitters_data = pd.concat(_emitter_frames, ignore_index=True)

# The same facility can appear on more than one sheet, so total its emissions into a
# single row. The facility name is part of the key for presentation purposes. Only the
# emissions column is summed.
fac_directemitters_data = (
    fac_directemitters_data
    .groupby(by=["Facility", "Name", "2017 NAICS Code"])[["Emissions (tCO2e)"]]
    .sum()
    .reset_index()
)
fac_directemitters_data['2017 NAICS Code'] = fac_directemitters_data['2017 NAICS Code'].astype(str)

# Suppliers come from a single sheet whose "Emissions (tCO2e)" column was computed
# earlier; only rename and select columns to match the emitters table.
fac_suppliers_data = ghg_fac["Suppliers"].rename(columns={
    "Facility Id": "Facility",
    "Facility Name": "Name",
    "Primary NAICS Code": "2017 NAICS Code",
})
fac_suppliers_data = fac_suppliers_data[["Facility", "Name", "2017 NAICS Code", "Emissions (tCO2e)"]]

# Look up industry information: attach the "2017 NAICS Title" for each facility's code.
# NOTE(review): merge defaults to an inner join, so any row whose code is missing from the
# 2017 NAICS list is silently dropped here - confirm that is intended.
fac_directemitters_data = fac_directemitters_data.merge(naics, on="2017 NAICS Code")
fac_suppliers_data = fac_suppliers_data.merge(naics, on="2017 NAICS Code")

# Disabled: would attach sector, coordinates and per-gas emissions from the "Direct Emitters" sheet.
# A later cell reads 'Industry Type (sectors)', a column that only these disabled lines would add.
# sectors = ghg_fac['Direct Emitters'][["Facility Id","Industry Type (sectors)", "Longitude", "Latitude", "Methane (CH4) emissions ", "Nitrous Oxide (N2O) emissions "]]

# fac_directemitters_data = fac_directemitters_data.set_index("Facility").join(sectors.set_index('Facility Id'))
# fac_suppliers_data = fac_suppliers_data.set_index("Facility").join(sectors.set_index('Facility Id'))


In [234]:
# Workbook listing the parent company record(s) for each facility; the "2020" sheet is read.
url = "https://www.epa.gov/system/files/other-files/2021-10/ghgp_data_parent_company_10_2021.xlsb"
parents = pd.read_excel(url, engine='pyxlsb', sheet_name="2020", nrows=9144) # 9144 non-header rows in this file as of Nov 2021.
# Standardize company names to uppercase - EPA sometimes allows company names to be reported
# in mixed case, which interferes with our ability to aggregate them
parents["PARENT COMPANY NAME"] = parents["PARENT COMPANY NAME"].str.upper()
parents.columns

Index(['GHGRP FACILITY ID', 'FRS ID (FACILITY)', 'REPORTING YEAR',
       'FACILITY NAME', 'FACILITY ADDRESS', 'FACILITY CITY', 'FACILITY STATE',
       'FACILITY ZIP', 'FACILITY COUNTY', 'PARENT COMPANY NAME',
       'PARENT CO. STREET ADDRESS', 'PARENT CO. CITY', 'PARENT CO. STATE',
       'PARENT CO. ZIP', 'PARENT CO. PERCENT OWNERSHIP',
       'FACILITY NAICS CODE'],
      dtype='object')

In [236]:
# Attach parent-company details to each facility by matching facility ids on the index.
# DataFrame.join is a left join by default, so facilities with no parent record are kept
# with NaN parent columns. Presumably a facility with several parent records appears once
# per parent - TODO confirm.
join_emitters = fac_directemitters_data.set_index("Facility").join(parents.set_index('GHGRP FACILITY ID'))
join_suppliers = fac_suppliers_data.set_index("Facility").join(parents.set_index('GHGRP FACILITY ID'))

In [212]:
# plt.hist(join_suppliers['Emissions (tCO2e)'])
# join_emitters['Longitude']

In [237]:
# Supplier rows whose summed emissions are exactly 0. Non-numeric quantities were filled
# with 0 when the total was computed, so this also captures rows with no usable quantity.
nas_s = join_suppliers.loc[join_suppliers["Emissions (tCO2e)"]  == 0]

In [238]:
nas_s.columns

Index(['Name', '2017 NAICS Code', 'Emissions (tCO2e)', '2017 NAICS Title',
       'FRS ID (FACILITY)', 'REPORTING YEAR', 'FACILITY NAME',
       'FACILITY ADDRESS', 'FACILITY CITY', 'FACILITY STATE', 'FACILITY ZIP',
       'FACILITY COUNTY', 'PARENT COMPANY NAME', 'PARENT CO. STREET ADDRESS',
       'PARENT CO. CITY', 'PARENT CO. STATE', 'PARENT CO. ZIP',
       'PARENT CO. PERCENT OWNERSHIP', 'FACILITY NAICS CODE'],
      dtype='object')

In [239]:
nas_s['PARENT COMPANY NAME']

1000035                             THE ANDERSONS INC
1000035                       MARATHON PETROLEUM CORP
1000035                       MARATHON PETROLEUM CORP
1000039                           GENERAL ELECTRIC CO
1000158     ASCEND PERFORMANCE MATERIALS HOLDINGS INC
                              ...                    
1014033                           AW DISTRIBUTING INC
1014041                                     BLUON INC
1014043                          RAMJ ENTERPRISES INC
1014044                      WILHELMSEN SHIPS SERVICE
1014063    TRANSOCEAN OFFSHORE DEEPWATER DRILLING INC
Name: PARENT COMPANY NAME, Length: 364, dtype: object

In [240]:
# Apportion each facility's emissions to its parent company by percent ownership
# (emissions * percent / 100), for emitters and suppliers alike.
join_emitters["PARENT EMISSIONS AMT"] = (
    join_emitters['Emissions (tCO2e)']
    .mul(join_emitters["PARENT CO. PERCENT OWNERSHIP"])
    .div(100)
)
join_suppliers["PARENT EMISSIONS AMT"] = (
    join_suppliers['Emissions (tCO2e)']
    .mul(join_suppliers["PARENT CO. PERCENT OWNERSHIP"])
    .div(100)
)

# Per parent company: total apportioned emissions and number of non-null rows.
_emitters_by_parent = join_emitters.groupby("PARENT COMPANY NAME")
_suppliers_by_parent = join_suppliers.groupby("PARENT COMPANY NAME")
summary_e = _emitters_by_parent[["PARENT EMISSIONS AMT"]].agg(['sum', 'count'])
summary_s = _suppliers_by_parent[["PARENT EMISSIONS AMT"]].agg(['sum', 'count'])

In [241]:
len(set(join_emitters["PARENT COMPANY NAME"]))

2941

In [242]:
join_emitters

Unnamed: 0,Name,2017 NAICS Code,Emissions (tCO2e),2017 NAICS Title,FRS ID (FACILITY),REPORTING YEAR,FACILITY NAME,FACILITY ADDRESS,FACILITY CITY,FACILITY STATE,FACILITY ZIP,FACILITY COUNTY,PARENT COMPANY NAME,PARENT CO. STREET ADDRESS,PARENT CO. CITY,PARENT CO. STATE,PARENT CO. ZIP,PARENT CO. PERCENT OWNERSHIP,FACILITY NAICS CODE,PARENT EMISSIONS AMT
1000001,PSE Ferndale Generating Station,221112,509154.612,Fossil Fuel Electric Power Generation,1.100005e+11,2020,PSE Ferndale Generating Station,5105 LAKE TERRELL ROAD,FERNDALE,WA,98248,WHATCOM COUNTY,PUGET HOLDINGS LLC,PO Box 97034,Bellevue,WA,98009,100.0,221112,509154.612
1000002,Ardagh Glass Inc. (Dunkirk),327213,113519.804,Glass Container Manufacturing,1.100412e+11,2020,Ardagh Glass Inc. (Dunkirk),524 E. CENTER STREET,DUNKIRK,IN,47336,JAY COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,113519.804
1000003,Ardagh Glass Inc. (Henderson),327213,81110.872,Glass Container Manufacturing,1.100015e+11,2020,Ardagh Glass Inc. (Henderson),620 Facet Road,Henderson,NC,27537,VANCE COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,81110.872
1000005,Ardagh Glass Inc. (Madera),327213,79854.076,Glass Container Manufacturing,1.100005e+11,2020,Ardagh Glass Inc. (Madera),24441 AVENUE 12 & ROAD 24 1/2,MADERA,CA,93637,MADERA COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,79854.076
1000007,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,327420,14084.496,Gypsum Product Manufacturing,1.100070e+11,2020,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,1000 N HILL RD,BERNALILLO,NM,87004,SANDOVAL COUNTY,EAGLE MATERIALS INC,5960 Berkshire Lane Suite 800,Dallas,TX,75225,100.0,327420,14084.496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014015,Cedar Grove #2 Mine,212112,19219.500,Bituminous Coal Underground Mining,,2020,Cedar Grove #2 Mine,"County Route 14, Rum Creek Road",Yolyn,WV,25654,,ARACOMA COAL CO LLC,PO Box 1098,Holden,WV,25625,100.0,212112,19219.500
1014021,"Peco Foods, Inc.",311615,87368.842,Poultry Processing,1.100674e+11,2020,"Peco Foods, Inc.",4114 Hwy 67 South,Pocahontas,AR,72455,RANDOLPH COUNTY,PECO FOODS INC,1101 Greensboro Ave,Tuscaloosa,AL,35401,100.0,311615,87368.842
1014023,JSW Steel (USA) Inc.,331110,43100.388,Iron and Steel Mills and Ferroalloy Manufactur...,1.100007e+11,2020,JSW Steel (USA) Inc.,5200 E. McKinney Road,Baytown,TX,77523,CHAMBERS COUNTY,JSW STEEL USA INC,5200 East McKinney Road,Baytown,TX,77520,100.0,331110,43100.388
1014024,"TITUS OIL & GAS PRODUCTION, LLC",211120,53440.912,Crude Petroleum Extraction,,2020,"TITUS OIL & GAS PRODUCTION, LLC",420 Throckmorton St. Ste 1150,Fort Worth,TX,76102,,TITUS OIL & GAS,"420 Throckmorton St, Suite 1150",Fort Worth,TX,76102,100.0,211120,53440.912


In [169]:
# Distinct parent-company names among the emitters. The order is arbitrary because the
# values pass through a set.
uniqueEmitters = list(set(join_emitters["PARENT COMPANY NAME"]))

In [83]:
def getStock(search_term, api_key=None):
    """Search for a company by name and return Yahoo Finance info for the first match.

    The search term is sent to the yfapi.net autocomplete endpoint. The first symbol
    in the response is passed to ``yf.Ticker`` and that ticker's ``info`` is returned.

    Args:
        search_term: Company name (or other free text) to search for.
        api_key: yfapi.net API key. When omitted, the ``YFAPI_KEY`` environment
            variable is used so the key never has to be stored in the notebook.

    Returns:
        The ``info`` mapping of the first matching ticker, or an empty dict when the
        search returns no symbols.

    Raises:
        RuntimeError: If no API key is passed and ``YFAPI_KEY`` is not set.
        requests.HTTPError: If the autocomplete request returns an error status.
    """
    import os

    # SECURITY: a literal API key used to be committed here. It must be treated as
    # leaked and revoked; supply a fresh key via the argument or the environment.
    if api_key is None:
        api_key = os.environ.get('YFAPI_KEY')
    if not api_key:
        raise RuntimeError(
            "No yfapi.net API key: pass api_key or set the YFAPI_KEY environment variable"
        )

    # Passing the query through `params` URL-encodes names containing '&', spaces, etc.
    response = requests.get(
        'https://yfapi.net/v6/finance/autocomplete',
        params={'region': 'IN', 'lang': 'en', 'query': search_term},
        headers={
            'accept': 'application/json',
            'X-API-KEY': api_key,
        },
        timeout=30,
    )
    response.raise_for_status()

    symbols = [hit['symbol'] for hit in response.json()['ResultSet']['Result']]

    # Previously an empty result left `stock` unbound and the final return raised
    # UnboundLocalError; report "no match" explicitly instead.
    if not symbols:
        return {}

    return yf.Ticker(symbols[0]).info

In [198]:
ticker = {}

In [243]:
# Load the IEX symbol listing from a path relative to the working directory, and
# uppercase the company names so they can be compared with the uppercased parent-company names.
iex = pd.read_json('etf-conversions/iex_symbols.json')
iex['name'] = iex['name'].str.upper()

In [220]:
iex['name']

0                                AGILENT TECHNOLOGIES INC.
1                                               ALCOA CORP
2        LISTED FUNDS TRUST - AAF FIRST PRIORITY CLO BO...
3        GOLDMAN SACHS PHYSICAL GOLD ETF TRUST - GOLDMA...
4                   ARES ACQUISITION CORPORATION - CLASS A
                               ...                        
11883                            ZURN WATER SOLUTIONS CORP
11884                                         ZYMERGEN INC
11885                                        ZYMEWORKS INC
11886                          ZYNERBA PHARMACEUTICALS INC
11887                                            ZYNEX INC
Name: name, Length: 11888, dtype: object

In [83]:
# For each emitter parent company, find the first IEX listing whose name contains the
# parent name, and rename that listing to the parent name so the two tables can later be
# joined on an exact name.
# NOTE(review): plain substring matching produces false positives (the notebook output
# shows 'ORANGE COUNTY' matching 'ORANGE COUNTY BANCORP INC'); review `matches` before
# relying on the join.
_iex_name_list = iex['name'].tolist()
matches = []
for name in uniqueEmitters:
    # Missing parent names (NaN) and empty strings cannot be matched meaningfully.
    if not isinstance(name, str) or not name:
        continue
    for i, iexName in enumerate(_iex_name_list):
        if isinstance(iexName, str) and name in iexName:
            # Renames are visible to later emitters, so one listing is claimed only once
            # unless the shortened name still contains a later emitter's name.
            _iex_name_list[i] = name
            matches.append(name)
            break

# Write the renames back in one assignment. The previous chained form
# iex['name'][i] = ... may write to a temporary copy and does nothing under copy-on-write.
iex['name'] = _iex_name_list
iexNames = iex['name']
matches, len(set(matches))

(['ALPHABET INC',
  'CHS INC',
  'CVR ENERGY INC',
  'EVERSOURCE ENERGY',
  'PBF ENERGY INC',
  'MARTIN MIDSTREAM PARTNERS LP',
  'PAR PACIFIC HOLDINGS INC',
  'RANGE RESOURCES CORP',
  'NRG ENERGY INC',
  'TREDEGAR CORP',
  'ABBVIE INC',
  'INGEVITY CORP',
  'COMPASS MINERALS INTERNATIONAL INC',
  'AEMETIS INC',
  'ORANGE COUNTY',
  'FORD MOTOR CO',
  'NEWMONT CORP',
  'TEXAS INSTRUMENTS INC',
  'VISTRA CORP',
  'MARATHON OIL CORP',
  'NEXTERA ENERGY INC',
  'KELLOGG CO',
  'NEW YORK CITY',
  'PPL CORP',
  'PARK-OHIO HOLDINGS CORP',
  'EAGLE MATERIALS INC',
  'SUMMIT MATERIALS INC',
  'CLEARWAY ENERGY INC',
  'OASIS PETROLEUM INC',
  'MOLSON COORS BEVERAGE CO',
  'NATURAL RESOURCE PARTNERS LP',
  'HERITAGE-CRYSTAL CLEAN INC',
  'NEENAH INC',
  'ALPHA METALLURGICAL RESOURCES',
  'BIOGEN INC',
  'ARCH RESOURCES INC',
  'AMPCO-PITTSBURGH CORP',
  'MURPHY OIL CORP',
  '3M CO',
  'CONAGRA BRANDS INC',
  'HUNTINGTON INGALLS INDUSTRIES INC',
  'WASTE CONNECTIONS INC',
  'ENTERGY CORP',
  'PI

In [84]:
sorted(matches)

['3M CO',
 'ABB LTD',
 'ABBOTT LABORATORIES',
 'ABBVIE INC',
 'ADVANSIX INC',
 'AEMETIS INC',
 'AES CORP',
 'AIR PRODUCTS & CHEMICALS INC',
 'ALBEMARLE CORP',
 'ALCOA CORP',
 'ALEXANDER & BALDWIN INC',
 'ALLEGHENY TECHNOLOGIES INC',
 'ALLIANT ENERGY CORP',
 'ALLISON TRANSMISSION HOLDINGS INC',
 'ALPHA & OMEGA SEMICONDUCTOR',
 'ALPHA METALLURGICAL RESOURCES',
 'ALPHABET INC',
 'ALTO INGREDIENTS INC',
 'ALTRIA GROUP INC',
 'AMEREN CORP',
 'AMERICAN AIRLINES GROUP INC',
 'AMGEN INC',
 'AMPCO-PITTSBURGH CORP',
 'AMPLIFY ENERGY CORP',
 'ANALOG DEVICES INC',
 'ANTERO RESOURCES CORP',
 'ARCH RESOURCES INC',
 'ARCHER DANIELS MIDLAND CO',
 'ARCHROCK INC',
 'ARCONIC CORP',
 'ARCOSA INC',
 'ARES MANAGEMENT CORP',
 'ARMSTRONG WORLD INDUSTRIES INC',
 'ASHLAND GLOBAL HOLDINGS INC',
 'ATMOS ENERGY CORP',
 'AVANGRID INC',
 'AVISTA CORP',
 'BARRICK GOLD CORP',
 'BATTALION OIL CORP',
 'BERKSHIRE HATHAWAY INC',
 'BERRY CORP',
 'BERRY GLOBAL GROUP INC',
 'BIOGEN INC',
 'BLACK HILLS CORP',
 'BOISE CASCADE 

In [244]:
# Attach the IEX listing to each facility row by parent-company name. join_emitters is
# indexed by facility id, so without `on` the join would compare facility ids with
# company names and never match.
stock = join_emitters.join(iex.set_index('name'), on='PARENT COMPANY NAME')

In [222]:
a = join_emitters[join_emitters['PARENT COMPANY NAME'] == 'ONEOK INC']

In [56]:
b = iex[iex['name'] == 'ONEOK INC']

Unnamed: 0,symbol,exchange,exchangeSuffix,exchangeName,exchangeSegment,exchangeSegmentName,name,date,type,iexId,region,currency,isEnabled,figi,cik,lei
7855,OKE,XNYS,,New York Stock Exchange Inc,XNYS,New York Stock Exchange Inc,ONEOK INC,2022-04-16,cs,IEX_4E47573937502D52,US,USD,True,BBG000BQHGR6,1039684.0,2T3D6M0JSY48PSZI1Q41


In [400]:
# Facility rows keyed by parent-company name, with the IEX listing attached; rows whose
# parent has no IEX symbol are discarded.
p = (
    join_emitters
    .set_index('PARENT COMPANY NAME')
    .join(iex.set_index('name'))
    .dropna(subset=['symbol'])
)

In [379]:
len(p)

714

In [79]:
p[['Name', 'symbol', 'Emissions (tCO2e)', 'PARENT CO. PERCENT OWNERSHIP']].to_csv('happy.csv')

In [401]:
# Load the stock data file, uppercase the company names for comparison with the
# uppercased parent-company names, and expose that column as "Name".
spy = (
    pd.read_json('etf-conversions/full_carbon_stock_data.json')
    .assign(companyName=lambda frame: frame['companyName'].str.upper())
    .rename(columns={"companyName": "Name"})
)

In [87]:
# For each emitter parent company, find the first row of `spy` whose company name
# contains the parent name, and rename that row to the parent name so the tables can be
# joined on an exact name. The company-name column is called 'Name' because the cell
# that loads `spy` renames 'companyName' to 'Name'.
# NOTE(review): plain substring matching produces false positives (the notebook output
# shows 'ORANGE COUNTY' matching 'ORANGE COUNTY BANCORP INC').
_spy_name_list = spy['Name'].tolist()
matches = []
for name in uniqueEmitters:
    # Missing parent names (NaN) and empty strings cannot be matched meaningfully.
    if not isinstance(name, str) or not name:
        continue
    for i, spyName in enumerate(_spy_name_list):
        if isinstance(spyName, str) and name in spyName:
            _spy_name_list[i] = name
            matches.append((name, spyName))  # (emitter name, original stock-data name)
            break

# Write the renames back in one assignment instead of chained indexing, which may write
# to a temporary copy.
spy['Name'] = _spy_name_list
spyNames = spy['Name']
matches, len(matches)

([('ALPHABET INC', 'ALPHABET INC'),
  ('CHS INC', 'CHS INC.'),
  ('CVR ENERGY INC', 'CVR ENERGY INC'),
  ('EVERSOURCE ENERGY', 'EVERSOURCE ENERGY'),
  ('PBF ENERGY INC', 'PBF ENERGY INC'),
  ('MARTIN MIDSTREAM PARTNERS LP', 'MARTIN MIDSTREAM PARTNERS LP'),
  ('PAR PACIFIC HOLDINGS INC', 'PAR PACIFIC HOLDINGS INC'),
  ('RANGE RESOURCES CORP', 'RANGE RESOURCES CORP'),
  ('NRG ENERGY INC', 'NRG ENERGY INC.'),
  ('TREDEGAR CORP', 'TREDEGAR CORP.'),
  ('ABBVIE INC', 'ABBVIE INC'),
  ('INGEVITY CORP', 'INGEVITY CORP'),
  ('COMPASS MINERALS INTERNATIONAL INC', 'COMPASS MINERALS INTERNATIONAL INC'),
  ('AEMETIS INC', 'AEMETIS INC'),
  ('ORANGE COUNTY', 'ORANGE COUNTY BANCORP INC'),
  ('FORD MOTOR CO', 'FORD MOTOR CO.'),
  ('NEWMONT CORP', 'NEWMONT CORP'),
  ('TEXAS INSTRUMENTS INC', 'TEXAS INSTRUMENTS INC.'),
  ('VISTRA CORP', 'VISTRA CORP'),
  ('MARATHON OIL CORP', 'MARATHON OIL CORPORATION'),
  ('NEXTERA ENERGY INC', 'NEXTERA ENERGY INC'),
  ('KELLOGG CO', 'KELLOGG CO'),
  ('NEW YORK CITY', 

In [402]:
# p is indexed by parent-company name and spy's 'Name' column holds company names, so
# merge index-to-index. Merging with on='Name' would compare p's 'Name' column (the
# facility name) with company names and would not keep p's parent-company index.
fullFrame = p.merge(spy.set_index('Name'), left_index=True, right_index=True, how='left')

In [404]:
fullFrame.set_index('PARENT COMPANY NAME')

KeyError: "None of ['PARENT COMPANY NAME'] are in the columns"

In [182]:
fullFrame['Industry Type (sectors)']

ABBOTT LABORATORIES             Other
ABBOTT LABORATORIES             Other
ABBOTT LABORATORIES             Other
ABBVIE INC                  Chemicals
ABBVIE INC                  Chemicals
                            ...      
WESTROCK CO                     Waste
WESTROCK CO            Pulp and Paper
WESTROCK CO            Pulp and Paper
WESTROCK CO            Pulp and Paper
WESTROCK CO              Power Plants
Name: Industry Type (sectors), Length: 719, dtype: object

In [249]:
fullFrame.to_csv('imsofuckinghappy.csv')

In [304]:
fullFrame

Unnamed: 0,Name,2017 NAICS Code,Emissions (tCO2e),2017 NAICS Title,FRS ID (FACILITY),REPORTING YEAR,FACILITY NAME,FACILITY ADDRESS,FACILITY CITY,FACILITY STATE,...,maxChangePercent,year5ChangePercent,year2ChangePercent,year1ChangePercent,ytdChangePercent,month6ChangePercent,month3ChangePercent,month1ChangePercent,day30ChangePercent,day5ChangePercent
ABBOTT LABORATORIES,Abbott Laboratories,311514,27938.244,"Dry, Condensed, and Evaporated Dairy Product M...",1.100005e+11,2020,Abbott Laboratories,1250 West Maricopa Highway,Casa Grande,AZ,...,6.053979,1.998303,0.333616,-0.035930,-0.157483,0.009242,-0.078035,0.005781,-0.011056,-0.041306
ABBOTT LABORATORIES,ABBOTT PARK FACILITY,424210,55009.392,Drugs and Druggists' Sundries Merchant Wholesa...,1.100180e+11,2020,ABBOTT PARK FACILITY,100 ABBOTT PARK RD,ABBOTT PARK,IL,...,6.053979,1.998303,0.333616,-0.035930,-0.157483,0.009242,-0.078035,0.005781,-0.011056,-0.041306
ABBOTT LABORATORIES,Abbott Nutrition - A Division of Abbott Labora...,311514,28943.892,"Dry, Condensed, and Evaporated Dairy Product M...",1.100207e+11,2020,Abbott Nutrition - A Division of Abbott Labora...,1518 MAIN STREET,ALTAVISTA,VA,...,6.053979,1.998303,0.333616,-0.035930,-0.157483,0.009242,-0.078035,0.005781,-0.011056,-0.041306
ABBVIE INC,North Chicago Facility,325411,64032.708,Medicinal and Botanical Manufacturing,1.100701e+11,2020,North Chicago Facility,1401 SHERIDAN RD,NORTH CHICAGO,IL,...,5.824537,2.181146,1.173375,0.583491,0.221477,0.512742,0.204750,0.050100,0.048955,-0.064417
ABBVIE INC,ABBVIE LTD.,325412,64383.468,Pharmaceutical Preparation Manufacturing,1.100078e+11,2020,ABBVIE LTD.,RD 2 KM 58.0,BARCELONETA,PR,...,5.824537,2.181146,1.173375,0.583491,0.221477,0.512742,0.204750,0.050100,0.048955,-0.064417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WESTROCK CO,MW Custom Paper LLC- Holston Landfill,322121,22786.000,Paper (except Newsprint) Mills,1.100555e+11,2020,MW Custom Paper LLC- Holston Landfill,1631 Highway 75,Blountville,TN,...,-0.047259,0.183147,0.720134,-0.051056,0.131532,0.083348,0.070022,0.084492,0.054711,0.058287
WESTROCK CO,"WestRock Container, LLC",322130,52713.752,Paperboard Mills,1.100020e+11,2020,"WestRock Container, LLC",139 Price Farm Road,Cowpens,SC,...,-0.047259,0.183147,0.720134,-0.051056,0.131532,0.083348,0.070022,0.084492,0.054711,0.058287
WESTROCK CO,"WestRock - Solvay, LLC",322130,174068.314,Paperboard Mills,1.100098e+11,2020,"WestRock - Solvay, LLC",53 Industrial Drive,Syracuse,NY,...,-0.047259,0.183147,0.720134,-0.051056,0.131532,0.083348,0.070022,0.084492,0.054711,0.058287
WESTROCK CO,"Seven Hills Paperboard, LLC",322130,30234.588,Paperboard Mills,1.100582e+11,2020,"Seven Hills Paperboard, LLC",1801 Concord Turnpike,Lynchburg,VA,...,-0.047259,0.183147,0.720134,-0.051056,0.131532,0.083348,0.070022,0.084492,0.054711,0.058287


In [270]:
# Rebuild the distinct-name list from graphData's "index" column.
# NOTE(review): graphData is not defined in any cell visible here, so this fails on a
# fresh kernel unless it is created elsewhere - TODO confirm or remove.
uniqueEmitters = list(set(graphData["index"]))

In [356]:
join_emitters

Unnamed: 0,Name,2017 NAICS Code,Emissions (tCO2e),2017 NAICS Title,FRS ID (FACILITY),REPORTING YEAR,FACILITY NAME,FACILITY ADDRESS,FACILITY CITY,FACILITY STATE,FACILITY ZIP,FACILITY COUNTY,PARENT COMPANY NAME,PARENT CO. STREET ADDRESS,PARENT CO. CITY,PARENT CO. STATE,PARENT CO. ZIP,PARENT CO. PERCENT OWNERSHIP,FACILITY NAICS CODE,PARENT EMISSIONS AMT
1000001,PSE Ferndale Generating Station,221112,509154.612,Fossil Fuel Electric Power Generation,1.100005e+11,2020,PSE Ferndale Generating Station,5105 LAKE TERRELL ROAD,FERNDALE,WA,98248,WHATCOM COUNTY,PUGET HOLDINGS LLC,PO Box 97034,Bellevue,WA,98009,100.0,221112,509154.612
1000002,Ardagh Glass Inc. (Dunkirk),327213,113519.804,Glass Container Manufacturing,1.100412e+11,2020,Ardagh Glass Inc. (Dunkirk),524 E. CENTER STREET,DUNKIRK,IN,47336,JAY COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,113519.804
1000003,Ardagh Glass Inc. (Henderson),327213,81110.872,Glass Container Manufacturing,1.100015e+11,2020,Ardagh Glass Inc. (Henderson),620 Facet Road,Henderson,NC,27537,VANCE COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,81110.872
1000005,Ardagh Glass Inc. (Madera),327213,79854.076,Glass Container Manufacturing,1.100005e+11,2020,Ardagh Glass Inc. (Madera),24441 AVENUE 12 & ROAD 24 1/2,MADERA,CA,93637,MADERA COUNTY,ARDAGH GLASS INC,1209 Orange Street,Wilmington,DE,19801,100.0,327213,79854.076
1000007,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,327420,14084.496,Gypsum Product Manufacturing,1.100070e+11,2020,AMERICAN GYPSUM - BERNALILLO (WALLBOARD) PLANT,1000 N HILL RD,BERNALILLO,NM,87004,SANDOVAL COUNTY,EAGLE MATERIALS INC,5960 Berkshire Lane Suite 800,Dallas,TX,75225,100.0,327420,14084.496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014015,Cedar Grove #2 Mine,212112,19219.500,Bituminous Coal Underground Mining,,2020,Cedar Grove #2 Mine,"County Route 14, Rum Creek Road",Yolyn,WV,25654,,ARACOMA COAL CO LLC,PO Box 1098,Holden,WV,25625,100.0,212112,19219.500
1014021,"Peco Foods, Inc.",311615,87368.842,Poultry Processing,1.100674e+11,2020,"Peco Foods, Inc.",4114 Hwy 67 South,Pocahontas,AR,72455,RANDOLPH COUNTY,PECO FOODS INC,1101 Greensboro Ave,Tuscaloosa,AL,35401,100.0,311615,87368.842
1014023,JSW Steel (USA) Inc.,331110,43100.388,Iron and Steel Mills and Ferroalloy Manufactur...,1.100007e+11,2020,JSW Steel (USA) Inc.,5200 E. McKinney Road,Baytown,TX,77523,CHAMBERS COUNTY,JSW STEEL USA INC,5200 East McKinney Road,Baytown,TX,77520,100.0,331110,43100.388
1014024,"TITUS OIL & GAS PRODUCTION, LLC",211120,53440.912,Crude Petroleum Extraction,,2020,"TITUS OIL & GAS PRODUCTION, LLC",420 Throckmorton St. Ste 1150,Fort Worth,TX,76102,,TITUS OIL & GAS,"420 Throckmorton St, Suite 1150",Fort Worth,TX,76102,100.0,211120,53440.912


In [354]:
# Build a node-link graph: one node per parent company, one node per facility row, and a
# link from each parent to each of its facilities. Node ids are consecutive integers
# starting at 1; a facility's "group" is the id of its parent.
i = 1
graph = {"nodes": [], "links": []}
cid = {}            # parent company name -> node id
_parent_nodes = {}  # parent company name -> its node dict, for accumulating co2

_graph_rows = join_emitters[["PARENT COMPANY NAME", "Name", "PARENT EMISSIONS AMT"]]
for parent, companyName, co2 in _graph_rows.itertuples(index=False, name=None):
    # NaN is serialized as the bare token NaN, which is not valid JSON, and is unreliable
    # as a dict key. Missing names become None (JSON null), so all facilities without a
    # parent share one node; missing amounts contribute 0.
    parent = None if pd.isna(parent) else parent
    companyName = None if pd.isna(companyName) else companyName
    co2 = 0.0 if pd.isna(co2) else float(co2)

    if parent not in cid:
        _node = {"name": parent, "id": i, "group": i, "value": 1, "co2": 0.0}
        graph["nodes"].append(_node)
        _parent_nodes[parent] = _node
        cid[parent] = i
        i += 1

    # The parent's co2 is the total over all of its facility rows, not just whichever
    # row happened to come first.
    _parent_nodes[parent]["co2"] += co2

    # The facility node keeps the constant co2 of 4 used so far - presumably a display
    # size for the visualisation; TODO confirm whether it should be the facility's amount.
    graph["nodes"].append({ "name": companyName, "id": i, "group": cid[parent], "co2": 4 })
    graph["links"].append({ "source": cid[parent], "target": i })

    i += 1

In [355]:
import json
# Serialize the node-link graph to graph.json in the working directory.
with open('graph.json', "w") as gjson:
    json.dump(graph, gjson)