# Scraping Lat/Longs for U.S. Power Plants from the Energy Information Administration

## Setup Python and R environment

In [2]:
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

%matplotlib inline  
from matplotlib import rcParams
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings("ignore") # Ignore all warnings
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [3]:
%%javascript
// Disable auto-scrolling
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [4]:
%%R

# My commonly used R imports

require('tidyverse')

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


Loading required package: tidyverse


## 👉 download your data

You can write code here to download your dataset. Or if you already have it, just leave the URL in the comments and just load it into a pandas or R (or both) dataframe.

## Merging U.S. power plants and water usage data from the Energy Information Administration

In [4]:
df_plants = pd.read_csv('plants.csv')

In [5]:
df_water = pd.read_csv('water_usage.csv')

In [6]:
df_plants.head()

Unnamed: 0,Plant Name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic


In [7]:
df_plants.shape

(12720, 7)

In [8]:
df_water.head()

Unnamed: 0,plant_name,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons
0,AES Alamitos Energy Center,--,--,--
1,AES Alamitos LLC,94464,94464,0
2,AES Huntington Beach Energy Project,--,--,--
3,AES Huntington Beach LLC,25818,25818,0
4,AES Petersburg,36421,33550,2872


In [9]:
df_water.shape

(749, 4)

In [10]:
df_plants.rename(columns={'Plant Name': 'plant_name'}, inplace=True)

In [11]:
df = pd.merge(df_plants, df_water, on='plant_name', how='left')

In [12]:
df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,


In [13]:
df.shape

(12722, 10)

In [14]:
df[df['plant_name'] == 'AES Petersburg']

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons
136,AES Petersburg,994,IN,Electric Utility,"IC, ST","BIT, DFO",Multiple,36421,33550,2872


In [15]:
df.dtypes

plant_name                            object
Plant Code                             int64
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
consumption_volume_million_gallons    object
dtype: object

In [16]:
pd.set_option('display.max_rows', None)

In [17]:
df['Primary Technology'].value_counts()

Primary Technology
Solar Photovoltaic                             5340
Conventional Hydroelectric                     1393
Onshore Wind Turbine                           1301
Multiple                                        900
Petroleum Liquids                               797
Natural Gas Fired Combustion Turbine            605
Natural Gas Fired Combined Cycle                433
Natural Gas Internal Combustion Engine          300
Landfill Gas                                    292
Batteries                                       232
--                                              182
Conventional Steam Coal                         164
Wood/Wood Waste Biomass                         159
Other Natural Gas                               135
Natural Gas Steam Turbine                       133
Other Waste Biomass                              78
Geothermal                                       59
Municipal Solid Waste                            57
Nuclear                                      

## Categorizing energy sources

In [18]:
def categorize_energy(source):
    if source in [
        'Natural Gas Fired Combustion Turbine',
        'Natural Gas Fired Combined Cycle',
        'Natural Gas Internal Combustion Engine',
        'Natural Gas Steam Turbine',
        'Other Natural Gas'
    ]:
        return 'Natural Gas'
    elif source in [
        'Conventional Steam Coal',
        'Coal Integrated Gasification Combined Cycle'
    ]:
        return 'Coal'
    elif source in [
        'Petroleum Liquids',
        'Petroleum Coke'
    ]:
        return 'Petroleum'
    elif source == 'Nuclear':
        return 'Nuclear'
    elif source in [
        'Solar Photovoltaic',
        'Conventional Hydroelectric',
        'Onshore Wind Turbine',
        'Offshore Wind Turbine',
        'Wood/Wood Waste Biomass',
        'Other Waste Biomass',
        'Landfill Gas',
        'Geothermal',
        'Municipal Solid Waste',
        'Hydroelectric Pumped Storage',
        'Solar Thermal without Energy Storage',
        'Solar Thermal with Energy Storage'
    ]:
        return 'Renewable Energy Sources'
    else:
        return 'Other'

# Apply the function to create a new column
df['energy_category'] = df['Primary Technology'].apply(categorize_energy)

In [19]:
df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources


In [20]:
df_list = df.to_dict(orient="records")

## 👉 convert addresses --> lat/long 

See the [census-examples](https://github.com/data4news/census-examples) repository for examples. If you need help, try asking in the class slack channel. Chances are someone in the class is struggling with the same problem as you are so we might as well all learn together in the same slack channel! 

In [21]:
from bs4 import BeautifulSoup

In [22]:
df_list[0:5]

[{'plant_name': '(3K) 59 Hetcheltown Rd',
  'Plant Code': 66729,
  'State': 'NY',
  'Sector Name': 'IPP Non-CHP',
  'Prime Movers': 'PV',
  'Fuel Types': 'SUN',
  'Primary Technology': 'Solar Photovoltaic',
  'withdrawal_volume_million_gallons': nan,
  'discharge_volume_million_gallons': nan,
  'consumption_volume_million_gallons': nan,
  'energy_category': 'Renewable Energy Sources'},
 {'plant_name': '0 Hammond St CSG',
  'Plant Code': 64876,
  'State': 'MA',
  'Sector Name': 'IPP Non-CHP',
  'Prime Movers': 'BA, PV',
  'Fuel Types': 'MWH, SUN',
  'Primary Technology': 'Multiple',
  'withdrawal_volume_million_gallons': nan,
  'discharge_volume_million_gallons': nan,
  'consumption_volume_million_gallons': nan,
  'energy_category': 'Other'},
 {'plant_name': '1 Commercial',
  'Plant Code': 67464,
  'State': 'MA',
  'Sector Name': 'IPP Non-CHP',
  'Prime Movers': 'BA, PV',
  'Fuel Types': 'MWH, SUN',
  'Primary Technology': 'Multiple',
  'withdrawal_volume_million_gallons': nan,
  'disch

In [23]:
len(df_list)

12722

## Using the plant code, scraping the lat/long of the plant from each plant's unique URL on the EIA data system

In [24]:
from tqdm.asyncio import tqdm  
import asyncio
import pandas as pd
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup

# Function to process each chunk of plants
async def process_plant_chunk(plant_chunk):
    chunk_data = []  # Store processed data for this chunk

    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=True)
        page = await browser.new_page()

        for plant in plant_chunk:
            try:
                plant_ID = plant['Plant Code']
                url = f"https://www.eia.gov/beta/electricity/data/browser/#/plant/{plant_ID}/?freq=A&pin="
                
                await page.goto(url)
                await page.wait_for_timeout(10000)
                
                html = await page.content()
                soup_doc = BeautifulSoup(html, "html.parser")
                
                link = soup_doc.find('div', class_='table_title').find('div').find_all('a')[1]['href']
                plant['Link'] = link
                chunk_data.append(plant)  # Store processed plant data

            except Exception as e:
                print(f"Error processing plant {plant_ID}: {e}")

        await browser.close()  # Close browser after processing the chunk

    return chunk_data

# Function to process data in chunks and save results incrementally
async def process_in_chunks(df_list, chunk_size=10, output_file="output.csv"):
    df = pd.DataFrame(df_list)  # Ensure it's a DataFrame
    
    # Save header to CSV first
    df[0:0].to_csv(output_file, mode='w', header=True)

    for i in tqdm(range(0, len(df_list), chunk_size), desc="Processing chunks"):
        chunk = df_list[i:i + chunk_size]
        chunk_data = await process_plant_chunk(chunk)  # Process chunk
        
        if chunk_data:  # If there is valid data, append it to CSV
            chunk_df = pd.DataFrame(chunk_data)
            chunk_df.to_csv(output_file, mode='a', header=False)

# Function to run asyncio tasks safely in Jupyter Notebook
def run_asyncio():
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        print("Running inside Jupyter, using create_task()")
        return asyncio.create_task(process_in_chunks(df_list))
    else:
        print("Starting new event loop")
        asyncio.run(process_in_chunks(df_list))

 #Run the async function
await run_asyncio()


Running inside Jupyter, using create_task()


Processing chunks:   0%| | 3/1273 [06:22<48:12:58, 136.68s/i

Error processing plant 8812: list index out of range


Processing chunks:   3%| | 32/1273 [55:52<35:17:55, 102.40s/

Error processing plant 8865: list index out of range


Processing chunks:   5%| | 65/1273 [1:52:11<34:15:49, 102.11

Error processing plant 8851: list index out of range


Processing chunks:   7%| | 84/1273 [2:31:38<33:48:24, 102.36

Error processing plant 8823: list index out of range


Processing chunks:   7%| | 90/1273 [2:41:50<33:33:23, 102.12

Error processing plant 8831: list index out of range


Processing chunks:   8%| | 103/1273 [3:03:58<33:08:51, 101.9

Error processing plant 8809: list index out of range


Processing chunks:  13%|▏| 169/1273 [4:56:15<31:17:05, 102.0

Error processing plant 8852: list index out of range


Processing chunks:  15%|▏| 186/1273 [5:25:12<30:51:05, 102.1

Error processing plant 8837: list index out of range


Processing chunks:  17%|▏| 214/1273 [6:46:14<30:24:52, 103.3

Error processing plant 8848: list index out of range


Processing chunks:  19%|▏| 247/1273 [8:01:06<29:19:15, 102.8

Error processing plant 8866: list index out of range


Processing chunks:  20%|▏| 253/1273 [8:11:18<28:55:54, 102.1

Error processing plant 8846: list index out of range


Processing chunks:  20%|▏| 255/1273 [8:14:43<28:51:46, 102.0

Error processing plant 8838: list index out of range


Processing chunks:  20%|▏| 257/1273 [8:18:06<28:47:30, 102.0

Error processing plant 8835: list index out of range


Processing chunks:  23%|▏| 290/1273 [9:17:03<27:53:29, 102.1

Error processing plant 8841: list index out of range


Processing chunks:  23%|▏| 296/1273 [9:27:17<27:44:39, 102.2

Error processing plant 8816: list index out of range


Processing chunks:  32%|▎| 403/1273 [13:39:07<24:48:16, 102.

Error processing plant 8899: list index out of range


Processing chunks:  33%|▎| 421/1273 [14:09:51<24:15:32, 102.

Error processing plant 8834: list index out of range


Processing chunks:  39%|▍| 498/1273 [16:21:22<22:04:58, 102.

Error processing plant 8868: list index out of range


Processing chunks:  42%|▍| 540/1273 [17:43:33<20:56:02, 102.

Error processing plant 8827: list index out of range
Error processing plant 8824: list index out of range


Processing chunks:  47%|▍| 595/1273 [19:18:18<19:17:17, 102.

Error processing plant 8845: list index out of range


Processing chunks:  47%|▍| 598/1273 [19:36:09<47:33:40, 253.

Error processing plant 54885: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/54885/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/54885/?freq=A&pin=", waiting until "load"

Error processing plant 62960: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/62960/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/62960/?freq=A&pin=", waiting until "load"

Error processing plant 876: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/876/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/876/?freq=A&pin=", waiting until "load"

Error processing plant 62614: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/62614/?freq=A&pin=
Call log:
navigating

Processing chunks:  47%|▍| 599/1273 [19:36:09<33:16:10, 177.

Error processing plant 66323: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/66323/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/66323/?freq=A&pin=", waiting until "load"

Error processing plant 61000: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/61000/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/61000/?freq=A&pin=", waiting until "load"

Error processing plant 55581: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/55581/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/55581/?freq=A&pin=", waiting until "load"

Error processing plant 63104: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/63104/?freq=A&pin=
Call log:
navi

Processing chunks:  47%|▍| 600/1273 [19:36:10<23:16:46, 124.

Error processing plant 1296: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/1296/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/1296/?freq=A&pin=", waiting until "load"

Error processing plant 60639: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60639/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60639/?freq=A&pin=", waiting until "load"

Error processing plant 59864: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59864/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59864/?freq=A&pin=", waiting until "load"

Error processing plant 6518: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/6518/?freq=A&pin=
Call log:
navigatin

Processing chunks:  47%|▍| 602/1273 [19:36:11<11:25:11, 61.2

Error processing plant 54385: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/54385/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/54385/?freq=A&pin=", waiting until "load"

Error processing plant 65466: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/65466/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/65466/?freq=A&pin=", waiting until "load"

Error processing plant 58877: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58877/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/58877/?freq=A&pin=", waiting until "load"

Error processing plant 59125: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59125/?freq=A&pin=
Call log:
navi

Processing chunks:  47%|▍| 603/1273 [19:36:11<8:00:30, 43.03

Error processing plant 58177: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58177/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/58177/?freq=A&pin=", waiting until "load"

Error processing plant 58240: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58240/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/58240/?freq=A&pin=", waiting until "load"

Error processing plant 57244: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57244/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/57244/?freq=A&pin=", waiting until "load"

Error processing plant 57065: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57065/?freq=A&pin=
Call log:
navi

Processing chunks:  48%|▍| 605/1273 [19:36:12<3:58:27, 21.42

Error processing plant 7849: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/7849/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/7849/?freq=A&pin=", waiting until "load"

Error processing plant 57119: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57119/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/57119/?freq=A&pin=", waiting until "load"

Error processing plant 6281: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/6281/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/6281/?freq=A&pin=", waiting until "load"

Error processing plant 65681: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/65681/?freq=A&pin=
Call log:
navigating

Processing chunks:  48%|▍| 607/1273 [19:36:14<2:00:00, 10.81

Error processing plant 61101: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/61101/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/61101/?freq=A&pin=", waiting until "load"

Error processing plant 56295: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/56295/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/56295/?freq=A&pin=", waiting until "load"

Error processing plant 2876: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/2876/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/2876/?freq=A&pin=", waiting until "load"

Error processing plant 10720: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/10720/?freq=A&pin=
Call log:
navigat

Processing chunks:  48%|▍| 608/1273 [19:36:14<1:25:24,  7.71

Error processing plant 60766: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60766/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60766/?freq=A&pin=", waiting until "load"

Error processing plant 57868: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57868/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/57868/?freq=A&pin=", waiting until "load"

Error processing plant 61744: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/61744/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/61744/?freq=A&pin=", waiting until "load"

Error processing plant 57836: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57836/?freq=A&pin=
Call log:
navi

Processing chunks:  48%|▍| 609/1273 [19:36:15<1:01:11,  5.53

Error processing plant 65406: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/65406/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/65406/?freq=A&pin=", waiting until "load"

Error processing plant 59611: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59611/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59611/?freq=A&pin=", waiting until "load"

Error processing plant 57603: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57603/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/57603/?freq=A&pin=", waiting until "load"

Error processing plant 65723: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/65723/?freq=A&pin=
Call log:
navi

Processing chunks:  48%|▍| 610/1273 [19:36:15<44:14,  4.00s/

Error processing plant 58428: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58428/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/58428/?freq=A&pin=", waiting until "load"

Error processing plant 63413: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/63413/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/63413/?freq=A&pin=", waiting until "load"

Error processing plant 63624: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/63624/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/63624/?freq=A&pin=", waiting until "load"

Error processing plant 1297: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/1297/?freq=A&pin=
Call log:
naviga

Processing chunks:  48%|▍| 611/1273 [19:36:16<32:23,  2.94s/

Error processing plant 55151: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/55151/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/55151/?freq=A&pin=", waiting until "load"

Error processing plant 2140: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/2140/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/2140/?freq=A&pin=", waiting until "load"

Error processing plant 1156: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/1156/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/1156/?freq=A&pin=", waiting until "load"

Error processing plant 57556: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/57556/?freq=A&pin=
Call log:
navigating

Processing chunks:  49%|▍| 628/1273 [20:07:01<18:21:40, 102.

Error processing plant 8853: list index out of range


Processing chunks:  54%|▌| 684/1273 [21:42:58<16:46:56, 102.

Error processing plant 8828: list index out of range


Processing chunks:  65%|▋| 832/1273 [25:57:52<12:32:19, 102.

Error processing plant 8857: list index out of range
Error processing plant 8858: list index out of range


Processing chunks:  65%|▋| 833/1273 [25:59:34<12:30:17, 102.

Error processing plant 8843: list index out of range


Processing chunks:  66%|▋| 844/1273 [26:19:32<12:55:24, 108.

Error processing plant 61909: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/61909/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/61909/?freq=A&pin=", waiting until "load"

Error processing plant 50799: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/50799/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/50799/?freq=A&pin=", waiting until "load"

Error processing plant 59544: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59544/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59544/?freq=A&pin=", waiting until "load"

Error processing plant 58198: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58198/?freq=A&pin=
Call log:
navi

Processing chunks:  66%|▋| 845/1273 [26:19:32<9:02:36, 76.07

Error processing plant 50666: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/50666/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/50666/?freq=A&pin=", waiting until "load"

Error processing plant 59530: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59530/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59530/?freq=A&pin=", waiting until "load"

Error processing plant 59222: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59222/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59222/?freq=A&pin=", waiting until "load"

Error processing plant 50311: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/50311/?freq=A&pin=
Call log:
navi

Processing chunks:  66%|▋| 846/1273 [26:19:33<6:19:54, 53.38

Error processing plant 58896: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58896/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/58896/?freq=A&pin=", waiting until "load"

Error processing plant 56661: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/56661/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/56661/?freq=A&pin=", waiting until "load"

Error processing plant 58720: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58720/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/58720/?freq=A&pin=", waiting until "load"

Error processing plant 58242: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/58242/?freq=A&pin=
Call log:
navi

Processing chunks:  67%|▋| 847/1273 [26:19:33<4:26:15, 37.50

Error processing plant 63678: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/63678/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/63678/?freq=A&pin=", waiting until "load"

Error processing plant 60449: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60449/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60449/?freq=A&pin=", waiting until "load"

Error processing plant 60886: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60886/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60886/?freq=A&pin=", waiting until "load"

Error processing plant 59702: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59702/?freq=A&pin=
Call log:
navi

Processing chunks:  67%|▋| 848/1273 [26:19:34<3:06:57, 26.39

Error processing plant 63596: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/63596/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/63596/?freq=A&pin=", waiting until "load"

Error processing plant 60715: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60715/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60715/?freq=A&pin=", waiting until "load"

Error processing plant 60530: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60530/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60530/?freq=A&pin=", waiting until "load"

Error processing plant 7408: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/7408/?freq=A&pin=
Call log:
naviga

Processing chunks:  67%|▋| 849/1273 [26:19:35<2:12:13, 18.71

Error processing plant 59157: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59157/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59157/?freq=A&pin=", waiting until "load"

Error processing plant 59221: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/59221/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/59221/?freq=A&pin=", waiting until "load"

Error processing plant 60030: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/60030/?freq=A&pin=
Call log:
navigating to "https://www.eia.gov/beta/electricity/data/browser/#/plant/60030/?freq=A&pin=", waiting until "load"

Error processing plant 56563: Page.goto: net::ERR_INTERNET_DISCONNECTED at https://www.eia.gov/beta/electricity/data/browser/#/plant/56563/?freq=A&pin=
Call log:
navi

Processing chunks:  67%|▋| 855/1273 [26:37:42<14:56:36, 128.

Error processing plant 8832: list index out of range


Processing chunks:  79%|▊| 1009/1273 [31:25:03<7:29:06, 102.

Error processing plant 8850: list index out of range


Processing chunks:  82%|▊| 1042/1273 [32:21:09<6:32:46, 102.

Error processing plant 8847: list index out of range


Processing chunks:  90%|▉| 1143/1273 [35:13:16<3:41:48, 102.

Error processing plant 8829: list index out of range


Processing chunks:  95%|▉| 1204/1273 [36:57:29<1:58:17, 102.

Error processing plant 8867: list index out of range


Processing chunks: 100%|█| 1273/1273 [38:54:03<00:00, 110.01


In [None]:
df = pd.read_csv('output.csv')

## Adding in plants that had errors in the last step

In [25]:
from tqdm.asyncio import tqdm  
import asyncio
import pandas as pd
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup

# ✅ List of extracted plant codes to filter on
plant_codes = [
    8812, 8865, 8851, 8823, 8831, 8809, 8852, 8837, 8848, 8866, 8846, 8838, 8835, 8841,
    8816, 8899, 8834, 8868, 8827, 8824, 8845, 54885, 62960, 876, 62614, 56242, 56686,
    55811, 10294, 7493, 66321, 66323, 61000, 55581, 63104, 59868, 60091, 57979, 2986,
    58902, 57775, 1296, 60639, 59864, 6518, 10872, 59325, 59880, 59881, 63236, 62475,
    254, 65033, 61741, 62050, 10405, 58304, 1778, 785, 3407, 55204, 54385, 65466, 58877,
    59125, 61090, 59832, 64602, 58791, 59735, 63990, 58177, 58240, 57244, 57065, 55103,
    55544, 61553, 61684, 1871, 56798, 55871, 56359, 56468, 63230, 64716, 64720, 1895,
    57412, 3476, 62569, 7849, 57119, 6281, 65681, 57009, 60578, 60864, 54267, 61381,
    4180, 62103, 65725, 56705, 57064, 6304, 10781, 57059, 61058, 61506, 63758, 61101,
    56295, 2876, 10720, 147, 2059, 58697, 59601, 58497, 60765, 60766, 57868, 61744,
    57836, 61646, 10453, 63614, 57872, 57613, 58478, 65406, 59611, 57603, 65723, 58688,
    67520, 66442, 66473, 55010, 55011, 58428, 63413, 63624, 1297, 1241, 4124, 440,
    58118, 61044, 58284, 55151, 2140, 1156, 57556, 60583, 3916, 52006, 6026, 64142,
    2103, 8853, 8828, 8857, 8858, 8843, 61909, 50799, 59544, 58198, 3291, 3290, 63827,
    63264, 63804, 55047, 50666, 59530, 59222, 50311, 60239, 62389, 55656, 58856, 61230,
    58614, 58896, 56661, 58720, 58242, 61014, 57999, 58319, 57620, 60470, 50628, 63678,
    60449, 60886, 59702, 2995, 6248, 59894, 50373, 61665, 57124
]

# ✅ Scrape only plants in this filtered list
async def process_plant_chunk(plant_chunk):
    chunk_data = []

    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=True)
        page = await browser.new_page()

        for plant in plant_chunk:
            try:
                plant_ID = plant['Plant Code']
                url = f"https://www.eia.gov/beta/electricity/data/browser/#/plant/{plant_ID}/?freq=A&pin="
                
                await page.goto(url)
                await page.wait_for_timeout(10000)

                html = await page.content()
                soup_doc = BeautifulSoup(html, "html.parser")

                link = soup_doc.find('div', class_='table_title').find('div').find_all('a')[1]['href']
                plant['Link'] = link
                chunk_data.append(plant)

            except Exception as e:
                print(f"Error processing plant {plant_ID}: {e}")

        await browser.close()

    return chunk_data

# ✅ Save filtered data incrementally
async def process_in_chunks(df_list, chunk_size=10, output_file="output2.csv"):
    # Filter df_list based on plant_codes
    filtered = [plant for plant in df_list if plant['Plant Code'] in plant_codes]

    # Save empty header first
    pd.DataFrame(filtered)[0:0].to_csv(output_file, mode='w', header=True)

    for i in tqdm(range(0, len(filtered), chunk_size), desc="Processing chunks"):
        chunk = filtered[i:i + chunk_size]
        chunk_data = await process_plant_chunk(chunk)

        if chunk_data:
            pd.DataFrame(chunk_data).to_csv(output_file, mode='a', header=False)

# ✅ If calling from a notebook
def run_asyncio():
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        return asyncio.create_task(process_in_chunks(df_list))
    else:
        asyncio.run(process_in_chunks(df_list))

# ✅ Run scraping — assumes df_list is already defined
await process_in_chunks(df_list)


Processing chunks:   0%|             | 0/21 [00:00<?, ?it/s]

Error processing plant 8812: list index out of range
Error processing plant 8865: list index out of range
Error processing plant 8851: list index out of range
Error processing plant 8823: list index out of range
Error processing plant 8831: list index out of range
Error processing plant 8809: list index out of range
Error processing plant 8852: list index out of range
Error processing plant 8837: list index out of range
Error processing plant 8848: list index out of range


Processing chunks:   5%|▏   | 1/21 [01:44<34:42, 104.11s/it]

Error processing plant 8866: list index out of range
Error processing plant 8846: list index out of range
Error processing plant 8838: list index out of range
Error processing plant 8835: list index out of range
Error processing plant 8841: list index out of range
Error processing plant 8816: list index out of range
Error processing plant 8899: list index out of range
Error processing plant 8834: list index out of range
Error processing plant 8868: list index out of range
Error processing plant 8827: list index out of range


Processing chunks:  10%|▍   | 2/21 [03:26<32:40, 103.19s/it]

Error processing plant 8824: list index out of range
Error processing plant 8845: list index out of range


Processing chunks:  76%|██▎| 16/21 [27:26<08:35, 103.10s/it]

Error processing plant 8853: list index out of range
Error processing plant 8828: list index out of range
Error processing plant 8857: list index out of range
Error processing plant 8858: list index out of range
Error processing plant 8843: list index out of range


Processing chunks: 100%|███| 21/21 [35:20<00:00, 100.99s/it]


In [26]:
df_2 = pd.read_csv('output2.csv')

In [27]:
df_2.head()

Unnamed: 0.1,Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category
0,Kimberly Mill,54885,WI,Electric Utility,HY,WAT,Conventional Hydroelectric,,,,Renewable Energy Sources,"map/?center=-88.3344,44.2769&level=14"
1,Kimberly-Clark Solar,62960,GA,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-85.034,33&level=14"
2,Kincaid Generation LLC,876,IL,IPP Non-CHP,ST,SUB,Conventional Steam Coal,262547.0,262547.0,--,Coal,"map/?center=-89.496389,39.590556&level=14"
3,Kinder Morgan Fordham,62614,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.236582,40.542557&level=14"
4,Kindred School,56242,ND,Commercial Non-CHP,IC,DFO,Petroleum Liquids,,,,Petroleum,"map/?center=-97.00953,46.64974&level=14"


In [29]:
df_2['longitude'] = df_2['energy_category'].str.extract(r'map/\?center=(-?\d+\.\d+),')

In [30]:
df_2['latitude'] = df_2['energy_category'].str.extract(r',(\d+\.\d+)')

In [31]:
df_2.head()

Unnamed: 0.1,Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,longitude,latitude
0,Kimberly Mill,54885,WI,Electric Utility,HY,WAT,Conventional Hydroelectric,,,,Renewable Energy Sources,"map/?center=-88.3344,44.2769&level=14",-88.3344,44.2769
1,Kimberly-Clark Solar,62960,GA,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-85.034,33&level=14",-85.034,
2,Kincaid Generation LLC,876,IL,IPP Non-CHP,ST,SUB,Conventional Steam Coal,262547.0,262547.0,--,Coal,"map/?center=-89.496389,39.590556&level=14",-89.496389,39.590556
3,Kinder Morgan Fordham,62614,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.236582,40.542557&level=14",-74.236582,40.542557
4,Kindred School,56242,ND,Commercial Non-CHP,IC,DFO,Petroleum Liquids,,,,Petroleum,"map/?center=-97.00953,46.64974&level=14",-97.00953,46.64974


In [32]:
df_2 = df_2.reset_index()

In [33]:
columns = df_2.columns.tolist()

# Shift the first 11 headers one column to the left
columns[:11] = columns[1:12]  # Move headers left
columns[10] = ""  # Clear the last shifted column

# Assign the corrected headers back
df_2.columns = columns

df_2.head()

Unnamed: 0.1,Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,Unnamed: 11,consumption_volume_million_gallons,energy_category,longitude,latitude
0,0,Kimberly Mill,54885,WI,Electric Utility,HY,WAT,Conventional Hydroelectric,,,,Renewable Energy Sources,"map/?center=-88.3344,44.2769&level=14",-88.3344,44.2769
1,1,Kimberly-Clark Solar,62960,GA,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-85.034,33&level=14",-85.034,
2,2,Kincaid Generation LLC,876,IL,IPP Non-CHP,ST,SUB,Conventional Steam Coal,262547.0,262547.0,--,Coal,"map/?center=-89.496389,39.590556&level=14",-89.496389,39.590556
3,3,Kinder Morgan Fordham,62614,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.236582,40.542557&level=14",-74.236582,40.542557
4,4,Kindred School,56242,ND,Commercial Non-CHP,IC,DFO,Petroleum Liquids,,,,Petroleum,"map/?center=-97.00953,46.64974&level=14",-97.00953,46.64974


In [34]:
df_2 = df_2.drop(columns=['Unnamed: 0'])

In [35]:
df_2.shape

(180, 14)

In [36]:
df_2.to_csv('output2.csv')

In [88]:
df = pd.read_csv('output.csv')

In [89]:
df.shape

(12481, 12)

In [90]:
df.head()

Unnamed: 0.1,Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14"
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14"
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14"
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14"
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14"


In [91]:
df.dtypes

Unnamed: 0                            object
plant_name                             int64
Plant Code                            object
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
consumption_volume_million_gallons    object
energy_category                       object
dtype: object

In [60]:
df_2.dtypes

plant_name                            object
Plant Code                             int64
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
                                      object
consumption_volume_million_gallons    object
energy_category                       object
longitude                             object
latitude                              object
dtype: object

In [5]:
df = pd.read_csv('plants_with_coordinates.csv')

In [6]:
df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14",-73.91048,42.87657
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14",-70.726675,41.808547
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14",-71.237,42.115
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14",-71.49625,41.63269
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14",-74.57594,40.55812


In [7]:
df['longitude'] = df['map_link'].str.extract(r'map/\?center=(-?\d+(?:\.\d+)?),')
df['latitude'] = df['map_link'].str.extract(r',(-?\d+(?:\.\d+)?)')

In [9]:
df['longitude']

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude
12656,Pawnee,6248,CO,Electric Utility,ST,SUB,Conventional Steam Coal,1970.0,0.0,1970.0,Coal,"map/?center=-103.6803,40.2217&level=14",-103.6803,40.2217
12657,Pawpaw Solar Plant,59894,GA,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-84.256778,32.572875&level=14",-84.256778,32.572875
12658,Paxton Creek Cogeneration,50373,PA,IPP CHP,IC,NG,Natural Gas Internal Combustion Engine,,,,Natural Gas,"map/?center=-76.8771,40.2653&level=14",-76.8771,40.2653
12659,Payne Creek Solar,61665,FL,Electric Utility,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-81.964222,27.664583&level=14",-81.964222,27.664583
12660,Payne's Ferry,57124,ID,IPP Non-CHP,WT,WND,Onshore Wind Turbine,,,,Renewable Energy Sources,"map/?center=-115.010833,42.825278&level=14",-115.010833,42.825278


In [15]:
df.to_csv('plants_with_coordinates.csv')

In [14]:
df.shape

(12661, 14)

In [98]:
columns = df.columns.tolist()

# Shift the first 11 headers one column to the left
columns[:11] = columns[1:12]  # Move headers left
columns[10] = ""  # Clear the last shifted column

# Assign the corrected headers back
df.columns = columns

df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,Unnamed: 11,energy_category,longitude,latitude
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14",-73.91048,42.87657
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14",-70.726675,41.808547
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14",-71.237,42.115
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14",-71.49625,41.63269
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14",-74.57594,40.55812


In [99]:
df.dtypes

plant_name                            object
Plant Code                             int64
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
consumption_volume_million_gallons    object
                                      object
energy_category                       object
longitude                             object
latitude                              object
dtype: object

In [100]:
if len(df.columns) > 11:
    df.columns.values[10] = 'energy_category'
    df.columns.values[11] = 'map_link'

In [101]:
df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14",-73.91048,42.87657
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14",-70.726675,41.808547
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14",-71.237,42.115
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14",-71.49625,41.63269
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14",-74.57594,40.55812


In [83]:
df_2.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,Unnamed: 10,consumption_volume_million_gallons,energy_category,longitude,latitude
0,Kimberly Mill,54885,WI,Electric Utility,HY,WAT,Conventional Hydroelectric,,,,Renewable Energy Sources,"map/?center=-88.3344,44.2769&level=14",-88.3344,44.2769
1,Kimberly-Clark Solar,62960,GA,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-85.034,33&level=14",-85.034,
2,Kincaid Generation LLC,876,IL,IPP Non-CHP,ST,SUB,Conventional Steam Coal,262547.0,262547.0,--,Coal,"map/?center=-89.496389,39.590556&level=14",-89.496389,39.590556
3,Kinder Morgan Fordham,62614,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.236582,40.542557&level=14",-74.236582,40.542557
4,Kindred School,56242,ND,Commercial Non-CHP,IC,DFO,Petroleum Liquids,,,,Petroleum,"map/?center=-97.00953,46.64974&level=14",-97.00953,46.64974


In [82]:
df_2.dtypes

plant_name                            object
Plant Code                             int64
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
                                      object
consumption_volume_million_gallons    object
energy_category                       object
longitude                             object
latitude                              object
dtype: object

In [86]:
if len(df_2.columns) > 11:
    df_2.columns.values[9] = 'consumption_volume_million_gallons'
    df_2.columns.values[10] = 'energy_category'
    df_2.columns.values[11] = 'map_link'

In [87]:
df_2.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude
0,Kimberly Mill,54885,WI,Electric Utility,HY,WAT,Conventional Hydroelectric,,,,Renewable Energy Sources,"map/?center=-88.3344,44.2769&level=14",-88.3344,44.2769
1,Kimberly-Clark Solar,62960,GA,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-85.034,33&level=14",-85.034,
2,Kincaid Generation LLC,876,IL,IPP Non-CHP,ST,SUB,Conventional Steam Coal,262547.0,262547.0,--,Coal,"map/?center=-89.496389,39.590556&level=14",-89.496389,39.590556
3,Kinder Morgan Fordham,62614,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.236582,40.542557&level=14",-74.236582,40.542557
4,Kindred School,56242,ND,Commercial Non-CHP,IC,DFO,Petroleum Liquids,,,,Petroleum,"map/?center=-97.00953,46.64974&level=14",-97.00953,46.64974


In [102]:
df.dtypes

plant_name                            object
Plant Code                             int64
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
consumption_volume_million_gallons    object
energy_category                       object
map_link                              object
longitude                             object
latitude                              object
dtype: object

In [103]:
df_2.dtypes

plant_name                            object
Plant Code                             int64
State                                 object
Sector Name                           object
Prime Movers                          object
Fuel Types                            object
Primary Technology                    object
withdrawal_volume_million_gallons     object
discharge_volume_million_gallons      object
consumption_volume_million_gallons    object
energy_category                       object
map_link                              object
longitude                             object
latitude                              object
dtype: object

In [105]:
combined_df = pd.concat([df, df_2], ignore_index=True)

In [110]:
combined_df.shape

(12661, 14)

In [111]:
combined_df.to_csv('plants_with_coordinates.csv', index=False)

In [109]:
df = pd.read_csv('plants_with_coordinates.csv')
df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14",-73.91048,42.87657
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14",-70.726675,41.808547
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14",-71.237,42.115
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14",-71.49625,41.63269
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14",-74.57594,40.55812


## 👉 convert lat/long to census geography codes 

(like 'GEOID', 'STATE', 'COUNTY', 'TRACT', 'BLOCK', etc...)

Same note as above, see [census-examples](https://github.com/data4news/census-examples) repository for examples or ask in the class slack channel if stuck.

In [43]:
!pip install censusgeocode

Collecting censusgeocode
  Downloading censusgeocode-0.5.2-py3-none-any.whl.metadata (6.6 kB)
Collecting requests-toolbelt<1,>=0.9.0 (from censusgeocode)
  Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl.metadata (14 kB)
Downloading censusgeocode-0.5.2-py3-none-any.whl (9.2 kB)
Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl (54 kB)
Installing collected packages: requests-toolbelt, censusgeocode
Successfully installed censusgeocode-0.5.2 requests-toolbelt-0.10.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [13]:
!pip install --upgrade --force-reinstall urllib3

Collecting urllib3
  Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Using cached urllib3-2.3.0-py3-none-any.whl (128 kB)
Installing collected packages: urllib3
  Attempting uninstall: urllib3
    Found existing installation: urllib3 2.3.0
    Uninstalling urllib3-2.3.0:
      Successfully uninstalled urllib3-2.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandasai 2.4.2 requires pandas==1.5.3, but you have pandas 2.2.3 which is incompatible.[0m[31m
[0mSuccessfully installed urllib3-2.3.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [14]:
!pip install --upgrade --force-reinstall censusgeocode

Collecting censusgeocode
  Using cached censusgeocode-0.5.2-py3-none-any.whl.metadata (6.6 kB)
Collecting requests<3,>=2.27.0 (from requests[security]<3,>=2.27.0->censusgeocode)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting requests-toolbelt<1,>=0.9.0 (from censusgeocode)
  Using cached requests_toolbelt-0.10.1-py2.py3-none-any.whl.metadata (14 kB)
Collecting charset-normalizer<4,>=2 (from requests<3,>=2.27.0->requests[security]<3,>=2.27.0->censusgeocode)
  Using cached charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests<3,>=2.27.0->requests[security]<3,>=2.27.0->censusgeocode)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests<3,>=2.27.0->requests[security]<3,>=2.27.0->censusgeocode)
  Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests<3,>=2.27.0->requests[security]<3,>=2.27.0->c

In [13]:
!pip uninstall censusgeocode requests_toolbelt urllib3 requests -y

Found existing installation: censusgeocode 0.5.2
Uninstalling censusgeocode-0.5.2:
  Successfully uninstalled censusgeocode-0.5.2
Found existing installation: requests-toolbelt 0.10.1
Uninstalling requests-toolbelt-0.10.1:
  Successfully uninstalled requests-toolbelt-0.10.1
Found existing installation: urllib3 2.3.0
Uninstalling urllib3-2.3.0:
  Successfully uninstalled urllib3-2.3.0
Found existing installation: requests 2.32.3
Uninstalling requests-2.32.3:
  Successfully uninstalled requests-2.32.3


In [14]:
!pip install requests urllib3==1.26.16 requests_toolbelt==0.9.1 censusgeocode

Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting urllib3==1.26.16
  Downloading urllib3-1.26.16-py2.py3-none-any.whl.metadata (48 kB)
Collecting requests_toolbelt==0.9.1
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl.metadata (13 kB)
Collecting censusgeocode
  Using cached censusgeocode-0.5.2-py3-none-any.whl.metadata (6.6 kB)
Downloading urllib3-1.26.16-py2.py3-none-any.whl (143 kB)
Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Using cached censusgeocode-0.5.2-py3-none-any.whl (9.2 kB)
Installing collected packages: urllib3, requests, requests_toolbelt, censusgeocode
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandasai 2.4.2 requires pandas==1.5.3, but you have pandas 2.2.3 which is incompatible.[0m[31m
[0mSuccessfull

In [98]:
import glob
import json
import requests
import pandas as pd
from pprint import pprint

In [99]:
import pandas as pd
import censusgeocode as cg
from concurrent.futures import ThreadPoolExecutor
from tqdm.notebook import tqdm
import csv 

import requests_cache
cache = requests_cache.CachedSession("geocode_cache", backend="filesystem")

def geocode(lat, lng):
    try:
        url = "https://geocoding.geo.census.gov/geocoder/geographies/coordinates"
        params = {
            "x": lng,
            "y": lat,
            "benchmark": "Public_AR_Census2020",
            "vintage": "Census2020_Census2020",
            "format": "json"
        }
        response = cache.get(url, params=params)
        response.raise_for_status()
        data = response.json()
        census = data['result']['geographies']['Census Tracts'][0]
        return census
    except Exception as e:
        print(f"Error geocoding ({lat}, {lng}): {e}")
        return None

def bulk_geocode(latitudes, longitudes):
    """
    Geocode a list of latitudes and longitudes in parallel (for speed).
    """

    with ThreadPoolExecutor() as tpe:
        latitudes = df['latitude']
        longitudes = df['longitude']
        mapped_results = tpe.map(geocode, latitudes, longitudes)
        data = list(tqdm(mapped_results, total=len(df)))

    return pd.DataFrame(data)

# census_geos_df = bulk_geocode(df['latitude'], df['longitude']) 
# census_geos_df.head()

with open('censusgeos.csv', 'w') as f:
    header = ['SUFFIX', 'POP100', 'GEOID', 'CENTLAT', 'BLOCK', 'AREAWATER', 'STATE', 'BASENAME', 'OID', 'LSADC', 'INTPTLAT', 'FUNCSTAT', 'NAME', 'OBJECTID', 'TRACT', 'CENTLON', 'BLKGRP', 'AREALAND', 'HU100', 'INTPTLON', 'MTFCC', 'LWBLKTYP', 'UR', 'COUNTY']
    writer = csv.DictWriter(f, fieldnames=header)

    writer.writeheader()
    
    for _, row in tqdm(df.iterrows(), total=len(df)):
        json_data = geocode(row['latitude'], row['longitude'])
        if json_data:
            writer.writerow(json_data)    

census_geos_df = pd.read_csv('censusgeos.csv')

  0%|          | 0/1596 [00:00<?, ?it/s]

Error geocoding (41.409, nan): 400 Client Error:  for url: https://geocoding.geo.census.gov/geocoder/geographies/coordinates?x=nan&y=41.409&benchmark=Public_AR_Census2020&vintage=Census2020_Census2020&format=json
Error geocoding (40.6631, nan): 400 Client Error:  for url: https://geocoding.geo.census.gov/geocoder/geographies/coordinates?x=nan&y=40.6631&benchmark=Public_AR_Census2020&vintage=Census2020_Census2020&format=json
Error geocoding (nan, -77.0939): 400 Client Error:  for url: https://geocoding.geo.census.gov/geocoder/geographies/coordinates?x=-77.0939&y=nan&benchmark=Public_AR_Census2020&vintage=Census2020_Census2020&format=json


In [94]:
to_keep = ['GEOID', 'STATE', 'COUNTY', 'TRACT', 'BLOCK']
census_geos_df = census_geos_df[to_keep]
census_geos_df

Unnamed: 0,GEOID,STATE,COUNTY,TRACT,BLOCK
0,26005032401,26,5,32401,
1,36061015102,36,61,15102,
2,18129040400,18,129,40400,
3,9009175400,9,9,175400,
4,26013000200,26,13,200,
...,...,...,...,...,...
1588,4027000301,4,27,301,
1589,26139022206,26,139,22206,
1590,36081010701,36,81,10701,
1591,17097866000,17,97,866000,


In [95]:
df_with_geos = pd.concat(
    [ 
        df.reset_index(drop=True),
        census_geos_df.reset_index(drop=True)
    ], 
    axis=1)

df_with_geos.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,Unnamed: 10,Natural Gas,longitude,latitude,GEOID,STATE,COUNTY,TRACT,BLOCK
0,491 E 48th Street,7268,MI,Electric Utility,GT,NG,Natural Gas Fired Combustion Turbine,,,,Natural Gas,"map/?center=-86.0853,42.7553&level=14",-86.0853,42.7553,26005030000.0,26.0,5.0,32401.0,
1,59th Street,2503,NY,Electric Utility,GT,NG,Natural Gas Fired Combustion Turbine,,,,Natural Gas,"map/?center=-73.99115,40.771285&level=14",-73.99115,40.771285,36061020000.0,36.0,61.0,15102.0,
2,A B Brown,6137,IN,Electric Utility,GT,NG,Natural Gas Fired Combustion Turbine,,,,Natural Gas,"map/?center=-87.715,37.9053&level=14",-87.715,37.9053,18129040000.0,18.0,129.0,40400.0,
3,A L Pierce,6635,CT,Electric Utility,GT,NG,Natural Gas Fired Combustion Turbine,,,,Natural Gas,"map/?center=-72.834884,41.44823&level=14",-72.834884,41.44823,9009175000.0,9.0,9.0,175400.0,
4,A.J. Mihm Generating Station,61391,MI,Electric Utility,IC,NG,Natural Gas Internal Combustion Engine,,,,Natural Gas,"map/?center=-88.614009,46.792669&level=14",-88.614009,46.792669,26013000000.0,26.0,13.0,200.0,


## 👉 Output Data

Output your dataframe containing your data and the Census connector codes (like tract, block, etc...).

In [96]:
df_with_geos.to_csv('plants_geocode.csv', index=False)