In [1]:
# Sample wells from the CSV file, with each well's name and API number:

# well 1:
# Name: Carson SWD 5301 12-24
# API#: 33-053-90329

# well 2:
# Name: Atlanta #1 SWD
# API#: 33-105-90258

# well 3:
# Name: Chalmers Wade Federal 5301 44-24 12TXR
# API#: 33-053-06012



In [3]:
################################################
# STEP 1: Find each well's drillingedge.com page URL (search by API number, else by well name)
################################################
import re
import csv
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd

# Root of the site being scraped; relative result links are resolved against it.
BASE = "https://www.drillingedge.com"

# Browser-like request headers installed on the shared session below.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

# One shared Session for the whole notebook, so cookies received on one
# request are sent again on the following ones.
session = requests.Session()
session.headers.update(HEADERS)

# Initial request to the home page so the session picks up the site's cookie.
session.get(BASE)

def search_well_url(well_name: str, api: str) -> str | None:
    # Normalize API: strip trailing completion code like -00 so it matches drillingedge URLs
    # e.g. "33-053-05906-00" -> "33-053-05906"
    
    
    if api:
        api=api.strip()
        params = {"type": "wells", "operator_name":"", "well_name":"", "api_no": api}
    elif well_name is not None:
        well_name=well_name.lower().strip().replace(" ", "-").replace("&", "and")
        short_name = " ".join(well_name.split()[:3])
        params = {"type": "wells", "operator_name":"","well_name": well_name}

    r = session.get(f"{BASE}/search", params=params)
    soup = BeautifulSoup(r.text, "html.parser")
    
    for a in soup.select('a[href*="/wells/"]'):
        href = a.get("href", "")
        full = urljoin(BASE, href)

        if api:
            if api in full:
                return full
            else:
                print("api not matching:",api)
        else:
            if well_name in full:
                return full
            else:
                print("well name not matching:",well_name)
                return full

    return None

In [4]:
# Read wells_data2.csv with csv.DictReader, keeping each row's well name and
# normalized API number. Rows that have neither are dropped.

def _normalize_api(raw_api):
    """Return the API number without trailing "-00" code(s), or None if missing.

    Blank values and the literal "NULL" (any case) count as missing.
    e.g. "33-053-05906-00" -> "33-053-05906".

    A suffix match is used rather than str.strip("-00"): strip() treats its
    argument as a set of characters, so it would also remove legitimate
    trailing zeros (e.g. "33-053-06010" -> "33-053-0601").
    """
    if raw_api is None:
        return None
    cleaned = raw_api.strip()
    if not cleaned or cleaned.upper() == "NULL":
        return None
    while cleaned.endswith("-00"):
        cleaned = cleaned[:-len("-00")]
    return cleaned or None


wells = []
with open("wells_data2.csv", newline='', encoding="utf-8") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        api = _normalize_api(row.get("api_number"))
        well_dict = {
            "name":   row.get("well_name", None),
            "api":    api
        }

        # Keep the row only if it carries at least one identifier.
        if well_dict["api"] is not None or well_dict["name"] is not None:
            wells.append(well_dict)


In [5]:

# Tabulate the parsed wells for a quick visual check.
df = pd.DataFrame(data=wells)
# Display the rows whose "api" value is not the literal string "NULL".
df[df["api"].ne("NULL")]

Unnamed: 0,name,api
0,Basic Game & Fish 34-3,
1,Corps of En ineers 31-10,
2,Lewis & Clark 2-4H,
3,"Field/ Prospect: McKenziel County, NDHarper Oi...",
4,(see details Footages F L Field Name of Contra...,
...,...,...
66,KLINE FEDERAL 5300 31-18 15T,33-053-06755
67,& No. API # County & State SDI Job # Rig Surve...,33-053-08946
68,BUCK SHOT SWD 5300 31-31,33-053-90244
69,Atlanta #1 SWD Footages IQtr-Qtr I Section !To...,


In [6]:

# Search URLs: look up every well's page and store the result under "url"
# (None when nothing was found).
for well in wells:
    url = search_well_url(well["name"], well["api"])
    well["url"] = url
    # A well kept only for its API number can have name None, so fall back
    # to an empty label instead of slicing None.
    label = (well["name"] or "")[:40]
    print(f"{label:<40} -> {url or 'NOT FOUND'}")



Basic Game & Fish 34-3                   -> https://www.drillingedge.com/north-dakota/mckenzie-county/wells/basic-game-and-fish-34-3/33-053-02102
well name not matching: corps-of-en-ineers-31-10
Corps of En ineers 31-10                 -> https://www.drillingedge.com/north-dakota/mckenzie-county/wells/corps-of-engineers-31-10/33-053-02148
Lewis & Clark 2-4H                       -> https://www.drillingedge.com/north-dakota/mckenzie-county/wells/lewis-and-clark-2-4h/33-053-02556
Field/ Prospect: McKenziel County, NDHar -> NOT FOUND
(see details Footages F L Field Name of  -> NOT FOUND
Foley Federal 5301 43-12H Original Hole  -> NOT FOUND
and Numbe r 24-HOUR PRODUCTION RA TE (se -> https://www.drillingedge.com/north-dakota/mckenzie-county/wells/yukon-5301-41-12t/33-053-03911
well name not matching: dahl-15-11h
DAHL 15-11H                              -> https://www.drillingedge.com/north-dakota/mckenzie-county/wells/dahl-federal-15-11h/33-053-03703
DAHL FEDERAL 2-15H                     

In [15]:
# Display the current contents of the `wells` list (one dict per well).
wells

[{'name': 'Basic Game & Fish 34-3', 'api': None},
 {'name': 'Corps of En ineers 31-10', 'api': None},
 {'name': 'Lewis & Clark 2-4H', 'api': None},
 {'name': 'Field/ Prospect: McKenziel County, NDHarper Oil Company Well Information 8 3/4 to 11',
  'api': None},
 {'name': '(see details Footages F L Field Name of Contractor(s)',
  'api': None},
 {'name': 'Foley Federal 5301 43-12H Original Hole NDIC File No. 20863 Location: Section 12-T153N-R101W County/State: McKenzie County, ND Surveyed From a Depth of: 2,186’ MD to 12,283’ MD Type of Survey: Magnetic MWD Name(s) of MWD Supervisor(s): John Capra / Brandon Ramirez The data and calculations for this survey have been checked by me and conform to the standards and procedures set forth by Professional Directional Ltd. This report represents a true and correct Directional Survey of this well based on the original data obtained at the well site. The survey was calculated using the minimum curvature method. Robert D. Hays / Well Planner',
  'a

In [7]:
# Report how many wells ended up with a non-empty "url".
found_count = len([entry for entry in wells if entry['url']])
print(f"Found URLs for {found_count} out of {len(wells)} wells")


Found URLs for 59 out of 71 wells


In [None]:
################################################
# STEP 2: Extract well details from each well page
# Fields: api_no, well_name, operator, county, well_status, well_type, closest_city, oil_bbl, gas_mcf, production_dates_on_file
# Run STEP 1 first (defines session and populates wells with url).
# Page: section.meta_info (Well Summary), table.skinny (Well Details), p.block_stat (Barrels of Oil / MCF of Gas)
################################################
import re
from bs4 import BeautifulSoup

# Browser-like User-Agent header; scrape_well_detail passes it on each request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

def _parse_number(s: str) -> int | None:
    if not s or "members only" in (s or "").lower():
        return None
    m = re.match(r"\s*([\d,.]+)\s*([kKmM]?)\s*$", (s or "").strip())
    if not m:
        return None
    val = float(m.group(1).replace(",", ""))
    suf = (m.group(2) or "").lower()
    if suf == "k": val *= 1_000
    if suf == "m": val *= 1_000_000
    return int(val)

def _text(elem) -> str | None:
    if elem is None:
        return None
    t = elem.get_text(strip=True)
    return t if t and "members only" not in t.lower() else None

def scrape_well_detail(url: str) -> dict:
    """Fetch a well detail page and extract its summary fields.

    Three parts of the page are read:
      * section.meta_info -- well name, API #, operator, county and
        production dates on file;
      * table.skinny      -- well status, well type and closest city, plus a
        fallback for county / API / well name / operator when the summary
        section did not provide them;
      * p.block_stat      -- the oil (barrels) and gas (MCF) figures.

    Args:
        url: Absolute URL of the well page.

    Returns:
        Dict with the keys api_no, well_name, operator, county, well_status,
        well_type, closest_city, oil_bbl, gas_mcf and production_dates_on_file.
        A value is None when it is absent from the page or shown only as a
        "members only" placeholder.
    """
    r = session.get(url, headers=HEADERS)
    soup = BeautifulSoup(r.text, "html.parser")
    out = {"api_no": None, "well_name": None, "operator": None, "county": None, "well_status": None, "well_type": None, "closest_city": None, "oil_bbl": None, "gas_mcf": None, "production_dates_on_file": None}

    # (label fragment, output key) pairs, checked in order; the first fragment
    # found in a row's label decides where its value goes.
    meta_fields = (
        ("Well Name", "well_name"),
        ("API #", "api_no"),
        ("Operator", "operator"),
        ("County", "county"),
        ("Production Dates on File", "production_dates_on_file"),
    )
    meta = soup.select_one("section.meta_info")
    if meta:
        for div in meta.select("div"):
            span = div.select_one("span.detail_point")
            if not span:
                continue
            # The label is whatever precedes the first ":" in the row text.
            label = div.get_text(" ", strip=True).split(":")[0].strip()
            # Prefer the link text when the value is wrapped in <a>. _text
            # applies the same "members only" filter used for the table
            # values, so a placeholder here no longer blocks the table
            # fallback below.
            val = _text(span.find("a") or span)
            for fragment, field in meta_fields:
                if fragment in label:
                    out[field] = val
                    break

    # Table headers that always set their field ...
    table_fields = {
        "Well Status": "well_status",
        "Well Type": "well_type",
        "Closest City": "closest_city",
    }
    # ... and headers used only when the summary section left the field empty.
    table_fallbacks = {
        "County": "county",
        "API No.": "api_no",
        "Well Name": "well_name",
        "Operator": "operator",
    }
    table = soup.select_one("table.skinny")
    if table:
        for tr in table.select("tr"):
            for th in tr.select("th"):
                key = th.get_text(strip=True)
                val = _text(th.find_next_sibling("td"))
                if key in table_fields:
                    out[table_fields[key]] = val
                elif key in table_fallbacks and out[table_fallbacks[key]] is None:
                    out[table_fallbacks[key]] = val

    for p in soup.select("p.block_stat"):
        num_span = p.select_one("span.dropcap")
        if not num_span:
            continue
        num = _parse_number(num_span.get_text(strip=True))
        # Remove the number so only the descriptive text is left to classify.
        num_span.decompose()
        desc = p.get_text(" ", strip=True).lower()
        if "oil" in desc and ("barrel" in desc or "bbl" in desc):
            out["oil_bbl"] = num
        elif "gas" in desc and "mcf" in desc:  # "mmcf" also contains "mcf"
            out["gas_mcf"] = num

    return out

# Add the detail fields to every well record. Wells with a URL take all of
# them from the scraped page; wells without one reuse the CSV api/name (and
# any existing county) and get None for everything else.
_PAGE_ONLY_FIELDS = (
    "well_status",
    "well_type",
    "closest_city",
    "oil_bbl",
    "gas_mcf",
    "production_dates_on_file",
)
_DETAIL_FIELDS = ("api_no", "well_name", "operator", "county") + _PAGE_ONLY_FIELDS

for well in wells:
    url = well.get("url")
    if not url:
        well["api_no"] = well.get("api")
        well["well_name"] = well.get("name")
        well["operator"] = None
        well["county"] = well.get("county")  # keep CSV county when no URL
        for field in _PAGE_ONLY_FIELDS:
            well[field] = None
        continue

    data = scrape_well_detail(url)
    for field in _DETAIL_FIELDS:
        well[field] = data[field]

    


[{'api': None,
  'api_no': '33-053-02102',
  'closest_city': 'Williston',
  'county': 'McKenzie County, ND',
  'gas_mcf': 518,
  'name': 'Basic Game & Fish 34-3',
  'oil_bbl': 518,
  'operator': 'Rim Operating, Inc.',
  'production_dates_on_file': 'January 1986 to December 2025',
  'url': 'https://www.drillingedge.com/north-dakota/mckenzie-county/wells/basic-game-and-fish-34-3/33-053-02102',
  'well_name': 'Basic Game And Fish 34-3',
  'well_status': 'Active',
  'well_type': 'Oil & Gas'},
 {'api': None,
  'api_no': '33-053-02148',
  'closest_city': 'Williston',
  'county': 'McKenzie County, ND',
  'gas_mcf': None,
  'name': 'Corps of En ineers 31-10',
  'oil_bbl': 64,
  'operator': 'Rim Operating, Inc.',
  'production_dates_on_file': 'November 1986 to December 2025',
  'url': 'https://www.drillingedge.com/north-dakota/mckenzie-county/wells/corps-of-engineers-31-10/33-053-02148',
  'well_name': 'Corps Of Engineers 31-10',
  'well_status': 'Plugged and Abandoned',
  'well_type': 'Oil & G

In [10]:
import pprint
# Pretty-print every record in `wells` for inspection.
pprint.pprint(wells)

[{'api': None,
  'api_no': '33-053-02102',
  'closest_city': 'Williston',
  'county': 'McKenzie County, ND',
  'gas_mcf': 518,
  'name': 'Basic Game & Fish 34-3',
  'oil_bbl': 518,
  'operator': 'Rim Operating, Inc.',
  'production_dates_on_file': 'January 1986 to December 2025',
  'url': 'https://www.drillingedge.com/north-dakota/mckenzie-county/wells/basic-game-and-fish-34-3/33-053-02102',
  'well_name': 'Basic Game And Fish 34-3',
  'well_status': 'Active',
  'well_type': 'Oil & Gas'},
 {'api': None,
  'api_no': '33-053-02148',
  'closest_city': 'Williston',
  'county': 'McKenzie County, ND',
  'gas_mcf': None,
  'name': 'Corps of En ineers 31-10',
  'oil_bbl': 64,
  'operator': 'Rim Operating, Inc.',
  'production_dates_on_file': 'November 1986 to December 2025',
  'url': 'https://www.drillingedge.com/north-dakota/mckenzie-county/wells/corps-of-engineers-31-10/33-053-02148',
  'well_name': 'Corps Of Engineers 31-10',
  'well_status': 'Plugged and Abandoned',
  'well_type': 'Oil & G