In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from io import StringIO
import time
# pd.set_option('display.max_rows', None)

from src.utils import connect_to_db

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [3]:
# Distinct 10-digit APIs from STR_WELL_UNITS created on/after 2024-01-01 whose
# CCWellHeaderAnalysis record has State = 'TX'.
# NOTE: the WHERE clause filters on a CCWellHeaderAnalysis column, so wells
# without a match in that table are dropped even though the join is a LEFT JOIN.
query = '''SELECT distinct [API_10]
                FROM [Sitio_GIS].[dbo].[STR_WELL_UNITS]
                left join [Conduit].[dbo].[CCWellHeaderAnalysis] on [Conduit].[dbo].[CCWellHeaderAnalysis].[Api10] = [Sitio_GIS].[dbo].[STR_WELL_UNITS].[API_10]
                where ([Conduit].[dbo].[CCWellHeaderAnalysis].[State] = 'TX') and ([Sitio_GIS].[dbo].[STR_WELL_UNITS].[created_date] >= '2024-01-01');'''

# Pre-bind the handle so the cleanup below never touches an unbound name when
# connect_to_db() itself fails.
cnx = None
try:
    cnx = connect_to_db(dbname='Sitio_GIS')
    df_api = pd.read_sql(query,con=cnx)
finally:
    # Errors are deliberately left to propagate: swallowing them would leave
    # df_api undefined (or stale from an earlier run) for every cell below.
    if cnx is not None:
        cnx.close()

df_api

Connected to Sitio_GIS!


  df_api = pd.read_sql(query,con=cnx)


Unnamed: 0,API_10
0,4213544018
1,4213544019
2,4213544020
3,4213544021
4,4213544022
...,...
82,4246142606
83,4246142608
84,4246142609
85,4246142610


In [9]:
# Root of the RRC "EWA" web application; every scraped link is built on top of it.
base_url = 'https://webapps2.rrc.texas.gov/EWA/'
# Drilling-permit search action used by the scrape loop.
query_url = ''.join((base_url, 'drillingPermitsQueryAction.do'))

In [3]:
# df_api = pd.read_excel(r'C:\Users\Apoorva.Saxena\OneDrive - Sitio Royalties\Desktop\Project - Apoorva\Python\Scraping\RRC\notebook\Well_List_11_1_2023.xlsx')
# api_list = [4212335341,4238940705,4238940783,4217730498] # No amendments, amendment filed with no approval date, one amendment, more than 3 amendments

#### Functions

In [7]:
def get_headers():
    """
    Build the browser-like HTTP headers used for requests to
    webapps2.rrc.texas.gov.

    Returns
    -------
    Dict.
        Header name -> header value. A new dict is created on every call.
    """
    header_pairs = (
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Accept-Language', 'en-US,en;q=0.9'),
        ('Cache-Control', 'max-age=0'),
        ('Connection', 'keep-alive'),
        ('Host', 'webapps2.rrc.texas.gov'),
        # Fetch-metadata headers of a top-level browser navigation
        ('Sec-Fetch-Dest', 'document'),
        ('Sec-Fetch-Mode', 'navigate'),
        ('Sec-Fetch-Site', 'none'),
        ('Sec-Fetch-User', '?1'),
        ('Upgrade-Insecure-Requests', '1'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'),
        # Client-hint headers matching the Chrome 119 user agent above
        ('sec-ch-ua', "Google Chrome;v='119', 'Chromium';v='119', 'Not?A_Brand';v='24'"),
        ('sec-ch-ua-mobile', "?0"),
        ('sec-ch-ua-platform', "Windows"),
    )

    return dict(header_pairs)


def payload(api):
    """
    Build the form data for a drilling-permit search by API number.

    Parameters
    ----------
    api : TYPE, int or str
        DESCRIPTION. 10 or 14 digit API. The first two digits (the '42'
        prefix) are dropped; for a 14 digit API the last four digits are
        dropped as well, leaving the 8 digits used for the search.

    Returns
    -------
    Dict.
        Form fields for the search request. The API value is kept as an
        8-character string so that leading zeros survive (converting to int
        would turn '00312345' into 312345).

    Raises
    ------
    ValueError
        If ``api`` is not made of exactly 10 or 14 digits.
    """

    api_digits = str(api).strip()

    # Check the length of API
    if len(api_digits) not in (10, 14) or not api_digits.isdigit():
        raise ValueError(f"API must be a 10 or 14 digit number, got {api!r}")

    # Positions 2..9 hold the 8 search digits for both accepted lengths.
    api_no = api_digits[2:10]

    form_data = {
    'methodToCall': 'search',
    'searchArgs.apiNoHndlr.inputValue': api_no,
    'searchArgs.operatorNameWildcardHndlr.inputValue': 'beginsWith',
    'searchArgs.leaseNameWildcardHndlr.inputValue': 'beginsWith',
    'searchArgs.fieldNameWildcardHndlr.inputValue': 'beginsWith',
    'searchArgs.surveyNameWildcardHndlr.inputValue': 'beginsWith'
    }

    return form_data


def get_data_DrillingPermitQuery(req_url,headers,payload,timeout=30):
    """
    Run a drilling-permit search: GET the query page to obtain session
    cookies, then POST the search form to the same URL.

    Parameters
    ----------
    req_url : TYPE, Str
        DESCRIPTION. URL of the drilling permit query action. Used for both
        the initial GET and the search POST.
    headers : TYPE, Dict
        DESCRIPTION. HTTP headers sent with both requests.
    payload : TYPE, Dict
        DESCRIPTION. Form data for the search POST.
    timeout : TYPE, int or float, optional
        DESCRIPTION. Seconds to wait on each request before giving up, so a
        stalled connection cannot hang the caller. Default 30.

    Returns
    -------
    Request Response of the POST, or None when the initial GET does not
    return HTTP 200 or a request error occurs (the reason is printed).
    """
    try:
        with requests.session() as s:
            # TLS verification is disabled on the session and on every call.
            s.verify = False
            res_get = s.get(url=req_url,headers=headers,verify=False,timeout=timeout)

            if res_get.status_code != 200:
                print(f'GET {req_url} returned HTTP {res_get.status_code}; search not submitted')
                return None

            cookies = res_get.cookies
            # POST to req_url rather than a notebook-level global so the
            # function only depends on its own arguments.
            res_post = s.post(url=req_url,data=payload,cookies=cookies,headers=headers,verify=False,timeout=timeout)

            return res_post

    except requests.exceptions.RequestException as e:
        print(e)
        return None


def parse_query_data(post_resp):
    """
    Parse the drilling-permit search result page into DataFrames and pick the
    permit row with the latest approval date.

    Parameters
    ----------
    post_resp : TYPE, Requests Response
        DESCRIPTION. Response from the get_data_DrillingPermitQuery()

    Returns
    -------
    Tuple of
        df_main : DataFrame, result table as shown on the page.
        df_edit : DataFrame, copy of df_main with a numeric 'API NO.' and the
            added columns 'HREF', 'Submitted_Dt' and 'Approved_Dt'.
        href_link : Str, detail-page URL of the row with the latest approval date.
        max_apprvd_Dt : Timestamp, the latest approval date.
        idx : int, index label in df_edit of that row. When several rows
            share the latest approval date the first one listed is used.

    Raises
    ------
    ValueError
        If the result table has no rows or no row has an approval date.
    """
    soup = BeautifulSoup(post_resp.content,'html.parser')
    # 'a[href]' skips anchors without an href attribute, which would raise
    # KeyError when indexed.
    web_links = soup.select('a[href]')
    href = [base_url + web_link['href'] for web_link in web_links if 'drillingPermitDetailAction' in web_link['href']]

    # NOTE(review): the result grid is taken as the 10th <table> on the page;
    # this position is tied to the page layout - confirm if the site changes.
    df_main = pd.read_html(StringIO(post_resp.text))[9]
    # Row 1 of the raw table holds the column names; rows 0 and 1 are not data.
    df_main.columns = df_main.iloc[1,:]
    df_main = df_main.drop([0,1]).reset_index(drop=True)

    if df_main.empty:
        raise ValueError('Drilling permit query returned no result rows')

    df_edit = df_main.copy()
    # The API cell also carries link captions; keep only its first token and
    # apply it to every row.
    df_edit['API NO.'] = int(df_main['API NO.'].iloc[0].split()[0])
    df_edit['HREF'] = href

    # 'Status Date' looks like 'Submitted: mm/dd/yyyy Approved: mm/dd/yyyy'.
    # Extract by label so a row without an approval date yields NaT instead of
    # breaking positional token indexing.
    status_date = df_edit['Status Date']
    df_edit['Submitted_Dt'] = pd.to_datetime(status_date.str.extract(r'Submitted:\s*(\S+)', expand=False))
    df_edit['Approved_Dt'] = pd.to_datetime(status_date.str.extract(r'Approved:\s*(\S+)', expand=False))

    max_apprvd_Dt = df_edit['Approved_Dt'].max()
    latest_rows = df_edit.index[df_edit['Approved_Dt']==max_apprvd_Dt]
    if len(latest_rows) == 0:
        raise ValueError('No permit with an approval date found in the query result')

    # Several filings can share the latest approval date; take the first listed.
    apprvd_Dt_max_idx = int(latest_rows[0])
    href_link = df_edit.loc[apprvd_Dt_max_idx,'HREF']

    return df_main, df_edit, href_link, max_apprvd_Dt, apprvd_Dt_max_idx


def get_data_W1_Form(url_href, timeout=30):
    """
    Download the W-1 detail page of a drilling permit.

    Parameters
    ----------
    url_href : TYPE, Str
        DESCRIPTION. URL from parse_query_data()
    timeout : TYPE, int or float, optional
        DESCRIPTION. Seconds to wait for the server before raising, so one
        stalled request cannot hang the whole scrape. Default 30.

    Returns
    -------
    Response.
    """

    with requests.session() as s:
        # TLS verification is disabled on the session and on the call itself.
        s.verify = False
        res_get = s.get(url=url_href,headers=get_headers(),verify=False,timeout=timeout)

    return res_get


def parse_W1_Form(get_resp):
    """
    Pull the 'Horizontal Wellbore' and 'Acres' values out of a W-1 page.

    Parameters
    ----------
    get_resp : TYPE, Requests Response
        DESCRIPTION. get response from get_data_W1_Form()

    Returns
    -------
    DataFrame
        One row with a column for each of the two fields found on the page;
        empty when neither field is present.
    """
    wanted_labels = ('Horizontal Wellbore', 'Acres')
    page = BeautifulSoup(get_resp.text,'html.parser')

    data_dict = {}

    for group_box in page.find_all('table',class_='GroupBox1'):
        header_cells = group_box.find_all('th')
        value_cells = group_box.find_all('td')
        # Header and data cells are paired purely by position within the table.
        for header_cell, value_cell in zip(header_cells, value_cells):
            label = header_cell.text
            if label in wanted_labels:
                data_dict[label] = value_cell.text

    return pd.DataFrame.from_dict(data_dict, orient='index').transpose()

#### Main

In [10]:
api_list = df_api['API_10'].tolist()
# api_list = df_api['API_10'].loc[[5]].tolist()

dfs_main = []
dfs_edit = []
# APIs whose permit query produced no response; inspect after the loop.
failed_apis = []

# Define Headers
headers = get_headers()

for api in api_list:

    payload_api = payload(api=api)

    response_from_DrillingPermit_Query = get_data_DrillingPermitQuery(req_url=query_url,headers=headers,payload=payload_api)

    # get_data_DrillingPermitQuery() returns None when the request fails;
    # record the API and move on instead of crashing inside the parser.
    if response_from_DrillingPermit_Query is None:
        failed_apis.append(api)
        print(f'Skipping API {api}: drilling permit query returned no response')
        time.sleep(1)
        continue

    df_mainPage, df_edit, w1_link, Approved_Dt, idx_Max_Aprvd_Dt = parse_query_data(post_resp=response_from_DrillingPermit_Query)

    response_from_W1_query = get_data_W1_Form(url_href=w1_link)

    df_w1 = parse_W1_Form(get_resp=response_from_W1_query)
    # Tag the W-1 values with the latest approval date so the merge attaches
    # them to the permit row(s) carrying that date.
    df_w1['Approved_Dt'] = Approved_Dt

    df_edit_merge_with_w1 = df_edit.merge(df_w1,on='Approved_Dt',how='left').reset_index(drop=True)

    dfs_edit.append(df_edit_merge_with_w1)
    dfs_main.append(df_mainPage)

    # Pause between wells to avoid hammering the server.
    time.sleep(1)

In [11]:
# All per-well permit rows (with W-1 fields) stacked under one fresh 0..n-1 index.
pd.concat(dfs_edit, ignore_index=True)

Unnamed: 0,API NO.,District,Lease,Well Number,Permitted Operator,County,Status Date,Status Number,Wellbore Profiles,Filing Purpose,Amend,Total Depth,Stacked Lateral Parent Well DP #,Status,HREF,Submitted_Dt,Approved_Dt,Horizontal Wellbore,Acres
0,13544018,08,HEADLEE 5G,101HG,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896133,Horizontal,New Drill,N,10300,,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-22,2023-12-28,Allocation,1762.8
1,13544019,08,HEADLEE 5G,102HD,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896134,Horizontal,New Drill,N,10300,896133,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-22,2023-12-28,"Allocation, Stacked Lateral",1762.8
2,13544020,08,HEADLEE 5G,103HF,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896135,Horizontal,New Drill,N,10300,896133,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-22,2023-12-28,"Allocation, Stacked Lateral",1762.8
3,13544021,08,HEADLEE 5X,104HG,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896138,Horizontal,New Drill,N,12500,,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-22,2023-12-28,Allocation,1762.8
4,13544022,08,HEADLEE 5X,105HD,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896139,Horizontal,New Drill,N,12500,896138,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-22,2023-12-28,"Allocation, Stacked Lateral",1762.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,46142609,7C,DAISY 25,44BU,FIREBIRD ENERGY II LLC(101413),UPTON,Submitted: 12/05/2023 Approved: 12/20/2023,894614,Horizontal,New Drill,Y,9900,,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-05,2023-12-20,Allocation,1300.64
99,46142610,7C,DAISY 23,46LS,SUMMIT PETROLEUM LLC(829221),UPTON,Submitted: 10/17/2023 Approved: 12/11/2023,894617,Horizontal,New Drill,N,9300,,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-10-17,2023-12-11,,
100,46142610,7C,DAISY 25,46LS,FIREBIRD ENERGY II LLC(101413),UPTON,Submitted: 12/05/2023 Approved: 12/20/2023,894617,Horizontal,New Drill,Y,9300,,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-12-05,2023-12-20,Allocation,1300.64
101,46142611,7C,DAISY 24,48MS,SUMMIT PETROLEUM LLC(829221),UPTON,Submitted: 10/18/2023 Approved: 12/11/2023,894647,Horizontal,New Drill,N,8700,,APPROVED,https://webapps2.rrc.texas.gov/EWA/drillingPer...,2023-10-18,2023-12-11,,


In [12]:
# All per-well result tables as scraped, stacked under one fresh 0..n-1 index.
pd.concat(dfs_main, ignore_index=True)

1,API NO.,District,Lease,Well Number,Permitted Operator,County,Status Date,Status Number,Wellbore Profiles,Filing Purpose,Amend,Total Depth,Stacked Lateral Parent Well DP #,Status
0,13544018 Links Images GIS Viewer Completion,08,HEADLEE 5G,101HG,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896133,Horizontal,New Drill,N,10300,,APPROVED
1,13544019 Links Images GIS Viewer Completion,08,HEADLEE 5G,102HD,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896134,Horizontal,New Drill,N,10300,896133,APPROVED
2,13544020 Links Images GIS Viewer Completion,08,HEADLEE 5G,103HF,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896135,Horizontal,New Drill,N,10300,896133,APPROVED
3,13544021 Links Images GIS Viewer Completion,08,HEADLEE 5X,104HG,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896138,Horizontal,New Drill,N,12500,,APPROVED
4,13544022 Links Images GIS Viewer Completion,08,HEADLEE 5X,105HD,OVINTIV USA INC.(628658),ECTOR,Submitted: 12/22/2023 Approved: 12/28/2023,896139,Horizontal,New Drill,N,12500,896138,APPROVED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,46142609 Links Images GIS Viewer Completion,7C,DAISY 25,44BU,FIREBIRD ENERGY II LLC(101413),UPTON,Submitted: 12/05/2023 Approved: 12/20/2023,894614,Horizontal,New Drill,Y,9900,,APPROVED
99,46142610 Links Images GIS Viewer Completion,7C,DAISY 23,46LS,SUMMIT PETROLEUM LLC(829221),UPTON,Submitted: 10/17/2023 Approved: 12/11/2023,894617,Horizontal,New Drill,N,9300,,APPROVED
100,46142610 Links Images GIS Viewer Completion,7C,DAISY 25,46LS,FIREBIRD ENERGY II LLC(101413),UPTON,Submitted: 12/05/2023 Approved: 12/20/2023,894617,Horizontal,New Drill,Y,9300,,APPROVED
101,46142611 Links Images GIS Viewer Completion,7C,DAISY 24,48MS,SUMMIT PETROLEUM LLC(829221),UPTON,Submitted: 10/18/2023 Approved: 12/11/2023,894647,Horizontal,New Drill,N,8700,,APPROVED


In [15]:
# pd.concat(dfs_edit).reset_index(drop=True).to_excel(r"Results_W1_1_16_24.xlsx",index=False)
# pd.concat(dfs_main).to_excel(r"Results_mainWebpage_1_16_24.xlsx",index=False)