In [40]:
import requests
from bs4 import BeautifulSoup
import urllib.parse
import pandas as pd

def get_table_urls(page_url):
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all rows in the table
    rows = soup.find_all('tr')

    # Initialize a list to hold the URLs
    urls = []

    # Loop through each row to extract form actions and input values
    for row in rows:
        form = row.find('form', class_='btn-form')
        if form:
            # Extract the form action attribute
            action = form['action']

            # Extract the base URL from the page URL
            base_url = urllib.parse.urljoin(page_url, action)

            # Find all input elements within the form
            inputs = form.find_all('input')

            # Create a dictionary of input names and values, handling missing 'name' attributes
            params = {input.get('name', ''): input.get('value', '').strip() for input in inputs}

            # Construct the URL with encoded parameters
            query_string = urllib.parse.urlencode(params)
            url = f"{base_url}?{query_string}"

            # Append the URL to the list
            urls.append(url)

    return urls

# Define the URL of the page containing the table
page_url = 'https://zonalvaluefinderph.com/zonal-values-all-cities'
urls = get_table_urls(page_url)

# Print the extracted URLs
for url in urls:
    print(url)

https://zonalvaluefinderph.com/zonal-values/?city=SOUTH%2C++QUEZON+CITY&province=METRO+MANILA&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=ALORAN&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=BALIANGAO&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=BONIFACIO&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=CALAMBA&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=CLARIN&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=CONCEPCION&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=DON+VICTORIANO+CHIONGBIAN%2A&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefinderph.com/zonal-values/?city=JIMENEZ&province=MISAMIS+OCCIDENTAL&=View+Zonal+Values
https://zonalvaluefin

In [52]:
def extract_data(url):
  headers = {
    "Cookie": "PHPSESSID=efcfnio3gnhiuq5dg0h5kn6ngd; 42205=1699759017502-513594619; _ga=GA1.1.108069233.1699759020; Lyp1CWKh=AxlSzcGLAQAAhWcardUZgXeYyFFkp8FGW2gwSZt8dMnkLrEAj3az1q_Hty_GAbS-P6WuctQxwH8AAEB3AAAAAA==; TS01e0ca52=01ba3f5e9682f52132a7caed69ce5d2f85a23d39b79bf080f19d7af1cd2efbe36046eff2f1796b0f1b71c6a018e39722b51dabdf5e; OClmoOot=A-VNiMGLAQAAWKNYkFcQe0sydi9bFTxmsHfVB-ZvQpr1ote12KeqWyVDlRiMAbS-P6WuctQxwH8AAEB3AAAAAA|1|1|0089bd88909c9a03e9ee6e24ea0650382dbf5df0; _ga_W4V0QFCBR4=GS1.1.1699766699.3.0.1699766699.0.0.0; 422003=oBUWdYAUAjybn03Bmf3oBsSmzcqsvoRLAPILYVkJlmZM0Hir0xHy20C/ION0e5lH/A08nWfco1o6Lo4GioLQRYXr8gFpl6i6giqSmFhH1isTs4+lWZwyyX78tgIiiu3gBZmxo+2pLBQgTDRubY+aQpdaKyXz2gITK97OOkS23KHYBEvT",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
  }

  r = requests.get(url, headers=headers)
  soup = BeautifulSoup(r.text, 'html.parser')
  headers = [x.text for x in soup.find_all("th")]
  headers = ['Municipality']+headers
  tbody = soup.find('tbody')
  h1_text = ""
  for h1 in soup.find_all("h1"):
    h1_text = h1.text
    break  # Assuming you only want the first h1

  # Use a different variable name, not 'list'
  df_rows = []

  for i in tbody.find_all("tr"):
    row_list = []
    # Insert the h1 text at the beginning of each row
    row_list.append(h1_text)
    for j in i.find_all("td"):
        row_list.append(j.text)
    df_rows.append(row_list)

  # Handle the case where no table rows are found
  if not df_rows:
    print(f"Warning: No data found for URL: {url}")
    return None  # Or return an empty DataFrame if you prefer

  # Create DataFrame
  df = pd.DataFrame(df_rows)
  df.columns = headers

  return df

In [53]:
all_data = []
for i in urls:
  df = extract_data(i)
  all_data.append(df)
  print(i)
  print(df)
  print("\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
..                                                ...        ...   
108  Zonal Values LIANGA, SURIGAO DEL SUR PHILIPPINES      UNION   
109  Zonal Values LIANGA, SURIGAO DEL SUR PHILIPPINES      UNION   
110  Zonal Values LIANGA, SURIGAO DEL SUR PHILIPPINES   VALENCIA   
111  Zonal Values LIANGA, SURIGAO DEL SUR PHILIPPINES   VALENCIA   
112  Zonal Values LIANGA, SURIGAO DEL SUR PHILIPPINES   VALENCIA   

                   Street Vicinity Class Price per SQM  
0       ALONG THE HIGHWAY             RR        3100.0  
1    ALONG BARANGAY ROADS             RR        3000.0  
2                          SUBURBS    RR        2900.0  
3           BARANGAY ROAD             RR        2500.0  
4       ALONG THE HIGHWAY             RR        2600.0  
..                    ...      ...   ...           ...  
108   ALONG BARANGAY ROAD             RR         700.0  
109                        SUBURBS    RR         650.0  
110  ALONG MU

In [None]:
all_data

In [54]:
column_name = ['Municipality', 'Barangay', 'Street', 'Vicinity', 'Class', 'Price per SQM']

In [55]:
compiled = pd.concat(all_data, ignore_index=True)
compiled.columns = column_name
print(compiled)

                                             Municipality   Barangay  \
0       Zonal Values SOUTH,  QUEZON CITY, METRO MANILA...    BOTOCAN   
1       Zonal Values SOUTH,  QUEZON CITY, METRO MANILA...    BOTOCAN   
2       Zonal Values SOUTH,  QUEZON CITY, METRO MANILA...    BOTOCAN   
3       Zonal Values SOUTH,  QUEZON CITY, METRO MANILA...    BOTOCAN   
4       Zonal Values SOUTH,  QUEZON CITY, METRO MANILA...    BOTOCAN   
...                                                   ...        ...   
707580   Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES  POBLACION   
707581   Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES  POBLACION   
707582   Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES  AGUINALDO   
707583   Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES  AGUINALDO   
707584   Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES  AGUINALDO   

                Street                 Vicinity Class Price per SQM  
0         BOTOCAN ROAD                   NAWASA    RR         770

In [56]:
final = pd.DataFrame(compiled)

In [57]:
final

Unnamed: 0,Municipality,Barangay,Street,Vicinity,Class,Price per SQM
0,"Zonal Values SOUTH, QUEZON CITY, METRO MANILA...",BOTOCAN,BOTOCAN ROAD,NAWASA,RR,77000
1,"Zonal Values SOUTH, QUEZON CITY, METRO MANILA...",BOTOCAN,BIGNAY ST.,LANGKA-PAJO,RR,67000
2,"Zonal Values SOUTH, QUEZON CITY, METRO MANILA...",BOTOCAN,GARCIA ST.,XAVIERVILLE PH III,RR,79000
3,"Zonal Values SOUTH, QUEZON CITY, METRO MANILA...",BOTOCAN,M. JHOCSON ST.,XAVIERVILLE PH III,RR,79000
4,"Zonal Values SOUTH, QUEZON CITY, METRO MANILA...",BOTOCAN,KAMIAS RD.,ANONAS,RR,85000
...,...,...,...,...,...,...
707580,"Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES",POBLACION,ALL LOTS,ALONG MUN./ NAT'L ROAD,CR,600
707581,"Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES",POBLACION,,INTERIOR LOTS,CR,360
707582,"Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES",AGUINALDO,ALL LOTS,ALONG NATIONAL ROAD,CR,545
707583,"Zonal Values NAGA, ZAMBOANGA SIBUGAY PHILIPPINES",AGUINALDO,,ALONG MUNICIPAL ROAD,CR,450


In [58]:
final.to_csv('final.csv', index=False)