In [5]:
#FUNCTION TO SAVE ALL MATCH DATA IN ONE FILE
import time
from io import StringIO
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_and_append_tables(url, csv_path="2023_vct_all_matches.csv", timeout=30):
    """Scrape the overview tables of one page and append them to a CSV file.

    The page at ``url`` is downloaded, every ``<table>`` matched by the class
    filter ``'wf-table-inset mod-overview'`` is parsed into a DataFrame, and
    the results are concatenated after the current contents of ``csv_path``,
    which is then rewritten in full.

    Args:
        url: Address of the page to scrape.
        csv_path: CSV file that accumulates the rows. A missing or empty
            file is treated as holding no rows yet.
        timeout: Value forwarded to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The DataFrame written to ``csv_path`` (previous rows followed by the
        newly scraped ones), or ``None`` when the request failed or the page
        contained no matching table. Request failures are printed, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, 'html.parser')

        # Use the class selector to find all tables with the specified class
        tables = soup.find_all('table', class_='wf-table-inset mod-overview')

        # read_html is given a file-like object because passing literal HTML
        # as a plain string is deprecated in recent pandas releases.
        dfs = [pd.read_html(StringIO(str(table)))[0] for table in tables]

        # Nothing to append: leave the CSV untouched
        if not dfs:
            return None

        # Start from the rows already on disk, if there are any
        frames = []
        try:
            frames.append(pd.read_csv(csv_path))
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # No file yet, or a zero-byte file: there is nothing to carry over
            pass
        frames.extend(dfs)

        updated_df = pd.concat(frames, ignore_index=True)

        # Save the updated DataFrame to the CSV file
        updated_df.to_csv(csv_path, index=False)

        return updated_df

    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error ({errh.response.status_code}): {errh}")
        print(f"Skipping {url} due to an error.\n")
        return None

    except requests.exceptions.RequestException as err:
        print(f"Something went wrong: {err}")
        print(f"Skipping {url} due to an error.\n")
        return None


In [6]:
# FUNCTION TO EXTRACT ALL HREF LINKS

def extract_href_links(base_url, timeout=30):
    """Collect link hrefs from a listing page, following its pagination.

    Starting at ``base_url``, each page is downloaded and the ``href`` of
    every ``<a>`` matched by the class filter ``'wf-module-item'`` is
    collected. When the page also has an ``<a>`` matched by ``'next-page'``,
    its ``href`` is resolved against the URL of the page it was found on and
    that page is fetched next; otherwise the crawl stops.

    Args:
        base_url: URL of the first listing page.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Returns:
        The collected ``href`` strings in the order encountered, exactly as
        they appear in the markup (they are not made absolute). If a request
        fails, the error is printed and the links gathered so far are
        returned.
    """
    href_links = []
    visited = set()

    # Start at the initial URL
    current_url = base_url

    # Stop if pagination ever points back to a page already fetched;
    # otherwise a self-referencing "next" link would loop forever.
    while current_url not in visited:
        visited.add(current_url)
        try:
            # Make a request to the current URL
            response = requests.get(current_url, timeout=timeout)
            response.raise_for_status()

            # Parse the HTML content of the page
            soup = BeautifulSoup(response.text, 'html.parser')

            # Collect hrefs, skipping anchors that carry no href attribute
            for anchor in soup.find_all('a', class_='wf-module-item'):
                href = anchor.get('href')
                if href:
                    href_links.append(href)

            # Find the next page link (if any)
            next_page_link = soup.find('a', class_='next-page')
            next_href = next_page_link.get('href') if next_page_link is not None else None
            if not next_href:
                break  # Exit the loop if there is no next page link

            # Resolve against the current page instead of gluing strings
            # together: base_url may end in a query string, and the href may
            # be site-relative or absolute.
            current_url = urljoin(current_url, next_href)

        except requests.exceptions.HTTPError as errh:
            print(f"HTTP Error ({errh.response.status_code}): {errh}")
            break  # Exit the loop if an HTTP error occurs

        except requests.exceptions.RequestException as err:
            print(f"Something went wrong: {err}")
            break  # Exit the loop if a general error occurs

    return href_links


In [28]:
# Example usage: collect every link from the event listing, then scrape each page.
base_url = 'https://www.vlr.gg/event/matches/1188/champions-tour-2023-lock-in-s-o-paulo/?series_id=2756'
# Change the URL for the data needed, e.g.:
#base_url = 'https://www.vlr.gg/event/matches/1657/valorant-champions-2023/?series_id=all'

# Pause between successive page requests, in seconds; adjust as needed
REQUEST_DELAY_SECONDS = 1

href_links = extract_href_links(base_url)

# Process the extracted href links
for href_link in href_links:
    # Resolve against base_url so site-relative hrefs get the right host and
    # hrefs that are already absolute are left intact.
    full_url = urljoin(base_url, href_link)
    print(f"Attempting to access: {full_url}")

    result_df = scrape_and_append_tables(full_url)

    if result_df is not None:
        print(f"Data from {full_url} appended to '2023_vct_all_matches.csv'.\n")

    # Add a delay between requests
    time.sleep(REQUEST_DELAY_SECONDS)

Attempting to access: https://www.vlr.gg/167391/loud-vs-drx-champions-tour-2023-lock-in-s-o-paulo-sf
Data from https://www.vlr.gg/167391/loud-vs-drx-champions-tour-2023-lock-in-s-o-paulo-sf appended to '2023_vct_all_matches.csv'.

Attempting to access: https://www.vlr.gg/167392/natus-vincere-vs-fnatic-champions-tour-2023-lock-in-s-o-paulo-sf
Data from https://www.vlr.gg/167392/natus-vincere-vs-fnatic-champions-tour-2023-lock-in-s-o-paulo-sf appended to '2023_vct_all_matches.csv'.

Attempting to access: https://www.vlr.gg/167393/loud-vs-fnatic-champions-tour-2023-lock-in-s-o-paulo-gf
Data from https://www.vlr.gg/167393/loud-vs-fnatic-champions-tour-2023-lock-in-s-o-paulo-gf appended to '2023_vct_all_matches.csv'.

