In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Helper functions for fetching pages, extracting fixture fields and cleaning scores

def _get_url_soup(link, timeout=30):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        built-in ``html.parser``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(link, timeout=timeout)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # hitting an obscure "NoneType has no attribute" failure further down.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")

def _extract_html_attribute(loop_item, html_tag1, html_class1, html_tag2=None, html_class2=None):
   # Find the primary tag and class
    primary_element = loop_item.find(html_tag1, class_=html_class1)

    # If a second tag is specified, find it inside the primary element
    if html_tag2:
        if html_class2:
            secondary_element = primary_element.find(html_tag2, class_=html_class2)
        else:
            secondary_element = primary_element.find(html_tag2)
        return secondary_element.text.strip()
    # If no second tag, extract from the primary element
    return primary_element.text.strip()


def _clean_score(df, column):
    df[column] = df[column].astype(str)
    df[column] = df[column].str.split('\r').str[0].str.strip()


def _extract_home(f_value):
    hyphen_index = f_value.find("-")
    return f_value[:hyphen_index] if hyphen_index != -1 else None


def _extract_away(f_value):
    hyphen_index = f_value.find("-")
    return f_value[hyphen_index + 1:] if hyphen_index != -1 else None


def _create_goals_col(df, new_col, func):
    df[new_col] = df['Score'].apply(func)


In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Accumulates one row per fixture across every season
all_data = []

# The league landing page carries a <select> that lists every season ID and name
soup = _get_url_soup("https://fulltime.thefa.com/index.html?league=847518011&selectedSeason=249912101&selectedDivision=272973748&selectedCompetition=0&selectedFixtureGroupKey=1_467585716")

# Find the elements that contain the season links and IDs
select_element = soup.find("select", id="form1_selectedSeason")
if select_element is None:
    # Fail with a readable message rather than an AttributeError on None
    raise RuntimeError("Season selector 'form1_selectedSeason' was not found on the league page")
season_elements = select_element.find_all("option")

# Create a dictionary to hold season names and IDs
seasons = {}
for element in season_elements:
    season_id = element['value']  # Extract the season ID
    season_name = element.text.strip()  # Get the season name
    seasons[season_id] = season_name  # Store with ID as key

# Loop through each season and scrape data
for season_id, season_name in seasons.items():
    print(f"Scraping season: {season_name}")  # Print season being scraped

    soup = _get_url_soup(f"https://fulltime.thefa.com/results/1/10000.html?selectedSeason={season_id}&selectedFixtureGroupAgeGroup=0&previousSelectedFixtureGroupAgeGroup=&selectedFixtureGroupKey=&previousSelectedFixtureGroupKey=&selectedDateCode=all&selectedRelatedFixtureOption=2&selectedClub=&previousSelectedClub=&selectedTeam=")

    # The fixtures live inside the first 'tbody' div; a season without one
    # is skipped instead of aborting the whole run with an IndexError.
    result_tables = soup.find_all("div", class_="tbody")
    if not result_tables:
        print(f"No results table found for season: {season_name}")
        continue
    fixtures = result_tables[0].find_all("div", id=lambda x: x and x.startswith("fixture-"))

    # Extract competition, date, home team, away team, score for each fixture
    for fixture in fixtures:
        fixture_id = fixture['id'].split("fixture-")[1]
        competition = _extract_html_attribute(fixture, "div", "type-col", "p")
        home_team = _extract_html_attribute(fixture, "div", "home-team-col", "div", "team-name")
        away_team = _extract_html_attribute(fixture, "div", "road-team-col", "div", "team-name")
        date = _extract_html_attribute(fixture, "div", "datetime-col", "span")
        score = _extract_html_attribute(fixture, "div", "score-col")

        # Append data, including the season name
        all_data.append([fixture_id, season_name, competition, date, home_team, away_team, score])

# Store data in a pandas DataFrame
fixtures_df = pd.DataFrame(all_data, columns=["Fixture_ID", "Season", "Competition", "Date", "Home_Team", "Away_Team", "Score"])

# Clean Score column. The helper defined in this notebook is `_clean_score`;
# the previous call to `_clean_column` only worked through leftover kernel
# state and raised NameError after a kernel restart.
_clean_score(fixtures_df, 'Score')

# Extract Home and Away goal columns
_create_goals_col(fixtures_df, 'Homegoals', _extract_home)
_create_goals_col(fixtures_df, 'Awaygoals', _extract_away)

print(fixtures_df)

# Save as CSV
fixtures_df.to_csv("al_historical_results.csv", index=False)

Scraping season: 2012-13
Scraping season: 2013-14
Scraping season: 2014-15
Scraping season: 2015-16
Scraping season: 2016-17
Scraping season: 2017-18
Scraping season: 2018-19
Scraping season: 2019-20
Scraping season: 2020-21
Scraping season: 2021-22
Scraping season: 2022-23
Scraping season: 2023-24
Scraping season: 2024-25
     Fixture_ID   Season Competition      Date  \
0      12649002  2012-13        DIV5  25/05/13   
1      12649001  2012-13        DIV5  18/05/13   
2      12637620  2012-13        PREM  11/05/13   
3      12639446  2012-13        PREM  11/05/13   
4      12639447  2012-13        DIV1  11/05/13   
...         ...      ...         ...       ...   
9014   27109896  2024-25        SCHL  07/09/24   
9015   27109901  2024-25        SCHL  07/09/24   
9016   27109902  2024-25        SCHL  07/09/24   
9017   27109903  2024-25        SCHL  07/09/24   
9018   27109904  2024-25        SCHL  07/09/24   

                           Home_Team            Away_Team  Score Homegoals

In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _get_url_soup(link, timeout=30):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        built-in ``html.parser``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(link, timeout=timeout)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # hitting an obscure "NoneType has no attribute" failure further down.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")

# Results page for one season, written out one query parameter per line
url = (
    'https://fulltime.thefa.com/results.html'
    '?selectedSeason=572282993'
    '&selectedFixtureGroupAgeGroup=0'
    '&selectedFixtureGroupKey='
    '&selectedRelatedFixtureOption=3'
    '&selectedClub='
    '&selectedTeam='
    '&selectedDateCode=all'
    '&previousSelectedFixtureGroupAgeGroup='
    '&previousSelectedFixtureGroupKey=1_797104032'
    '&previousSelectedClub='
)

soup = _get_url_soup(url)

# Drill down: results container -> its first nested div -> that div's score cell
table = soup.find("div", class_="tbody")
data = table.find("div")
scores = data.find("div", class_="score-col")

# Show the raw markup of the score cell
print(scores)

<div class="score-col">
<span class="center smaller block desktop">Home Walkover</span>
										
										H - W 
											
									</div>
