In [1]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
from bs4 import BeautifulSoup as bs
import requests
from tqdm import tqdm
import pandas as pd
import os
import pathlib
import pickle

class Scraper:
    """Scrape match results, upcoming fixtures and Elo values for one league season.

    Pages are fetched from ``{url}/scores/{league}/{year}`` (the default base URL
    points at besoccer.com). Constructing a scraper creates the output
    directories ``Data/Results/{league}`` and ``Data/To_Predict/{league}``
    relative to the working directory and performs one HTTP request to read
    the matchday number from the season page.

    Attributes:
        league: League slug used in URLs and output paths.
        url: Base competition URL.
        year: Season identifier used in URLs and file names.
        matchday: First integer found in the season page's panel title
            (presumably the current round -- confirm against the site).
        matches: Set by ``get_next_matches``; the raw match-link elements of
            the scraped round.
    """

    # Seconds to wait for the server before giving up. ``requests`` has no
    # default timeout, so without this a stalled connection blocks forever.
    REQUEST_TIMEOUT = 30
    # Pause (seconds) after each round-listing request.
    LIST_DELAY = 1
    # Pause (seconds) before each per-match analysis request.
    ANALYSIS_DELAY = 3

    def __init__(self, league:str, url:str='https://www.besoccer.com/competition', year: int=2022) -> None:
        """Create the output directories and read the matchday from the season page.

        Args:
            league: League slug, e.g. ``'premier_league'``.
            url: Base competition URL.
            year: Season identifier.

        Raises:
            requests.HTTPError: If the season page returns an HTTP error status.
            ValueError: If the panel title or a matchday number cannot be found.
        """
        pathlib.Path(f'Data/Results/{league}').mkdir(parents=True, exist_ok=True)
        pathlib.Path(f'Data/To_Predict/{league}').mkdir(parents=True, exist_ok=True)
        self.league = league
        self.url = url
        self.year = year
        season_url = f"{self.url}/scores/{self.league}/{self.year}"
        soup = self._fetch_soup(season_url)
        time.sleep(self.LIST_DELAY)
        title = soup.find('div', {'class': 'panel-title'})
        if title is None:
            raise ValueError(f'No panel title found at {season_url}')
        self.matchday = self._parse_matchday(title.text)

    @staticmethod
    def _parse_matchday(title_text):
        """Return the first whitespace-separated all-digit token of ``title_text`` as an int."""
        numbers = [int(token) for token in title_text.split() if token.isdigit()]
        if not numbers:
            raise ValueError(f'No matchday number found in panel title {title_text!r}')
        return numbers[0]

    def _fetch_soup(self, page_url):
        """Download ``page_url`` and return it parsed with the ``html.parser`` backend."""
        response = requests.get(page_url, timeout=self.REQUEST_TIMEOUT)
        # Fail on 4xx/5xx here instead of parsing an error page and crashing
        # later with an unrelated-looking AttributeError.
        response.raise_for_status()
        return bs(response.content, 'html.parser')

    def _round_matches(self, matchday):
        """Return the match-link elements listed on the page of round ``matchday``."""
        round_url = f"{self.url}/scores/{self.league}/{self.year}/round{matchday}"
        soup = self._fetch_soup(round_url)
        time.sleep(self.LIST_DELAY)
        matches_box = soup.find('div', {'class': 'panel-body p0 match-list-new'})
        if matches_box is None:
            raise ValueError(f'No match list found at {round_url}')
        return matches_box.find_all('a', {'class': 'match-link'})

    @staticmethod
    def _team_names(match):
        """Return ``(home_team, away_team)`` extracted from one match-link element."""
        home_team = match.find('div', {'class': 'team-info ta-r'}).find('div', {'class': 'name'}).text.strip()
        # The second 'team-info' div is taken as the away side.
        away_team = match.find_all('div', {'class': 'team-info'})[1].find('div', {'class': 'name'}).text.strip()
        return home_team, away_team

    def get_previous_matches(self):
        """Scrape rounds ``1 .. self.matchday - 1`` and save their results as CSV.

        The round numbered ``self.matchday`` itself is not included. Output is
        written to ``Data/Results/{league}/Results_{year}_{league}.csv``.

        Raises:
            requests.HTTPError: If a round page returns an HTTP error status.
            ValueError: If a round page contains no match list.
        """
        results = {'Home_Team': [], 'Away_Team': [], 'Result': [], 'Link': [], 'Season': [], 'Round': [], 'League': []}
        for matchday in tqdm(range(1, self.matchday)):
            for match in self._round_matches(matchday):
                home_team, away_team = self._team_names(match)
                marker = match.find('div', {'class': 'marker'})
                home_score = marker.find('span', {'class': 'r1'}).text.strip()
                away_score = marker.find('span', {'class': 'r2'}).text.strip()
                results['Home_Team'].append(home_team)
                results['Away_Team'].append(away_team)
                results['Result'].append(f'{home_score}-{away_score}')
                results['Link'].append(match.get('href'))
                results['Season'].append(self.year)
                results['Round'].append(matchday)
                results['League'].append(self.league)
        df = pd.DataFrame(results)
        df.to_csv(f'Data/Results/{self.league}/Results_{self.year}_{self.league}.csv')

    def get_next_matches(self):
        """Scrape round ``self.matchday + 1`` and the Elo values of each of its matches.

        Writes the fixtures to
        ``Data/To_Predict/{league}/Results_{year}_{league}.csv`` and a pickled
        dict ``{match_link: {'Elo_home': str, 'Elo_away': str}}`` to
        ``Data/To_Predict/{league}/elo_dict.pkl``. Both files are written only
        after every analysis page has been scraped successfully. The raw
        match-link elements are kept on ``self.matches``.

        Raises:
            requests.HTTPError: If any page returns an HTTP error status.
            ValueError: If the round page has no match list or an analysis
                page has no Elo comparison table.
        """
        results = {'Home_Team': [], 'Away_Team': [], 'Link': [], 'Season': [], 'Round': [], 'League': []}
        elo_dict = {}
        next_round = self.matchday + 1
        matches = self._round_matches(next_round)
        self.matches = matches

        for match in matches:
            home_team, away_team = self._team_names(match)
            results['Home_Team'].append(home_team)
            results['Away_Team'].append(away_team)
            results['Link'].append(match.get('href'))
            results['Season'].append(self.year)
            results['Round'].append(next_round)
            results['League'].append(self.league)

        for link in results['Link']:
            time.sleep(self.ANALYSIS_DELAY)
            soup = self._fetch_soup(link + '/analysis')
            elo_box = soup.find('div', {'class': 'panel-body pn compare-data'})
            if elo_box is None:
                raise ValueError(f'No Elo comparison table found at {link}/analysis')
            # The Elo values are read from the second table row.
            elo_row = elo_box.find_all('tr')[1]
            home_elo = elo_row.find('td', {'class': 'team1-c'}).text.strip()
            away_elo = elo_row.find('td', {'class': 'team2-c'}).text.strip()
            elo_dict[link] = {'Elo_home': home_elo,
                              'Elo_away': away_elo}

        df = pd.DataFrame(results)
        df.to_csv(f'Data/To_Predict/{self.league}/Results_{self.year}_{self.league}.csv')
        with open(f'Data/To_Predict/{self.league}/elo_dict.pkl', 'wb') as f:
            pickle.dump(elo_dict, f)


In [5]:
# Build a scraper for the 'premier_league' slug and scrape the round after the
# matchday read at construction; this writes the fixtures CSV and elo_dict.pkl
# under Data/To_Predict/premier_league/ (relative to the working directory).
# NOTE(review): the variable is called `primera_division` although the league
# requested is 'premier_league' -- consider renaming it.
primera_division = Scraper('premier_league')
primera_division.get_next_matches()

In [8]:
# Load the Elo values written by Scraper.get_next_matches(). The path is relative
# to the working directory -- the same location the scraper writes to -- rather
# than a machine-specific absolute path, so the cell runs on any checkout.
# Only unpickle files produced by this project: pickle.load can execute arbitrary code.
with open('Data/To_Predict/premier_league/elo_dict.pkl', 'rb') as file:
    elo_dict = pickle.load(file)

In [9]:
# Display the loaded mapping: match URL -> {'Elo_home': ..., 'Elo_away': ...}.
elo_dict

{'https://www.besoccer.com/match/tottenham-hotspur-fc/brighton-amp-hov/20225672': {'Elo_home': '89',
  'Elo_away': '60'},
 'https://www.besoccer.com/match/manchester-united-fc/norwich-city-fc/20225669': {'Elo_home': '92',
  'Elo_away': '63'},
 'https://www.besoccer.com/match/southampton-fc/arsenal/20225671': {'Elo_home': '81',
  'Elo_away': '92'},
 'https://www.besoccer.com/match/watford-fc/brentford/20225673': {'Elo_home': '71',
  'Elo_away': '54'},
 'https://www.besoccer.com/match/newcastle-united-fc/leicester-city-fc/20225670': {'Elo_home': '69',
  'Elo_away': '79'},
 'https://www.besoccer.com/match/west-ham-united/burnley-fc/20225674': {'Elo_home': '79',
  'Elo_away': '69'},
 'https://www.besoccer.com/match/wolverhampton/manchester-city-fc/20225675': {'Elo_home': '58',
  'Elo_away': '93'},
 'https://www.besoccer.com/match/aston-villa-fc/liverpool/20225666': {'Elo_home': '63',
  'Elo_away': '89'},
 'https://www.besoccer.com/match/leeds-united-afc/chelsea-fc/20225668': {'Elo_home': '

In [7]:
# Build a scraper for the 'premier_league' slug and scrape the round after the
# matchday read at construction; this writes the fixtures CSV and elo_dict.pkl
# under Data/To_Predict/premier_league/ (relative to the working directory).
epl = Scraper('premier_league')
epl.get_next_matches()

In [8]:
# Load the Elo values written by Scraper.get_next_matches() and display them.
# The path is relative to the working directory -- the same location the scraper
# writes to -- rather than a machine-specific absolute path.
# Only unpickle files produced by this project: pickle.load can execute arbitrary code.
with open('Data/To_Predict/premier_league/elo_dict.pkl', 'rb') as file:
    elo_dict = pickle.load(file)
elo_dict

{'https://www.besoccer.com/match/liverpool/watford-fc/20225651': {'Elo_home': '90',
  'Elo_away': '70'},
 'https://www.besoccer.com/match/brighton-amp-hov/norwich-city-fc/20225646': {'Elo_home': '60',
  'Elo_away': '64'},
 'https://www.besoccer.com/match/burnley-fc/manchester-city-fc/20225647': {'Elo_home': '70',
  'Elo_away': '94'},
 'https://www.besoccer.com/match/chelsea-fc/brentford/20225648': {'Elo_home': '94',
  'Elo_away': '53'},
 'https://www.besoccer.com/match/leeds-united-afc/southampton-fc/20225650': {'Elo_home': '58',
  'Elo_away': '82'},
 'https://www.besoccer.com/match/wolverhampton/aston-villa-fc/20225655': {'Elo_home': '59',
  'Elo_away': '62'},
 'https://www.besoccer.com/match/manchester-united-fc/leicester-city-fc/20225652': {'Elo_home': '92',
  'Elo_away': '79'},
 'https://www.besoccer.com/match/west-ham-united/everton-fc/20225654': {'Elo_home': '79',
  'Elo_away': '83'},
 'https://www.besoccer.com/match/tottenham-hotspur-fc/newcastle-united-fc/20225653': {'Elo_home'