In [1]:
import warnings

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm.auto import tqdm

In [2]:
# One page per competition and season: the Europa League pages first, then
# the Champions League pages, each covering seasons 2019/20 through 2022/23.
competition_pages = (
    "http://wildstat.com/p/51/ch/EUR_EL",
    "http://wildstat.com/p/50/ch/EUR_CL",
)
url_list = [
    f"{page}_{season_start}_{season_start+1}"
    for page in competition_pages
    for season_start in range(2019, 2023)
]

url_list

['http://wildstat.com/p/51/ch/EUR_EL_2019_2020',
 'http://wildstat.com/p/51/ch/EUR_EL_2020_2021',
 'http://wildstat.com/p/51/ch/EUR_EL_2021_2022',
 'http://wildstat.com/p/51/ch/EUR_EL_2022_2023',
 'http://wildstat.com/p/50/ch/EUR_CL_2019_2020',
 'http://wildstat.com/p/50/ch/EUR_CL_2020_2021',
 'http://wildstat.com/p/50/ch/EUR_CL_2021_2022',
 'http://wildstat.com/p/50/ch/EUR_CL_2022_2023']

In [3]:
def parse_data(soup, url):
    """Collect the İstanbul fixtures listed on one parsed season page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed page. It must contain a ``div`` with id ``middle_col`` whose
        ``div.box`` children hold the fixture tables.
    url : str
        Address the page was fetched from. Only used to pick the league
        label: "UEFA Europa League" when it contains ``EUR_EL``, otherwise
        "UEFA Champions League".

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (datetime), ``stadium`` and ``league``; one row per
        fixture whose third-cell flag is the TUR image and whose city text is
        exactly "İstanbul". Empty (but with the same columns) when the page
        lists no such fixture.

    Raises
    ------
    ValueError
        If the page has no ``middle_col`` container, or if a date cell
        cannot be parsed by ``pd.to_datetime``.
    """
    league = "UEFA Europa League" if "EUR_EL" in url else "UEFA Champions League"

    middle_col = soup.find(name="div", attrs={"id" : "middle_col"})
    if middle_col is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"no 'middle_col' container found in the page for {url}")

    data = []
    for box in middle_col.find_all("div", attrs={"class" : "box"}):
        tr_list = box.select("div.content-rb > table > tr")

        # Table rows are read in pairs: the even-indexed row carries the date
        # and the flag image, the row right after it carries the city and
        # stadium. A trailing unpaired row is ignored.
        for match_tr, venue_tr in zip(tr_list[0::2], tr_list[1::2]):
            # The guard covers a single pair only, so one malformed pair no
            # longer discards every later fixture in the same box.
            try:
                flag = match_tr.select("td:nth-of-type(3) >img")[0]["src"]

                # Presumably this flag marks a Turkish club in the third
                # cell -- confirm against the site markup.
                if flag != "http://wildstat.ru/img/flag/TUR.png":
                    continue

                venue_divs = venue_tr.select("td:last-child > div")
                data.append({
                    "date" : pd.to_datetime(match_tr.select("td:nth-of-type(2)")[0].text.strip(), dayfirst=True),
                    "city" : venue_divs[0].text.strip(),
                    "stadium" : venue_divs[1].text.strip(),
                    "league" : league
                })
            except IndexError:
                # Row pair does not have the expected cells; skip it.
                continue

    # Explicit columns keep the filter below valid even when nothing was
    # collected (a bare pd.DataFrame([]) has no "city" column to filter on).
    matches = pd.DataFrame(data, columns=["date", "city", "stadium", "league"])
    # Keep the date column datetime-typed for the empty case as well, so
    # frames from different pages concatenate and sort consistently.
    matches["date"] = pd.to_datetime(matches["date"])

    df = (
        matches.loc[matches["city"] == "İstanbul"]
        .reset_index(drop=True)
        .drop("city", axis=1)
    )

    return df

In [4]:
def get_data(url, timeout=30):
    """Download one season page and hand it to ``parse_data``.

    Parameters
    ----------
    url : str
        Address of the season page; also forwarded to ``parse_data``.
    timeout : float, optional
        Seconds ``requests`` waits on the server before giving up
        (default 30). Without it a stalled connection blocks forever.

    Returns
    -------
    pandas.DataFrame or None
        The frame returned by ``parse_data`` when the response status is
        200. ``None`` for any other status; a ``RuntimeWarning`` naming the
        URL and status is emitted in that case.
    """
    r = requests.get(url, timeout=timeout)

    if r.status_code != 200:
        # pd.concat drops None entries silently, so without this warning a
        # failed download would leave a season out of the result unnoticed.
        warnings.warn(
            f"skipping {url}: unexpected HTTP status {r.status_code}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

    soup = BeautifulSoup(r.text, "lxml")

    return parse_data(soup=soup, url=url)

In [5]:
# Fetch and parse every season page (with a progress bar), then stack the
# per-page frames into one frame with a fresh 0..n-1 index.
season_frames = [get_data(url=url) for url in tqdm(url_list)]
df = pd.concat(season_frames, ignore_index=True)

  0%|          | 0/8 [00:00<?, ?it/s]

In [10]:
# Order the fixtures chronologically and renumber the rows from zero.
df = df.sort_values(by="date").reset_index(drop=True)

In [11]:
# Show the assembled frame as this cell's output.
df

Unnamed: 0,date,stadium,league
0,2019-08-07 20:45:00,Başakşehir Fatih Terim Stadı,UEFA Champions League
1,2019-10-01 22:00:00,Türk Telekom Arena,UEFA Champions League
2,2019-10-03 19:55:00,Başakşehir Fatih Terim Stadı,UEFA Europa League
3,2019-10-03 19:55:00,Vodafone Arena (Beşiktaş Jimnastik Kulübü İnön...,UEFA Europa League
4,2019-10-22 22:00:00,Türk Telekom Arena,UEFA Champions League
5,2019-10-24 19:55:00,Başakşehir Fatih Terim Stadı,UEFA Europa League
6,2019-10-24 19:55:00,Vodafone Arena (Beşiktaş Jimnastik Kulübü İnön...,UEFA Europa League
7,2019-11-26 20:55:00,Türk Telekom Arena,UEFA Champions League
8,2019-11-28 20:55:00,Başakşehir Fatih Terim Stadı,UEFA Europa League
9,2019-11-28 20:55:00,Vodafone Arena (Beşiktaş Jimnastik Kulübü İnön...,UEFA Europa League


In [12]:
# Write the result next to the notebook; the row index is not exported.
output_csv = "ist_uefa_matches.csv"
df.to_csv(output_csv, index=False)