In [7]:
from pathlib import Path

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from transfermarkt import TransferMarkt

In [8]:
# --- Configuration -----------------------------------------------------------
# Local path to the chromedriver binary; adjust for the machine running this.
CHROMEDRIVER_PATH = "C:/Users/yoshi/webdriver/chromedriver_win32/chromedriver.exe"

# Countries whose competitions should be scraped.
# tf.search_all_country() returns a list with every available country name.
# Earlier runs used ["Japan", "England"] and then ["Italy", "Spain"];
# this run collects every league in Germany.
TARGET_COUNTRIES = ["Germany"]

# Output file names are derived from the country list so that a run for a
# different set of countries does not overwrite an earlier run's files.
OUTPUT_DIR = Path("./output_dataframe")
OUTPUT_STEM = "_".join(TARGET_COUNTRIES)

ERROR_COLUMNS = ["Country", "Competition", "Error", "Message"]

headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Edg/92.0.902.84"}

options = Options()
options.page_load_strategy = 'none'

# Selenium 4 expects the driver binary to be supplied through a Service object;
# passing the path as the first positional argument is deprecated and was
# removed in later 4.x releases.
driver = webdriver.Chrome(service=Service(executable_path=CHROMEDRIVER_PATH), options=options)

# Collect per-league frames and error rows in plain lists and build the
# DataFrames once at the end, instead of re-concatenating on every iteration.
league_frames = []
error_records = []


def _record_error(kind, country, comp, exc):
    """Print a failure for one competition and append it to error_records."""
    print(kind + ":", country, ":", comp)
    print("MESSAGE: ", exc)
    error_records.append(dict(zip(ERROR_COLUMNS, [country, comp, kind, str(exc)])))


try:
    # The driver argument is needed when the Transfermarkt site has to be
    # operated dynamically (e.g. to discover each league's detail-table URL).
    # The headers argument is needed when scraping a detail-table URL, because
    # it is handed to requests.
    tf = TransferMarkt(driver, headers)

    # Seed the result with the empty frame so the final concat starts from the
    # column layout that make_empty_df() defines.
    league_frames.append(tf.make_empty_df())

    for country in TARGET_COUNTRIES:
        # Look up every competition of this country.
        for i, comp in enumerate(tf.search_competition(country)):

            try:
                if i == 0:
                    detail_url = tf.guide_from_country_and_league_to_leagueurl(country, comp)
                else:
                    # From the second competition on, tell the helper that the
                    # country is unchanged.
                    detail_url = tf.guide_from_country_and_league_to_leagueurl(country, comp, same_country=True)
            except Exception as e:
                _record_error("URL-ERROR", country, comp, e)
                continue

            # Scrape only when a detail URL was actually found.
            if detail_url:
                try:
                    df = tf.scrayping_this_league(detail_url)
                except Exception as e:
                    _record_error("SCRAYPING-ERROR", country, comp, e)
                else:
                    league_frames.append(df)
                    print("OK:", country, ":", comp)
finally:
    # Always shut the browser down, even if the run aborts part-way;
    # quit() closes every window and ends the chromedriver process.
    driver.quit()

output_df = pd.concat(league_frames)
error_df = pd.DataFrame(error_records, columns=ERROR_COLUMNS)

# Create the output folder if needed so a long scrape is not lost at save time.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
output_df.to_csv(OUTPUT_DIR / f"{OUTPUT_STEM}.csv", index=False)
# The error log goes to its own file (previously output_df was written here by mistake).
error_df.to_csv(OUTPUT_DIR / f"{OUTPUT_STEM}_Error.csv", index=False)

print("done!")


OK: Germany : Bundesliga
OK: Germany : 2. Bundesliga
OK: Germany : 3. Liga
OK: Germany : Regionalliga Nord
OK: Germany : Regionalliga Nordost
OK: Germany : Regionalliga West
OK: Germany : Regionalliga Südwest
OK: Germany : Regionalliga Bayern
OK: Germany : Oberliga Westfalen
OK: Germany : Oberliga Niederrhein
OK: Germany : Mittelrheinliga
OK: Germany : Bayernliga Nord
OK: Germany : Bayernliga Süd
OK: Germany : Oberliga Rheinland-Pfalz/Saar
OK: Germany : Oberliga Baden-Württemberg
OK: Germany : Hessenliga
OK: Germany : Oberliga Hamburg
OK: Germany : NOFV-Oberliga Nord
OK: Germany : NOFV-Oberliga Süd
OK: Germany : Bremenliga
OK: Germany : Oberliga Schleswig-Holstein
OK: Germany : Oberliga Niedersachsen
OK: Germany : Landesliga Bremen
OK: Germany : Verbandsliga Mecklenburg-Vorpommern
OK: Germany : Landesliga Lüneburg
OK: Germany : Berlin-Liga
OK: Germany : Verbandsliga Sachsen-Anhalt
OK: Germany : Landesliga Sachsen
OK: Germany : Rheinlandliga
OK: Germany : Verbandsliga Südwest
OK: German

In [9]:
import os

In [10]:
# Sanity check: confirm that the folder the scrape cell writes its CSV files to exists.
os.path.exists("./output_dataframe/")

True