In [None]:
import concurrent.futures
import logging
import os

import pandas as pd
import requests

In [None]:
# Root-logger setup: emit everything from DEBUG upwards, each record rendered
# as "<level> - <timestamp> - <message>".
LOG_FORMAT = "%(levelname)s - %(asctime)s - %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)

In [None]:
# Load the player table from data/data_pictures_only.csv.
players_csv_path = os.path.join("data", "data_pictures_only.csv")
players = pd.read_csv(players_csv_path)

In [None]:
# Pull the "birthplace" and "image" columns out as plain lists, then pair them
# row by row into (birthplace, image URL) tuples for the downloader.
birthplace_list = [place for place in players["birthplace"]]
img_src_list = [src for src in players["image"]]
combined_list = [
    (place, src) for place, src in zip(birthplace_list, img_src_list)
]

In [None]:
# Create one output directory per distinct birthplace under "pics".
# os.makedirs(..., exist_ok=True) is used instead of os.mkdir so that the cell
# (a) also creates the "pics" parent when it is missing and (b) can be re-run
# without raising FileExistsError for directories made by an earlier run.
for place in players.birthplace.unique():
    os.makedirs(os.path.join("pics", place), exist_ok=True)

In [None]:
# HTTP request headers sent with every image download; get_img passes this
# dict to requests.get via its `headers` argument.
# NOTE(review): the values look like a copy of a desktop Firefox request,
# presumably so the image host treats the script like a browser — confirm
# which of them are actually required.
headers = {
    # Fixed host name: the same Host value is supplied for every URL fetched,
    # whatever host that URL points at.
    "Host": "img.a.transfermarkt.technology",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    # NOTE(review): "br" is advertised here — verify the environment can decode
    # Brotli responses, otherwise saved bytes may still be compressed.
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "TE": "trailers",
}


def get_img(birthplace, img_src, timeout=30):
    """Download one image into ``pics/<birthplace>/``.

    The file is named after the last ``/``-separated segment of ``img_src``
    and is streamed to disk in 1024-byte chunks. The module-level ``headers``
    dict is sent with the request.

    Args:
        birthplace: Name of the sub-directory of ``pics`` to save into; the
            directory must already exist.
        img_src: URL of the image to fetch.
        timeout: Seconds handed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block a worker thread indefinitely.

    Returns:
        The path of the written file, or ``None`` when the server answered
        with a status other than 200 (nothing is written in that case).

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the destination file cannot be created or written.
    """
    filename = img_src.split("/")[-1]
    destination = os.path.join("pics", birthplace, filename)
    # Stream into a sibling ".part" file first so an interrupted transfer never
    # leaves a truncated image under the final name.
    partial = destination + ".part"

    with requests.get(
        img_src, stream=True, headers=headers, timeout=timeout
    ) as r:
        if r.status_code != 200:
            # Previously skipped without a trace; keep skipping, but say so.
            logging.warning(
                "Skipping %s: unexpected HTTP status %s", img_src, r.status_code
            )
            return None
        try:
            with open(partial, "wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
            # Atomic rename: the final path only ever holds a complete file.
            os.replace(partial, destination)
        except Exception:
            if os.path.exists(partial):
                os.remove(partial)
            raise
    return destination


# Download every (birthplace, image URL) pair on a pool of 5 worker threads.
# Each future is mapped back to its arguments so that a failure can be reported
# against the URL that caused it. Worker exceptions are logged and collected
# instead of being re-raised, so one bad URL does not abort result collection
# for the remaining downloads.
failed_downloads = []
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    future_to_url = {
        executor.submit(get_img, birthplace, img_src): (birthplace, img_src)
        for birthplace, img_src in combined_list
    }
    for future in concurrent.futures.as_completed(future_to_url):
        place, url = future_to_url[future]
        try:
            future.result()
        except Exception:
            logging.exception("Download failed for %s (%s)", url, place)
            failed_downloads.append((place, url))

logging.info(
    "Downloads finished: %d of %d raised an error",
    len(failed_downloads),
    len(future_to_url),
)
