In [10]:
# Imports: os, warnings and time are standard library; requests is third-party
import os
import warnings
import requests
import time

# Miscellaneous settings
%matplotlib inline
warnings.filterwarnings('ignore')

In [11]:
# Competition codes; each one fills the second slot of the per-season CSV URL
# used by the competition download cell.
comps = (
    'E0 E1 E2 E3 '
    'SC0 SC1 '
    'D1 D2 '
    'F1 F2 '
    'I1 I2 '
    'SP1 SP2 '
    'B1 G1 N1 P1 T1'
).split()

# Season codes to download. Only the first one is active.
seasons = ['2324']
# Older seasons, currently disabled -- add them to `seasons` to fetch them too:
#   '2223', '2122', '2021',
#   '1920', '1819', '1718', '1617',
#   '1516', '1415', '1314', '1213',
#   '1112', '1011',
#   '0910', '0809',
#   '0708', '0607', '0506', '0405',
#   '0304', '0203', '0102', '0001',

# Country codes; each one names a single CSV fetched by the country download cell.
countries = (
    "ARG AUT BRA CHN "
    "DNK FIN IRL JPN "
    "MEX NOR POL ROU "
    "RUS SWE SWZ USA"
).split()

# Base names of the fixture files fetched by the fixture download cell.
fixtures = ["fixtures", "new_league_fixtures"]


In [12]:
# DOWNLOAD COMPETITION DATA
#
# For every (season, competition) pair, fetch the CSV and store it as
# data/scraped/<season>/<comp>.csv. A failed download is reported and skipped
# so that one bad file does not abort the whole run.

# URL template: first slot is the season code, second the competition code
base_url = 'https://www.football-data.co.uk/mmz4281/{}/{}.csv'

# Seconds to wait on a request before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 30

# Iterate over seasons and competition codes
for season in seasons:
    # All files of one season share a directory, so create it once per season
    os.makedirs(f'data/scraped/{season}', exist_ok=True)

    for comp in comps:
        file_url = base_url.format(season, comp)
        save_path = f'data/scraped/{season}/{comp}.csv'

        try:
            response = requests.get(file_url, timeout=request_timeout)

            if response.status_code == 200:
                # Binary mode stores the response bytes unchanged;
                # an existing file is overwritten
                with open(save_path, 'wb') as file:
                    file.write(response.content)

                print(f'Successfully downloaded and saved: {save_path}')
            else:
                print(f'Failed to download {file_url}. Status code: {response.status_code}')

            # Optional pause between requests to go easy on the server (disabled)
            #time.sleep(1)
        except Exception as e:
            # Covers network errors (including timeouts) and file-write errors
            print(f'An error occurred while downloading {file_url}: {e}')

Successfully downloaded and saved: data/scraped/2324/E0.csv
Successfully downloaded and saved: data/scraped/2324/E1.csv
Successfully downloaded and saved: data/scraped/2324/E2.csv
Successfully downloaded and saved: data/scraped/2324/E3.csv
Successfully downloaded and saved: data/scraped/2324/SC0.csv
Successfully downloaded and saved: data/scraped/2324/SC1.csv
Successfully downloaded and saved: data/scraped/2324/D1.csv
Successfully downloaded and saved: data/scraped/2324/D2.csv
Successfully downloaded and saved: data/scraped/2324/F1.csv
Successfully downloaded and saved: data/scraped/2324/F2.csv
Successfully downloaded and saved: data/scraped/2324/I1.csv
Successfully downloaded and saved: data/scraped/2324/I2.csv
Successfully downloaded and saved: data/scraped/2324/SP1.csv
Successfully downloaded and saved: data/scraped/2324/SP2.csv
Successfully downloaded and saved: data/scraped/2324/B1.csv
Successfully downloaded and saved: data/scraped/2324/G1.csv
Successfully downloaded and saved: d

In [13]:
# DOWNLOAD COUNTRY DATA
#
# Fetch one CSV per country code and store it as
# data/scraped/other/<country>.csv. A failed download is reported and skipped
# so that one bad file does not abort the whole run.

# URL template: the single slot is the country code
base_url = 'https://www.football-data.co.uk/new/{}.csv'

# Seconds to wait on a request before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 30

# Every country file goes into the same directory, so create it once
os.makedirs('data/scraped/other', exist_ok=True)

for country in countries:
    file_url = base_url.format(country)
    save_path = f'data/scraped/other/{country}.csv'

    try:
        response = requests.get(file_url, timeout=request_timeout)

        if response.status_code == 200:
            # Binary mode stores the response bytes unchanged;
            # an existing file is overwritten
            with open(save_path, 'wb') as file:
                file.write(response.content)

            print(f'Successfully downloaded and saved: {save_path}')
        else:
            print(f'Failed to download {file_url}. Status code: {response.status_code}')

        # Optional pause between requests to go easy on the server (disabled)
        #time.sleep(1)
    except Exception as e:
        # Covers network errors (including timeouts) and file-write errors
        print(f'An error occurred while downloading {file_url}: {e}')

Successfully downloaded and saved: data/scraped/other/ARG.csv
Successfully downloaded and saved: data/scraped/other/AUT.csv
Successfully downloaded and saved: data/scraped/other/BRA.csv
Successfully downloaded and saved: data/scraped/other/CHN.csv
Successfully downloaded and saved: data/scraped/other/DNK.csv
Successfully downloaded and saved: data/scraped/other/FIN.csv
Successfully downloaded and saved: data/scraped/other/IRL.csv
Successfully downloaded and saved: data/scraped/other/JPN.csv
Successfully downloaded and saved: data/scraped/other/MEX.csv
Successfully downloaded and saved: data/scraped/other/NOR.csv
Successfully downloaded and saved: data/scraped/other/POL.csv
Successfully downloaded and saved: data/scraped/other/ROU.csv
Successfully downloaded and saved: data/scraped/other/RUS.csv
Successfully downloaded and saved: data/scraped/other/SWE.csv
Successfully downloaded and saved: data/scraped/other/SWZ.csv
Successfully downloaded and saved: data/scraped/other/USA.csv


In [14]:
# DOWNLOAD FIXTURE DATA
#
# Fetch each fixture CSV and store it as data/scraped/fixtures/<fixture>.csv.
# A failed download is reported and skipped so that one bad file does not
# abort the whole run.

# URL template: the single slot is the fixture file's base name.
# The placeholder is required: without it, .format() returns the bare site
# root and the homepage HTML is saved under a .csv name.
base_url = 'https://www.football-data.co.uk/{}.csv'

# Seconds to wait on a request before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 30

# Every fixture file goes into the same directory, so create it once
os.makedirs('data/scraped/fixtures', exist_ok=True)

for fixture in fixtures:
    file_url = base_url.format(fixture)
    save_path = f'data/scraped/fixtures/{fixture}.csv'

    try:
        response = requests.get(file_url, timeout=request_timeout)

        if response.status_code == 200:
            # Binary mode stores the response bytes unchanged;
            # an existing file is overwritten
            with open(save_path, 'wb') as file:
                file.write(response.content)

            print(f'Successfully downloaded and saved: {save_path}')
        else:
            print(f'Failed to download {file_url}. Status code: {response.status_code}')

        # Optional pause between requests to go easy on the server (disabled)
        #time.sleep(1)
    except Exception as e:
        # Covers network errors (including timeouts) and file-write errors
        print(f'An error occurred while downloading {file_url}: {e}')

Successfully downloaded and saved: data/scraped/fixtures/fixtures.csv
Successfully downloaded and saved: data/scraped/fixtures/new_league_fixtures.csv
