In [1]:
import requests
from datetime import datetime
import os

In [2]:
# Repository details
repo = "vaastav/Fantasy-Premier-League"  # Corrected format
branch = "master"
pat = ""

In [3]:
# List of year folders (as specified)
years = [ "2025-26"]
# List of specific CSV files (corrected to match repository)
specific_csvs = [ "players_raw.csv", "cleaned_players.csv", "player_idlist.csv"]
# Local directory to save files
output_dir = "C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects"

# Create the output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Headers for GitHub API and raw downloads
headers = {}
if pat:
    headers["Authorization"] = f"token {pat}"

In [4]:
# Function to download a specific file via raw URL
def download_specific_file(repo, branch, file_path, headers, output_dir):
    raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{file_path}"
    response = requests.get(raw_url, headers=headers)
    
    if response.status_code == 200:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{file_path.replace('/', '_')}_{timestamp}"
        output_path = os.path.join(output_dir, filename)
        
        with open(output_path, "wb") as f:
            f.write(response.content)
        print(f"Downloaded: {output_path}")
        return True
    else:
        print(f"Failed to download {file_path}: {response.status_code} (File or path may not exist)")
        return False

In [5]:
# Function to fetch and download all files from a folder (e.g., "gws")
def fetch_and_download_from_folder(repo, folder_path, branch, headers, output_dir):
    api_url = f"https://api.github.com/repos/{repo}/contents/{folder_path}?ref={branch}"
    try:
        response = requests.get(api_url, headers=headers)
        if response.status_code == 200:
            contents = response.json()
            if isinstance(contents, list) and len(contents) > 0:
                downloaded_count = 0
                for item in contents:
                    if item.get("type") == "file":
                        if download_specific_file(repo, branch, item["path"], headers, output_dir):
                            downloaded_count += 1
                print(f"Downloaded {downloaded_count} files from {folder_path}")
            else:
                print(f"No files found in {folder_path} or it's empty")
        else:
            error_msg = response.json().get('message', 'Unknown error') if response.status_code != 404 else 'Folder not found'
            print(f"Failed to fetch folder {folder_path}: {response.status_code} - {error_msg}")
    except requests.exceptions.RequestException as e:
        print(f"Network error accessing {folder_path}: {e}")

In [6]:
# Main process
print("Starting downloads from Fantasy-Premier-League repo...")
total_downloads = 0
for year in years:
    year_path = f"data/{year}"
    print(f"\nProcessing year: {year}")
    
    # Download the 4 specific CSVs from the year folder
    csv_downloads = 0
    for csv_file in specific_csvs:
        file_path = f"{year_path}/{csv_file}"
        if download_specific_file(repo, branch, file_path, headers, output_dir):
            csv_downloads += 1
    print(f"Downloaded {csv_downloads} specific CSVs for {year}")
    total_downloads += csv_downloads
    
    # Fetch and download all files from the "gws" subfolder
    gws_path = f"{year_path}/gws"
    fetch_and_download_from_folder(repo, gws_path, branch, headers, output_dir)

print(f"\nDownload complete! Total specific CSVs saved: {total_downloads}")
print(f"All files saved to: {os.path.abspath(output_dir)}")

Starting downloads from Fantasy-Premier-League repo...

Processing year: 2025-26
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_players_raw.csv_20250916_030025
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_cleaned_players.csv_20250916_030025
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_player_idlist.csv_20250916_030025
Downloaded 3 specific CSVs for 2025-26
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_gws_gw1.csv_20250916_030026
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_gws_gw2.csv_20250916_030026
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_gws_gw3.csv_20250916_030026
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_gws_merged_gw.csv_20250916_030026
Downloaded: C:/Users/JesseOnu/Downloads/Datasets/DataAnalystProjects\data_2025-26_gws_