# MotoGP Data Download

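This notebook reads a list of MotoGP dataset URLs from `../docs/data_urls.txt`, downloads each file from GitHub, and saves it to the project's `data/raw` directory (`../data/raw` relative to this notebook).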

In [1]:
import requests
import pandas as pd
import os
from pathlib import Path

In [2]:
# Configuration: the only two paths a reader should need to change.
# Both are relative to the directory this notebook runs from.
OUTPUT_DIR = '../data/raw'                # where downloaded files are written
DATA_URLS_FILE = '../docs/data_urls.txt'  # text file listing one dataset URL per line

# Echo the active settings so the run log records what was used
print(
    f"Configuration:",
    f"  URLs file: {DATA_URLS_FILE}",
    f"  Output directory: {OUTPUT_DIR}",
    sep="\n",
)

Configuration:
  URLs file: ../docs/data_urls.txt
  Output directory: ../data/raw


In [None]:
def load_urls_from_file(file_path=None):
    """
    Load dataset URLs from a text file and map each one to a local filename.

    The file is read line by line. Blank lines are ignored silently; every
    other line must be an http(s) URL whose path ends in a file name,
    otherwise it is skipped with a warning. The file name is taken from the
    URL *path* only, so a query string or fragment (e.g. ``?raw=true``) never
    ends up in the saved name.

    Args:
        file_path: Path to the URLs file. Defaults to ``DATA_URLS_FILE``.

    Returns:
        dict: ``{filename: url}`` in file order. Empty if the file is missing
        or cannot be read. When two URLs share a file name the later one
        replaces the earlier one and a warning is printed.
    """
    # Imported locally so this cell does not depend on extra top-level imports.
    from urllib.parse import urlsplit

    if file_path is None:
        file_path = DATA_URLS_FILE

    urls = {}

    try:
        # 'utf-8-sig' reads plain UTF-8 too, but also drops a leading BOM that
        # would otherwise make the first URL fail the scheme check.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue

                try:
                    parts = urlsplit(line)
                except ValueError:
                    # urlsplit rejects some malformed inputs (e.g. bad IPv6 host)
                    parts = None

                is_http_url = (
                    parts is not None
                    and parts.scheme.lower() in ('http', 'https')
                    and bool(parts.netloc)
                )
                if not is_http_url:
                    print(f"⚠ Skipped line {line_num}: Not a valid URL")
                    continue

                # Last path segment only; query string and fragment are excluded
                filename = parts.path.rsplit('/', 1)[-1]
                if not filename:
                    print(f"⚠ Skipped line {line_num}: URL does not end in a file name")
                    continue

                if filename in urls:
                    print(f"⚠ Line {line_num}: duplicate file name {filename}, replacing earlier URL")

                urls[filename] = line
                print(f"✓ Loaded: {filename}")

        print(f"\nTotal URLs loaded: {len(urls)}")
        return urls

    except FileNotFoundError:
        print(f"Error: Could not find {file_path}")
        return {}
    except Exception as e:
        # Best effort: any other read/decode problem yields an empty result
        print(f"Error reading file: {e}")
        return {}

# Read the URL list from the path set in the configuration cell
print("Loading URLs from configured file...")
urls = load_urls_from_file()

# Report what will be downloaded, or point the reader at the config on failure
if urls:
    print(f"\nReady to download {len(urls)} datasets:")
    for filename in urls:
        print(f"  - {filename}")
else:
    print("No URLs loaded. Please check the data file path in configuration.")

Loading URLs from configured file...
✓ Loaded: constructure-world-championship.csv
✓ Loaded: grand-prix-events-held.csv
✓ Loaded: grand-prix-race-winners.csv
✓ Loaded: riders-finishing-positions.csv
✓ Loaded: riders-info.csv
✓ Loaded: same-nation-podium-lockouts.csv

Total URLs loaded: 6

Ready to download 6 datasets:
  - constructure-world-championship.csv
  - grand-prix-events-held.csv
  - grand-prix-race-winners.csv
  - riders-finishing-positions.csv
  - riders-info.csv
  - same-nation-podium-lockouts.csv


In [4]:
# Create the output directory (including missing parents); no-op if it exists
data_dir = Path(OUTPUT_DIR)
os.makedirs(data_dir, exist_ok=True)

# Print the configured value next to the absolute path it resolves to
print(
    f"Configured path: {OUTPUT_DIR}",
    f"Resolved path: {data_dir.resolve()}",
    f"Directory exists: {data_dir.exists()}",
    sep="\n",
)

Configured path: ../data/raw
Resolved path: /Users/diogosilva/Developer/Projects/motogp-analytics/data/raw
Directory exists: True


In [5]:
def download_csv(filename, url, data_dir, timeout=30):
    """
    Download one file over HTTP and save it, byte for byte, into ``data_dir``.

    Args:
        filename: Name to save the file under, inside ``data_dir``.
        url: Address to fetch.
        data_dir: Target directory (``str`` or ``Path``). It is not created
            here; if it does not exist the write fails and False is returned.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        bool: True if the file was saved, False on any failure (the reason
        is printed rather than raised).
    """
    filepath = Path(data_dir) / filename

    try:
        print(f"Downloading {filename}...")
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses

        # Save the raw bytes. Going through response.text would re-encode the
        # data using a guessed charset and, in text mode, rewrite line endings
        # on Windows -- either can silently alter the CSV.
        with open(filepath, 'wb') as f:
            f.write(response.content)

        print(f"✓ Successfully downloaded {filename}")
        return True

    except requests.exceptions.RequestException as e:
        print(f"✗ Error downloading {filename}: {e}")
        return False
    except Exception as e:
        print(f"✗ Unexpected error with {filename}: {e}")
        return False

In [6]:
# Fetch every dataset, sorting each filename into a success or failure list
print("Starting download process...\n")

successful_downloads, failed_downloads = [], []

for filename, url in urls.items():
    succeeded = download_csv(filename, url, data_dir)
    (successful_downloads if succeeded else failed_downloads).append(filename)
    print()  # Blank line between datasets for readability

# Summarise the run; failures are listed by name only when there are any
print(f"Download summary:")
print(f"✓ Successful: {len(successful_downloads)}/{len(urls)}")
if failed_downloads:
    print(f"✗ Failed: {failed_downloads}")

Starting download process...

Downloading constructure-world-championship.csv...
✓ Successfully downloaded constructure-world-championship.csv

Downloading grand-prix-events-held.csv...
✓ Successfully downloaded grand-prix-events-held.csv

Downloading grand-prix-race-winners.csv...
✓ Successfully downloaded grand-prix-race-winners.csv

Downloading riders-finishing-positions.csv...
✓ Successfully downloaded riders-finishing-positions.csv

Downloading riders-info.csv...
✓ Successfully downloaded riders-info.csv

Downloading same-nation-podium-lockouts.csv...
✓ Successfully downloaded same-nation-podium-lockouts.csv

Download summary:
✓ Successful: 6/6


In [None]:
print("\nData download completed!")
print(f"Files saved to: {data_dir.resolve()}")

# List the CSV files currently present in the output directory
if data_dir.exists():
    files = list(data_dir.glob("*.csv"))
    if not files:
        print("No CSV files found in directory.")
    else:
        print(f"Files in directory ({len(files)}):")
        for file in sorted(files):
            print(f"  - {file.name}")