In [4]:
# Import packages
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from google.colab import drive

# Mount Google Drive at /content/drive; the CSV output path used further down
# in this cell lives under this mount point.
drive.mount('/content/drive')

# Define scraper
def _most_recent_sunday(today):
    """Return *today* moved back to the most recent Sunday (unchanged on a Sunday)."""
    # datetime.weekday() numbers Monday as 0 and Sunday as 6.
    days_since_sunday = (today.weekday() - 6) % 7
    return today - timedelta(days=days_since_sunday)


def _parse_chart_item(item):
    """Extract ``(position, track, artist)`` from one chart-item element.

    Each value is None when its tag is missing from the markup; position is
    also None when its text is not an integer.
    """
    position = None
    # Look up the container and the <strong> separately: chaining the two
    # .find() calls raises AttributeError when the container is absent.
    pos_container = item.find("div", class_="position")
    pos_tag = pos_container.find("strong") if pos_container is not None else None
    if pos_tag is not None:
        try:
            position = int(pos_tag.text.strip())
        except ValueError:
            # Non-numeric position text: keep the row, leave position empty.
            position = None

    track_tag = item.find("a", class_="chart-name")
    track = track_tag.text.strip() if track_tag is not None else None

    artist_tag = item.find("a", class_="chart-artist")
    artist = artist_tag.text.strip() if artist_tag is not None else None

    return position, track, artist


def scrape_weekly_chart(start_date: str, output_file: str, *, timeout: float = 30.0) -> pd.DataFrame:
    """
    Scrape Official UK Singles Chart from start_date until the most recent Sunday.

    The chart page is requested once per week, stepping 7 days at a time from
    start_date. Weeks whose page cannot be fetched (network error, timeout or
    a non-200 response) are reported and skipped; the scrape carries on.

    Args:
        start_date: First chart date, in YYYY-MM-DD format (a Sunday).
        output_file: Path of the CSV file to write (e.g. in Google Drive).
        timeout: Seconds to wait for each HTTP request before giving up on
            that week.

    Returns:
        A pandas DataFrame with the columns date, position, track and artist.
        The columns are present even when no rows were scraped.

    Raises:
        ValueError: If start_date is not in YYYY-MM-DD format.
    """
    base_url = "https://www.officialcharts.com/charts/singles-chart/"
    columns = ["date", "position", "track", "artist"]

    current_date = datetime.strptime(start_date, "%Y-%m-%d")
    end = _most_recent_sunday(datetime.today())

    all_records = []

    while current_date <= end:
        chart_date = current_date
        # Advance up front so every `continue` below still moves to the next week.
        current_date += timedelta(days=7)

        # The site addresses a week as YYYYMMDD followed by a fixed "7501"
        # path segment.
        # NOTE(review): "7501" is presumably the chart identifier - confirm.
        url = f"{base_url}{chart_date.strftime('%Y%m%d')}/7501/"

        print(f"Scraping: {url}")
        try:
            r = requests.get(
                url,
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=timeout,  # without this a stalled connection blocks forever
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch {url} ({exc})")
            continue
        if r.status_code != 200:
            print(f"Failed to fetch {url}")
            continue

        soup = BeautifulSoup(r.text, "html.parser")
        date_label = chart_date.strftime("%Y-%m-%d")

        # Each chart item
        for item in soup.find_all("div", class_="chart-item-content"):
            position, track, artist = _parse_chart_item(item)
            all_records.append({
                "date": date_label,
                "position": position,
                "track": track,
                "artist": artist,
            })

    # Fix the column set explicitly so an empty scrape still yields a
    # well-formed frame and a CSV with a header row.
    df = pd.DataFrame(all_records, columns=columns)

    # Save to Google Drive
    df.to_csv(output_file, index=False)
    print(f"\n Data saved to {output_file} ({len(df)} rows)")

    return df


# Scrape every weekly chart starting 1999-12-26 and store the result on Drive
output_path = "/content/drive/MyDrive/top_singles.csv"
top_singles = scrape_weekly_chart(start_date="1999-12-26", output_file=output_path)

# Preview the first 15 scraped rows
print(top_singles.head(n=15))



Mounted at /content/drive
Scraping: https://www.officialcharts.com/charts/singles-chart/19991226/7501/
Scraping: https://www.officialcharts.com/charts/singles-chart/20000102/7501/
Scraping: https://www.officialcharts.com/charts/singles-chart/20000109/7501/
Scraping: https://www.officialcharts.com/charts/singles-chart/20000116/7501/
Scraping: https://www.officialcharts.com/charts/singles-chart/20000123/7501/

✅ Data saved to /content/drive/MyDrive/top_singles.csv (500 rows)
          date  position                                   track  \
0   1999-12-26         1       I HAVE A DREAM/SEASONS IN THE SUN   
1   1999-12-26         2                   THE MILLENNIUM PRAYER   
2   1999-12-26         3                                 IMAGINE   
3   1999-12-26         4            MR. HANKEY THE CHRISTMAS POO   
4   1999-12-26         5      RE-REWIND THE CROWD SAY BO SELECTA   
5   1999-12-26         6   TWO IN A MILLION/YOU'RE MY NUMBER ONE   
6   1999-12-26         7           COGNOSCENTI