In [6]:
import datetime
import os
import platform
from datetime import date, timedelta

import requests
from dateutil import rrule

# Destination folder for downloaded audio archives.
# Set the ECONOMIST_AUDIO_DIR environment variable to override it; the default
# below is the original hard-coded location and only exists on the author's machine.
output_folder = os.environ.get(
    "ECONOMIST_AUDIO_DIR", "G:/Reading/年年月月/The Economist/mp3"
)

In [17]:
class TheEconomistAudioScraper:
    """Build download URLs for The Economist audio-edition archives and fetch them.

    Issue numbers are derived by counting weekly dates relative to a known
    anchor (issue 9226 on 2021-01-02) while leaving out the dates listed in
    ``double_issue_skip`` and any date falling on 25-31 December.
    """

    def __init__(self):
        # Base URL under which the yearly/dated archive folders live.
        self.download_archive_url = (
            "http://audiocdn.economist.com/sites/default/files/AudioArchive"
        )
        # Dates that are not counted as an issue, as [year, month, day].
        # NOTE(review): presumably the second week of a summer double issue -- confirm.
        self.double_issue_skip = [
            # [2012, 7, 28],
            [2022, 8, 6],
            [2023, 8, 5],
        ]

    def _is_skipped(self, year, month, day):
        """Return True when (year, month, day) is not counted as an issue date."""
        # The last week of December never gets its own issue number.
        if month == 12 and day >= 25:
            return True
        key = (year, month, day)
        # tuple() lets entries be stored either as lists or as tuples.
        return any(tuple(entry) == key for entry in self.double_issue_skip)

    def allsaturdays(self, year):
        """Yield every Saturday of ``year`` as a ``datetime.date``, in order."""
        d = date(year, 1, 1)
        # weekday() is 0 for Monday, so 5 is Saturday; move forward to the first one.
        d += timedelta(days=(5 - d.weekday()) % 7)
        while d.year == year:
            yield d
            d += timedelta(days=7)

    def get_saturdays(self, date_start: str, date_end: str = None):
        """Return the Saturdays between two dates (both inclusive).

        Args:
            date_start: First day of the range, formatted ``"%Y-%m-%d"``.
            date_end: Last day of the range in the same format; when omitted,
                the current local date and time is used.

        Returns:
            A list of ``datetime.datetime`` objects (midnight), oldest first.
        """
        current = datetime.datetime.strptime(date_start, "%Y-%m-%d")
        if date_end is None:
            last = datetime.datetime.today()
        else:
            last = datetime.datetime.strptime(date_end, "%Y-%m-%d")

        # Jump straight to the first Saturday, then walk week by week.
        current += datetime.timedelta(days=(5 - current.weekday()) % 7)
        result = []
        while current <= last:
            result.append(current)
            current += datetime.timedelta(days=7)
        return result

    def getIssueNo(self, issue_date):
        """Return the issue number for ``issue_date``.

        Args:
            issue_date: A ``datetime.date`` or ``datetime.datetime``; any time
                of day is ignored.

        Returns:
            The integer issue number, counted from issue 9226 on 2021-01-02.
            Weekly steps start at the earlier of the two dates; steps that hit
            a skipped date (see ``_is_skipped``) are not counted.
        """
        origin_date = date(2021, 1, 2)
        origin_issue_no = 9226

        # Work on plain dates so that date and datetime inputs behave the same
        # and a time-of-day component cannot shift the weekly steps.
        if isinstance(issue_date, datetime.datetime):
            issue_date = issue_date.date()

        if issue_date >= origin_date:
            start_date, end_date, sign = origin_date, issue_date, 1
        else:
            start_date, end_date, sign = issue_date, origin_date, -1

        counted = 0
        current = start_date
        while current <= end_date:
            if not self._is_skipped(current.year, current.month, current.day):
                counted += 1
            current += timedelta(days=7)

        # Both end points are counted, hence the -1 to get a distance.
        return origin_issue_no + sign * (counted - 1)

    def downloadFile(self, file_url, output_fname, output_path, timeout=30):
        """Download ``file_url`` to ``output_path/output_fname``.

        On Windows the file is streamed with ``requests``; elsewhere the
        third-party ``wget`` package is used (imported lazily so that it is
        only required on those platforms).

        Args:
            file_url: URL to fetch.
            output_fname: File name to save as.
            output_path: Existing directory to save into.
            timeout: Seconds ``requests`` waits for the server before giving
                up (only applies to the ``requests`` branch).

        Returns:
            The string ``"<local path> Done!"``.

        Raises:
            requests.HTTPError: On a non-success HTTP status (``requests`` branch).
        """
        local_fpath = os.path.join(output_path, output_fname)
        if platform.system() == "Windows":
            # Stream into a temporary file so an interrupted download never
            # leaves a truncated archive under the final name.
            partial_fpath = local_fpath + ".part"
            try:
                with requests.get(file_url, stream=True, timeout=timeout) as r:
                    r.raise_for_status()
                    with open(partial_fpath, "wb") as f:
                        for chunk in r.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial_fpath, local_fpath)
            except BaseException:
                if os.path.exists(partial_fpath):
                    os.remove(partial_fpath)
                raise
        else:
            import wget

            wget.download(file_url, out=local_fpath)
        return f"{local_fpath} Done!"

    def getEcoAudio(self, year, month, day, output_path="./", show_url_only=True):
        """Build the archive URL for one issue date and optionally download it.

        Args:
            year, month, day: The issue date.
            output_path: Directory to download into when ``show_url_only`` is False.
            show_url_only: When True (default) nothing is downloaded and the
                URL is returned.

        Returns:
            The download URL when ``show_url_only`` is True; ``None`` when the
            date is skipped (see ``_is_skipped``) or after a download.
        """
        if self._is_skipped(year, month, day):
            return None

        issue_date = datetime.datetime(year, month, day)
        issue_no = self.getIssueNo(issue_date)
        issue_date_formatted = issue_date.strftime("%Y%m%d")

        file_name = f"Issue_{issue_no}_{issue_date_formatted}_The_Economist_Full_edition.zip"
        download_url = (
            f"{self.download_archive_url}/{year}/{issue_date_formatted}/{file_name}"
        )

        if show_url_only:
            return download_url

        self.downloadFile(
            download_url,
            output_fname=f"TheEconomist_AudioEdition_{issue_date_formatted}.zip",
            output_path=output_path,
        )
        return None

In [20]:
# List the audio-edition download URL of every issue in the selected years.
TEA_Scarper = TheEconomistAudioScraper()

for target_year in range(2012, 2013):
    print("##", target_year, "\n")
    listed = 0  # how many URLs have been printed for this year so far
    for issue_date in TEA_Scarper.allsaturdays(target_year):
        try:
            url = TEA_Scarper.getEcoAudio(
                issue_date.year,
                issue_date.month,
                issue_date.day,
                output_path=output_folder,
                show_url_only=True,
            )
            # Skipped dates yield no URL and are not numbered.
            if url:
                listed += 1
                print(f"{listed}.", url, "\n")
        except Exception as err:
            # Report the failing date and carry on with the next Saturday.
            print(issue_date)
            print(err)

## 2012 

1. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120107/Issue_8767_20120107_The_Economist_Full_edition.zip 

2. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120114/Issue_8768_20120114_The_Economist_Full_edition.zip 

3. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120121/Issue_8769_20120121_The_Economist_Full_edition.zip 

4. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120128/Issue_8770_20120128_The_Economist_Full_edition.zip 

5. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120204/Issue_8771_20120204_The_Economist_Full_edition.zip 

6. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120211/Issue_8772_20120211_The_Economist_Full_edition.zip 

7. http://audiocdn.economist.com/sites/default/files/AudioArchive/2012/20120218/Issue_8773_20120218_The_Economist_Full_edition.zip 

8. http://audiocdn.economist.com/sites/default/files/AudioA

In [15]:
# Print the download URL of every issue dated from 2023-08-12 up to today.
for issue_date in TEA_Scarper.get_saturdays("2023-08-12"):
    try:
        download_url = TEA_Scarper.getEcoAudio(
            issue_date.year,
            issue_date.month,
            issue_date.day,
            output_path=output_folder,
            show_url_only=True,
        )
    except Exception as e:
        print(issue_date)
        print(e)
        continue
    # getEcoAudio only returns the URL; it has to be printed here to be seen.
    # Skipped dates return None and are left out.
    if download_url:
        print(download_url)

http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230812/Issue_9358_20230812_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230819/Issue_9359_20230819_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230826/Issue_9360_20230826_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230902/Issue_9361_20230902_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230909/Issue_9362_20230909_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230916/Issue_9363_20230916_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230923/Issue_9364_20230923_The_Economist_Full_edition.zip
http://audiocdn.economist.com/sites/default/files/AudioArchive/2023/20230930/Issue_9365_20230930_The_Eco