In [1]:
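# [API key redacted] Never paste credentials into the notebook: the key is read from the
# YOUTUBE_API_KEY environment variable. A key that was ever saved here should be revoked.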

In [None]:
import os
from googleapiclient.discovery import build
import json

# Configuration is taken from the environment so that no secret lives in the notebook.
YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
if YOUTUBE_API_KEY in (None, ''):
    # Fail early: every later cell needs a usable key.
    raise RuntimeError('Set YOUTUBE_API_KEY environment variable')

# Falls back to the literal placeholder '...' when YOUTUBE_CHANNEL_ID is unset.
CHANNEL_ID = os.getenv('YOUTUBE_CHANNEL_ID', default='...')


In [None]:
# Build the YouTube Data API v3 client, authenticated with the developer key.
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=YOUTUBE_API_KEY,
)


In [15]:

# Accumulator for the scraped video records (filled by the pagination cell).
videos = []

# Retrieve the uploads playlist ID.
# Uses CHANNEL_ID from the configuration cell; the previous lowercase
# `channel_id` was never defined and raised NameError on a fresh kernel.
res = youtube.channels().list(id=CHANNEL_ID, part='contentDetails').execute()


In [16]:
# Display the raw channels.list response to inspect its structure.
res

{'kind': 'youtube#channelListResponse',
 'etag': '66HRC4JSww4NDztABRk3HgKsucQ',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 's9Rqu7HSBzO35n9kNjJxFkcXR-Q',
   'id': 'UCGzfpg1YiBIlgcODQI4lDvQ',
   'contentDetails': {'relatedPlaylists': {'likes': '',
     'uploads': 'UUGzfpg1YiBIlgcODQI4lDvQ'}}}]}

In [17]:

# Resolve the channel's uploads playlist from the channels.list response.
# A response may come back without any 'items' (e.g. the channel ID matched
# nothing); fail with a clear message instead of an opaque KeyError/IndexError.
channel_items = res.get('items') or []
if not channel_items:
    raise LookupError(
        'channels.list returned no channel; check the YOUTUBE_CHANNEL_ID value'
    )
uploads_playlist = channel_items[0]['contentDetails']['relatedPlaylists']['uploads']

# Start from an empty list so that re-running this cell never appends duplicates.
videos = []

# Paginate through playlist items (the request asks for 50 per page);
# keep following nextPageToken until the API stops returning one.
token = None
while True:
    pl_request = youtube.playlistItems().list(
        playlistId=uploads_playlist,
        part='snippet,contentDetails',
        maxResults=50,
        pageToken=token
    )
    pl_response = pl_request.execute()

    for item in pl_response.get('items', []):
        details = item['contentDetails']
        video_id = details['videoId']
        video_data = {
            'title': item['snippet']['title'],
            'videoId': video_id,
            'url': f"https://youtu.be/{video_id}",
            # Not guaranteed to be present on every item; store None rather
            # than abort the whole scrape on a single incomplete entry.
            'publishedAt': details.get('videoPublishedAt'),
        }
        videos.append(video_data)

    token = pl_response.get('nextPageToken')
    if not token:
        break

# Save to JSON (UTF-8, non-ASCII titles kept readable)
with open('bushwacker_videos.json', 'w', encoding='utf-8') as f:
    json.dump(videos, f, ensure_ascii=False, indent=2)

print(f"Saved {len(videos)} videos to bushwacker_videos.json")


Saved 128 videos to bushwacker_videos.json


In [None]:
import re
from typing import Optional, Tuple, List

# Value of each Roman numeral symbol.
ROMAN_VALUES = {
    'I': 1, 'V': 5, 'X': 10, 'L': 50,
    'C': 100, 'D': 500, 'M': 1000,
}

# Canonical (standard subtractive) Roman numeral syntax: thousands, hundreds,
# tens, units. Used to reject malformed strings such as 'VX', 'IIX' or 'IIII',
# which a plain right-to-left summation would silently accept.
_ROMAN_CANONICAL = re.compile(
    r'M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'
)


def roman_to_int(s: str) -> Optional[int]:
    """Convert a Roman numeral (any letter case) to an integer century number.

    Args:
        s: Candidate Roman numeral, e.g. ``'XIV'`` or ``'xix'``.

    Returns:
        The integer value when ``s`` is a well-formed Roman numeral whose value
        lies in 1..30; ``None`` for empty input, non-Roman characters,
        malformed numerals, or values outside that range.
    """
    s = s.upper()
    if not s or not _ROMAN_CANONICAL.fullmatch(s):
        return None
    total = 0
    prev = 0
    # Scan right-to-left: a symbol smaller than its right neighbour is subtractive.
    for ch in reversed(s):
        val = ROMAN_VALUES[ch]
        if val < prev:
            total -= val
        else:
            total += val
        prev = val
    # crude sanity check for century roman numerals (<= 30 is plenty)
    return total if 1 <= total <= 30 else None


# BCE marker in a lower-cased title: "до н. э.", "до н.э.", "до нашей эры",
# "до новой эры". Deliberately stricter than a bare 'до н' substring, which
# also fires on phrases such as "до наших дней".
_BCE_RE = re.compile(r'\bдо\s+н(?:\.?\s*э(?:ры)?\b|(?:ашей|овой)\s+эры)')

# A year of 1-4 digits that is not part of a longer digit run. A leading '-'
# counts as a minus sign only when it is not glued to a preceding word
# character, so "covid-19" yields 19 rather than -19.
_YEAR = r'(?<!\d)((?:(?<!\w)-)?\d{1,4})(?!\d)'
_DASH = r'\s*[–—-]\s*'
_YEAR_RANGE_RE = re.compile(_YEAR + _DASH + r'(-?\d{1,4})(?!\d)')
_SINGLE_YEAR_RE = re.compile(_YEAR)
_ROMAN_RANGE_RE = re.compile(r'\b([ivxlcdm]{1,4})' + _DASH + r'([ivxlcdm]{1,4})\b')


def parse_years(title: str) -> Tuple[Optional[int], Optional[int]]:
    """Extract a (start, end) year span from a lecture title.

    Patterns are tried in this order, first hit wins:
      1. a numeric range, e.g. "1914–1918" (hyphen, en dash or em dash);
      2. a range of Roman-numeral centuries, e.g. "XI - XIII", mapped to
         ``(lo - 1) * 100 .. hi * 100``;
      3. a single 1-4 digit year.

    When the title carries a BCE marker ("до н. э." and spelled-out variants)
    the years are made negative. The returned pair is always ordered so that
    ``start <= end``.

    Args:
        title: Video title in any letter case.

    Returns:
        ``(start, end)`` as integers, or ``(None, None)`` when nothing that
        looks like a year is found.
    """
    t = title.lower()
    bce = _BCE_RE.search(t) is not None

    # numeric range (supports en dash/em dash/hyphen)
    m = _YEAR_RANGE_RE.search(t)
    if m:
        a = int(m.group(1))
        b = int(m.group(2))
        if bce:
            a = -abs(a)
            b = -abs(b)
        return (a, b) if a <= b else (b, a)

    # roman century range (e.g., "XI - XIII")
    mr = _ROMAN_RANGE_RE.search(t)
    if mr:
        a = roman_to_int(mr.group(1))
        b = roman_to_int(mr.group(2))
        if a and b:
            lo, hi = (a, b) if a <= b else (b, a)
            start = (lo - 1) * 100
            end = hi * 100
            if bce:
                # Negating flips the order, so swap back to start <= end.
                start, end = -end, -start
            return start, end

    # single year
    m1 = _SINGLE_YEAR_RE.search(t)
    if m1:
        y = int(m1.group(1))
        if bce:
            y = -abs(y)
        return y, y

    return None, None


def infer_tags(title: str) -> List[str]:
    """Return ``['lecture', <region>]`` for a lecture title.

    The region is chosen by substring search of Russian keyword stems in the
    lower-cased title. Regions are checked in the order listed below and the
    first one with any matching stem wins; ``'Global'`` is used when none match.
    """
    region_stems = (
        ('East Asia', ('кндр', 'коре', 'япон', 'китай', 'монгол', 'тайван', 'гонконг', 'хань', 'цинь')),
        ('Middle East', ('израил', 'араб', 'ислам', 'сири', 'вавилон', 'ассир', 'месопотам', 'хетт', 'иуде')),
        ('Europe', ('франц', 'итал', 'венеци', 'греци', 'рим', 'герман', 'европ', 'англ', 'рус', 'скандинав', 'средневековая франц')),
        ('North America', ('сша', 'америк', 'североамерикан')),
        ('South America', ('чили', 'инк', 'ацтек')),
        ('Africa', ('африк', 'егип', 'южной африк', 'чёрн')),
        ('Oceania', ('полинез', 'австрал', 'оцеани', 'новая зеланд')),
    )
    lowered = title.lower()
    region = next(
        (name for name, stems in region_stems
         if any(stem in lowered for stem in stems)),
        'Global',
    )
    return ['lecture', region]

# Turn each scraped entry in `videos` into a lecture record enriched with
# region tags and the year span parsed from its title.
lectures = []
for video in videos:
    title = video['title']
    first_year, last_year = parse_years(title)
    record = {
        'title': title,
        'videoId': video['videoId'],
        'url': video['url'],
        'tags': infer_tags(title),
        'start': first_year,   # None when no year was recognised
        'end': last_year,
        'publishedAt': video.get('publishedAt'),
    }
    lectures.append(record)

# Persist as UTF-8 JSON, keeping non-ASCII titles readable.
with open('lectures.json', 'w', encoding='utf-8') as f:
    json.dump(lectures, f, ensure_ascii=False, indent=2)

print(f"Saved {len(lectures)} items to lectures.json")

