### Crawling and streaming for **live** by **searching keywords**

In [2]:
import os
import cv2
import time
import json
import subprocess
from datetime import datetime

from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials



#__________________________________________Configuration Section - please edit below as needed____________________________________________________________#
API_KEY_PATH = "API_key.txt"                                          # please edit by your own API key file
DRIVE_CREDENTIALS = "credentials.json"                                # please edit by your own OAuth client secret file 
SCOPES = ['https://www.googleapis.com/auth/drive.file']                                             
DRIVE_FOLDER_ID = "1n-kO2CM8e9ZfHQ2Pp352j87n-ZziZVgm"                 # please edit by your own folder ID (shown in the URL of your google drive folder)
SAVE_INTERVAL = 30                                                    # 30 seconds to save one jpg, avoid too frequent uploads
SEARCH_QUERY = "aurora live cam"                                      # keyword to search live video on YouTube
#_________________________________________________________________________________________________________________________________________________________#



# below code no need to edit, unless you want to change functionality
def load_api_key():
    with open(API_KEY_PATH, 'r') as f:
        return f.read().strip()

def search_live_video(api_key, query):
    youtube = build('youtube', 'v3', developerKey=api_key)
    req = youtube.search().list(
        q=query,
        part='snippet',
        eventType='live',
        type='video',
        maxResults=1
    )
    res = req.execute()
    if res['items']:
        vid = res['items'][0]['id']['videoId']
        return f"https://www.youtube.com/watch?v={vid}"
    return None

def get_stream_url(video_url):
    result = subprocess.run(["yt-dlp", "-g", video_url], capture_output=True, text=True)
    return result.stdout.strip()

def get_drive_service():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file(DRIVE_CREDENTIALS, SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('drive', 'v3', credentials=creds)

def upload_to_drive(local_path, filename):
    service = get_drive_service()
    file_metadata = {
        'name': filename,
        'parents': [DRIVE_FOLDER_ID]
    }
    media = MediaFileUpload(local_path, mimetype='image/jpeg')
    file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
    print("Uploaded:", filename)

def stream_and_upload(stream_url):
    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        print("Failed to open stream.")
        return

    print("Streaming started. Press Ctrl+C to stop.")
    last_saved = time.time()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Stream ended.")
                break

            cv2.imshow("Live Stream", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("User exited.")
                break

            now = time.time()
            if now - last_saved >= SAVE_INTERVAL:
                ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
                filename = f"frame_{ts}.jpg"
                cv2.imwrite(filename, frame)
                upload_to_drive(filename, filename)
                os.remove(filename)
                last_saved = now
    except KeyboardInterrupt:
        print("Interrupted.")
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Main Code below (no need to edit)
if __name__ == "__main__":
    api_key = load_api_key()
    video_url = search_live_video(api_key, SEARCH_QUERY)
    if video_url:
        print("Found live video:", video_url)
        stream_url = get_stream_url(video_url)
        print("Stream URL:", stream_url)
        stream_and_upload(stream_url)
    else:
        print("No live stream found.")

Found live video: https://www.youtube.com/watch?v=Xe2g_54uj_U
Stream URL: https://manifest.googlevideo.com/api/manifest/hls_playlist/expire/1763453719/ei/t9YbaaHhFM6t0-kP9-v82Aw/ip/144.214.0.6/id/Xe2g_54uj_U.1/itag/96/source/yt_live_broadcast/requiressl/yes/ratebypass/yes/live/1/sgoap/gir%3Dyes%3Bitag%3D140/sgovp/gir%3Dyes%3Bitag%3D137/rqh/1/hls_chunk_host/rr2---sn-i5goxu-i3bl.googlevideo.com/xpc/EgVo2aDSNQ%3D%3D/playlist_duration/30/manifest_duration/30/bui/AdEuB5T4r59wEjKCIlDlHaL_N5ECd_FVooHPiLAf9jmC9CIKOBhwyehsiC6R7HqzgJXYrXHDYVl6PtI0/spc/6b0G_AXqolRbssKMol3N/vprv/1/playlist_type/DVR/cps/0/initcwndbps/5495000/met/1763432119,/mh/w0/mm/44/mn/sn-i5goxu-i3bl/ms/lva/mv/m/mvi/2/pl/19/rms/lva,lva/dover/11/pacing/0/keepalive/yes/fexp/51514994,51552689,51565116,51565682,51580968/mt/1763431724/sparams/expire,ei,ip,id,itag,source,requiressl,ratebypass,live,sgoap,sgovp,rqh,xpc,playlist_duration,manifest_duration,bui,spc,vprv,playlist_type/sig/AJfQdSswRQIhAPN9U6T-UbeOeqqCQezrAHPR5rrerL6aO9fgOq5P

  ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')


Uploaded: frame_20251118_021552.jpg
Uploaded: frame_20251118_021622.jpg
Uploaded: frame_20251118_021654.jpg
Uploaded: frame_20251118_021724.jpg
Uploaded: frame_20251118_021754.jpg
Uploaded: frame_20251118_021824.jpg
Uploaded: frame_20251118_021854.jpg
Interrupted.


### The below is for crawling and streaming for **video** by **accessing channel**

In [4]:
# The below command gets the uploader ID of the channel
!yt-dlp -J --flat-playlist https://www.youtube.com/@TheArcticSkies/videos

{"id": "UCRQkojTvYhnBt6D3ELwfINQ", "channel": "The Arctic Skies", "channel_id": "UCRQkojTvYhnBt6D3ELwfINQ", "title": "The Arctic Skies - Videos", "availability": null, "channel_follower_count": 60, "description": "Welcome! \n\nHere you can watch the sky 24/7. Camera is situated north of the arctic circle in the Swedish Lapland, Here you can see the bright skies during the midnight sun and the Auroras during winter.\n\nAs time goes by and I gather footage you may start seeing some cool monthly and yearly timelapses\n", "tags": ["nature", "aurora", "borealis", "aurora borealis", "midnight", "sun", "midnight sun", "Sweden", "Portugal", "arctic", "circle", "arctic circle"], "thumbnails": [{"url": "https://yt3.googleusercontent.com/Gp_3ijT1tytXD5DrWKmKPGdADZGc5Vf-qTgQBURRVtmHxJ5yZZjEB_Mvo9d9cjQ-sx4smh-knw=w1060-fcrop64=1,00005a57ffffa5a8-k-c0xffffffff-no-nd-rj", "height": 175, "width": 1060, "preference": -10, "id": "0", "resolution": "1060x175"}, {"url": "https://yt3.googleusercontent.com/

In [1]:
import os
import cv2
import time
import subprocess
from datetime import datetime
import threading

from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials

#__________________________________________Configuration Section - please edit below as needed__________________________________________________________________________________#
API_KEY_PATH = "API_key.txt"                                          # please edit by your own API key file
DRIVE_CREDENTIALS = "credentials.json"                                # please edit by your own OAuth client secret file
SCOPES = ['https://www.googleapis.com/auth/drive.file']
DRIVE_FOLDER_ID = "1n-kO2CM8e9ZfHQ2Pp352j87n-ZziZVgm"                 # please edit by your own folder ID (shown in the URL of your google drive folder)
SAVE_INTERVAL = 30                                                    # 30 seconds to save one jpg, avoid too frequent uploads
CHANNEL_ID = "UCRQkojTvYhnBt6D3ELwfINQ"                               # The Arctic Skies channel ID, you can change to any channel you want to crawl
MAX_VIDEOS = 3                                                        # Max number of videos to stream concurrently, you can change this number if your hardware can handle :)
#_______________________________________________________________________________________________________________________________________________________________________________#


# below code no need to be edited, unless you want to change functionality
def get_drive_service():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file(DRIVE_CREDENTIALS, SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('drive', 'v3', credentials=creds)


def upload_to_drive(local_path, filename):
    service = get_drive_service()
    file_metadata = {
        'name': filename,
        'parents': [DRIVE_FOLDER_ID]
    }
    media = MediaFileUpload(local_path, mimetype='image/jpeg')
    file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
    print("Uploaded:", filename)

def load_api_key():
    with open(API_KEY_PATH, 'r') as f:
        return f.read().strip()


def get_recent_channel_videos(api_key, channel_id, max_results=MAX_VIDEOS):
    youtube = build('youtube', 'v3', developerKey=api_key)
    req = youtube.search().list(
        channelId=channel_id,
        type='video',
        order='date',
        part='id',
        maxResults=max_results
    )
    res = req.execute()
    video_urls = []
    for item in res.get("items", []):
        vid = item['id']['videoId']
        video_urls.append(f"https://www.youtube.com/watch?v={vid}")
    return video_urls

def get_stream_url(video_url):
    result = subprocess.run(["yt-dlp", "-g", video_url], capture_output=True, text=True)
    return result.stdout.strip()

def stream_and_upload(stream_url, video_id):
    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        print(f"Failed to open stream: {stream_url}")
        return

    print(f"Started streaming video: {video_id}")
    last_saved = time.time()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print(f"Video ended: {video_id}")
                break

            now = time.time()
            if now - last_saved >= SAVE_INTERVAL:
                ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
                filename = f"{video_id}_{ts}.jpg"
                cv2.imwrite(filename, frame)
                upload_to_drive(filename, filename)
                os.remove(filename)
                last_saved = now

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print(f"User stopped: {video_id}")
                break
    except KeyboardInterrupt:
        print(f"Interrupted: {video_id}")
    finally:
        cap.release()

def start_stream_thread(video_url):
    video_id = video_url.split("v=")[-1]
    stream_url = get_stream_url(video_url)
    if stream_url:
        stream_and_upload(stream_url, video_id)
    else:
        print(f"Could not get stream URL for {video_url}")


# main code below (no need to edit)
if __name__ == "__main__":
    api_key = load_api_key()
    video_urls = get_recent_channel_videos(api_key, CHANNEL_ID)

    if not video_urls:
        print("No videos found on the channel.")
    else:
        print(f"Found {len(video_urls)} video(s). Starting streams...")
        threads = []
        for url in video_urls:
            t = threading.Thread(target=start_stream_thread, args=(url,))
            t.start()
            threads.append(t)

        for t in threads:
            t.join()

Found 3 video(s). Starting streams...
Started streaming video: lvH-meurOBc
Started streaming video: 3d_P8H3tU_E
Started streaming video: eDPi6BiUeV8


  ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')


Uploaded: lvH-meurOBc_20251118_054025.jpg
Uploaded: eDPi6BiUeV8_20251118_054028.jpg
Uploaded: 3d_P8H3tU_E_20251118_054033.jpg
Uploaded: lvH-meurOBc_20251118_054056.jpg
Uploaded: eDPi6BiUeV8_20251118_054058.jpg
Uploaded: 3d_P8H3tU_E_20251118_054103.jpg
Uploaded: lvH-meurOBc_20251118_054126.jpg
Uploaded: eDPi6BiUeV8_20251118_054128.jpg
Uploaded: 3d_P8H3tU_E_20251118_054133.jpg
Uploaded: lvH-meurOBc_20251118_054156.jpg


Exception in thread Thread-3 (start_stream_thread):
Traceback (most recent call last):
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\httplib2\__init__.py", line 1369, in _conn_request
Exception in thread Thread-4 (start_stream_thread):
Traceback (most recent call last):
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\httplib2\__init__.py", line 1369, in _conn_request
Exception in thread Thread-5 (start_stream_thread):
Traceback (most recent call last):
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\httplib2\__init__.py", line 1369, in _conn_request
    conn.connect()
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\httplib2\__init__.py", line 1145, in connect
    conn.connect()
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\httplib2\__init__.py", line 1145, in connect
    address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
                   ^^^^^^^^^^^^^^^^

### The below is for crawling and streaming for **video** by **accessing the channel**

In [2]:
import os
import cv2
import time
import subprocess
from datetime import datetime
import threading

from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials

#__________________________________________Configuration Section - please edit below as needed__________________________________________________________________________________#
API_KEY_PATH = "API_key.txt"                                          # please edit by your own API key file
DRIVE_CREDENTIALS = "credentials.json"                                # please edit by your own OAuth client secret file
SCOPES = ['https://www.googleapis.com/auth/drive.file']
DRIVE_FOLDER_ID = "1n-kO2CM8e9ZfHQ2Pp352j87n-ZziZVgm"                 # please edit by your own folder ID (shown in the URL of your google drive folder)
SAVE_INTERVAL = 30                                                    # 30 seconds to save one jpg, avoid too frequent uploads
CHANNEL_ID = "UCRQkojTvYhnBt6D3ELwfINQ"                               # The Arctic Skies channel ID, you can change to any channel you want to crawl
MAX_STREAMS = 3                                                       # Max number of lives to stream concurrently, you can change this number if your hardware can handle :)
#_______________________________________________________________________________________________________________________________________________________________________________#

# below code no need to be edited, unless you want to change functionality
def get_drive_service():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file(DRIVE_CREDENTIALS, SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('drive', 'v3', credentials=creds)

def upload_to_drive(local_path, filename):
    service = get_drive_service()
    file_metadata = {
        'name': filename,
        'parents': [DRIVE_FOLDER_ID]
    }
    media = MediaFileUpload(local_path, mimetype='image/jpeg')
    file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
    print("Uploaded:", filename)

def load_api_key():
    with open(API_KEY_PATH, 'r') as f:
        return f.read().strip()

def search_live_videos_from_channel(api_key, channel_id, max_results=MAX_STREAMS):
    youtube = build('youtube', 'v3', developerKey=api_key)
    req = youtube.search().list(
        channelId=channel_id,
        eventType='live',
        type='video',
        part='id',
        maxResults=max_results
    )
    res = req.execute()
    video_urls = []
    for item in res.get("items", []):
        vid = item['id']['videoId']
        video_urls.append(f"https://www.youtube.com/watch?v={vid}")
    return video_urls

def get_stream_url(video_url):
    result = subprocess.run(["yt-dlp", "-g", video_url], capture_output=True, text=True)
    return result.stdout.strip()

def stream_and_upload(stream_url, video_id):
    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        print(f"Failed to open stream: {stream_url}")
        return

    print(f"Started streaming: {video_id}")
    last_saved = time.time()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print(f"Stream ended for {video_id}")
                break

            now = time.time()
            if now - last_saved >= SAVE_INTERVAL:
                ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
                filename = f"{video_id}_{ts}.jpg"
                cv2.imwrite(filename, frame)
                upload_to_drive(filename, filename)
                os.remove(filename)
                last_saved = now

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print(f" Stream manually stopped: {video_id}")
                break
    except KeyboardInterrupt:
        print(f" Interrupted: {video_id}")
    finally:
        cap.release()

def start_stream_thread(video_url):
    video_id = video_url.split("v=")[-1]
    stream_url = get_stream_url(video_url)
    if stream_url:
        stream_and_upload(stream_url, video_id)
    else:
        print(f" Could not get stream URL for {video_url}")

# main code below (no need to edit)
if __name__ == "__main__":
    api_key = load_api_key()
    video_urls = search_live_videos_from_channel(api_key, CHANNEL_ID)

    if not video_urls:
        print("No live streams found on the channel.")
    else:
        print(f"Found {len(video_urls)} live stream(s). Starting...")
        threads = []
        for url in video_urls:
            t = threading.Thread(target=start_stream_thread, args=(url,))
            t.start()
            threads.append(t)

        for t in threads:
            t.join()

Found 1 live stream(s). Starting...
Started streaming: 8o9eaJCQ28w


  ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')


Uploaded: 8o9eaJCQ28w_20251118_094735.jpg
Uploaded: 8o9eaJCQ28w_20251118_094807.jpg
Uploaded: 8o9eaJCQ28w_20251118_094837.jpg
Uploaded: 8o9eaJCQ28w_20251118_094907.jpg
Uploaded: 8o9eaJCQ28w_20251118_094937.jpg
Uploaded: 8o9eaJCQ28w_20251118_095008.jpg
Uploaded: 8o9eaJCQ28w_20251118_095039.jpg
Uploaded: 8o9eaJCQ28w_20251118_095109.jpg
Uploaded: 8o9eaJCQ28w_20251118_095139.jpg
Uploaded: 8o9eaJCQ28w_20251118_095209.jpg
Uploaded: 8o9eaJCQ28w_20251118_095239.jpg


Exception in thread Thread-12 (start_stream_thread):
Traceback (most recent call last):
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\alanchuang\AppData\Local\Temp\ipykernel_23324\2628213312.py", line 106, in start_stream_thread
  File "C:\Users\alanchuang\AppData\Local\Temp\ipykernel_23324\2628213312.py", line 90, in stream_and_upload
  File "C:\Users\alanchuang\AppData\Local\Temp\ipykernel_23324\2628213312.py", line 42, in upload_to_drive
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
    return wrapped(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\alanchuang\anaconda3\envs\myenv3.0\Lib\site-packages\googleapiclient\http.py", line 923, in execute
    resp, c