In [None]:
# Install the third-party packages this notebook relies on into the kernel's environment.
# NOTE(review): no versions are pinned, so a fresh run may pull releases whose APIs differ
# from the ones the code below was written against — consider pinning (pkg==x.y.z).
%pip install youtube_transcript_api
%pip install --upgrade google-api-python-client
%pip install --upgrade google-auth-oauthlib google-auth-httplib2
%pip install pandas


In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from youtube_transcript_api import YouTubeTranscriptApi
import requests
import csv
import os
import pandas as pd
import json

def get_channel_upload_playlist_id_by_username(api_key, username, timeout=30):
    """Return the ID of the "uploads" playlist of the channel with the given username.

    Calls the YouTube Data API v3 ``channels`` endpoint with ``forUsername``.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        username: Channel username sent as the ``forUsername`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``contentDetails.relatedPlaylists.uploads`` of the first item in the response.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        LookupError: If the response contains no channel for ``username``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/channels",
        # Pass values via ``params`` so requests URL-encodes them (usernames may
        # contain characters such as '&' or spaces that would corrupt the query).
        params={"part": "contentDetails", "forUsername": username, "key": api_key},
        timeout=timeout,
    )
    if not response.ok:
        # Deliberately not ``raise_for_status()``: its message embeds the full
        # request URL, which would print the API key into the notebook output.
        raise requests.HTTPError(
            f"YouTube API request failed with HTTP status {response.status_code}",
            response=response,
        )

    items = response.json().get("items") or []
    if not items:
        raise LookupError(f"No YouTube channel found for username {username!r}")
    return items[0]["contentDetails"]["relatedPlaylists"]["uploads"]

def get_channel_upload_playlist_id_by_channelid(api_key, channel_id, timeout=30):
    """Return the ID of the "uploads" playlist of the channel with the given channel ID.

    Calls the YouTube Data API v3 ``channels`` endpoint with ``id``.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        channel_id: Channel ID sent as the ``id`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``contentDetails.relatedPlaylists.uploads`` of the first item in the response.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        LookupError: If the response contains no channel for ``channel_id``.
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/channels",
        # ``params`` lets requests build and URL-encode the query string.
        params={"part": "contentDetails", "id": channel_id, "key": api_key},
        timeout=timeout,
    )
    if not response.ok:
        # Deliberately not ``raise_for_status()``: its message embeds the full
        # request URL, which would print the API key into the notebook output.
        raise requests.HTTPError(
            f"YouTube API request failed with HTTP status {response.status_code}",
            response=response,
        )

    items = response.json().get("items") or []
    if not items:
        raise LookupError(f"No YouTube channel found for channel id {channel_id!r}")
    return items[0]["contentDetails"]["relatedPlaylists"]["uploads"]

def get_playlist_items(api_key, playlist_id, timeout=30):
    """Collect every item of a playlist, following pagination to the last page.

    Calls the YouTube Data API v3 ``playlistItems`` endpoint with
    ``part=snippet,contentDetails`` and ``maxResults=50`` and keeps requesting
    pages while the response carries a ``nextPageToken``.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        playlist_id: Playlist ID sent as the ``playlistId`` query parameter.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the ``items`` entries of all pages, in response order.

    Raises:
        requests.HTTPError: If any page request gets an error status.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/playlistItems"
    params = {
        "part": "snippet,contentDetails",
        "maxResults": 50,
        "playlistId": playlist_id,
        "key": api_key,
    }
    items = []

    while True:
        response = requests.get(endpoint, params=params, timeout=timeout)
        if not response.ok:
            # Deliberately not ``raise_for_status()``: its message embeds the full
            # request URL, which would print the API key into the notebook output.
            raise requests.HTTPError(
                f"YouTube API request failed with HTTP status {response.status_code}",
                response=response,
            )
        data = response.json()
        items.extend(data.get("items", []))

        next_page_token = data.get("nextPageToken")
        if not next_page_token:
            return items

        # Overwrite the token instead of appending to the URL: each request must
        # carry exactly one pageToken, otherwise stale tokens pile up page by page.
        params["pageToken"] = next_page_token

def fetch_transcript(video_id):
    """Fetch a video's transcript and reduce each segment to its text and start.

    Works with both generations of ``youtube_transcript_api``: the instance
    method ``YouTubeTranscriptApi().fetch(...)`` when the class provides it,
    and the older static ``YouTubeTranscriptApi.get_transcript(...)`` otherwise.

    Args:
        video_id: ID of the video whose transcript is requested.

    Returns:
        A ``(video_id, segments)`` tuple. ``segments`` is a list of
        ``{'text', 'start'}`` dicts with newlines in the text replaced by
        spaces, or ``None`` when the transcript could not be retrieved
        (the error is printed, not raised, so one failing video does not
        abort a batch).
    """
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # A fresh instance per call keeps this safe to run from worker threads.
            full_transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        else:
            full_transcript = YouTubeTranscriptApi.get_transcript(video_id)

        optimized_transcript = [
            {'text': segment['text'].replace('\n', ' '), 'start': segment['start']}
            for segment in full_transcript
        ]
        return video_id, optimized_transcript
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print(f"Error getting transcript for video {video_id}: {e}")
        return video_id, None

# Configuration.
# The API key is read from the environment so the secret never lives in the notebook;
# set YOUTUBE_API_KEY before starting Jupyter. Any key that was previously committed
# in this file should be treated as leaked and revoked.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError("Set the YOUTUBE_API_KEY environment variable to your YouTube Data API key.")
target_username = "hubermanlab"
channel_id = "UC2D2CMWXMOVWx7giW1n3LIg"

# Step 1: Get the upload playlist ID
upload_playlist_id = get_channel_upload_playlist_id_by_channelid(api_key, channel_id)
print(f"Upload Playlist ID: {upload_playlist_id}")

# Step 2: Get all videos in the upload playlist
playlist_items = get_playlist_items(api_key, upload_playlist_id)

# Output directory: the current user's Desktop (created if it does not exist,
# so the per-video writes below do not all fail on machines without one).
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
os.makedirs(desktop_path, exist_ok=True)

# Step 3: Fetch transcripts concurrently and write each one as soon as it is ready.
with ThreadPoolExecutor(max_workers=20) as executor:
    # Map each future back to its playlist item so title/ID are available on completion.
    future_to_video_id = {
        executor.submit(fetch_transcript, item["contentDetails"]["videoId"]): item
        for item in playlist_items
    }

    # Consume results inside the executor context: files are written while other
    # downloads are still running instead of only after every download finished.
    for future in as_completed(future_to_video_id):
        video_item = future_to_video_id[future]
        video_id = video_item["contentDetails"]["videoId"]
        video_title = video_item["snippet"]["title"]
        video_url = f"https://www.youtube.com/watch?v={video_id}"
        transcript_file = os.path.join(desktop_path, f"{video_id}_transcript.json")

        try:
            _, optimized_transcript = future.result()
            if optimized_transcript is not None:
                video_transcript = {
                    'video_id': video_id,
                    'video_title': video_title,
                    'video_url': video_url,
                    'transcripts': optimized_transcript
                }

                # Save the optimized transcript to a JSON file (compact, no indentation).
                with open(transcript_file, 'w', encoding='utf-8') as outfile:
                    json.dump(video_transcript, outfile)

                print(f"Optimized transcript for video '{video_title}' saved to '{transcript_file}'.")
        except Exception as e:
            # Keep going: one unwritable file must not abort the remaining videos.
            print(f"Error processing transcript for video {video_id}: {e}")

print("\nVideo transcripts have been saved to the desktop.")

In [None]:
# Sample transcript segments (text, start, duration) used as input for the JSON serialization cell.
p = [
    dict(text='welcome to the huberman Lab podcast', start=0.28, duration=3.48),
    dict(text='where we discuss science and', start=2.159, duration=3.491),
]

In [None]:
import json


# Serialize the sample segments held in `p` to a JSON string; the bare name on
# the last line makes the notebook display that string as the cell's output.
transcript_str = json.dumps(p)
transcript_str
