# ARP - Data Collection: YouTube Transcripts for the Fiat 600e

In [2]:
!pip install google-api-python-client youtube-transcript-api

Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl (24 kB)
Installing collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.2


In [13]:
import logging
import os
from getpass import getpass

import pandas as pd
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi

# Set up the API key and build the YouTube Data API v3 service client.
# The key is read from the YOUTUBE_API_KEY environment variable so the secret is
# never stored in the notebook; if the variable is unset, prompt for it
# interactively (getpass does not echo the input into the cell output).
# NOTE(review): a key was previously hardcoded in this cell and has therefore been
# exposed to anyone with a copy of the notebook - revoke/rotate it.
API_KEY = os.environ.get('YOUTUBE_API_KEY') or getpass('YouTube Data API key: ')
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Function to get all video IDs in a playlist
def get_video_ids(playlist_id):
    """Return the video IDs of all items in a playlist.

    Pages through the playlistItems endpoint of the module-level ``youtube``
    client, asking for 50 items per request, and keeps requesting until
    ``list_next`` yields no further request.

    Args:
        playlist_id: ID of the YouTube playlist to enumerate.

    Returns:
        A list of video ID strings, in the order the API returned them.
    """
    collected = []
    page_request = youtube.playlistItems().list(
        part='contentDetails',
        playlistId=playlist_id,
        maxResults=50,
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            entry['contentDetails']['videoId'] for entry in page['items']
        )
        # list_next builds the request for the following page from the current
        # request/response pair; the loop stops once it returns a falsy value.
        page_request = youtube.playlistItems().list_next(page_request, page)
    return collected

# Function to get video details (title and channel name)
def get_video_details(video_id):
    """Look up the title and channel name of a single video.

    Queries the videos endpoint of the module-level ``youtube`` client for the
    video's ``snippet`` part.

    Args:
        video_id: ID of the YouTube video to look up.

    Returns:
        A ``(title, channel_title)`` tuple, or ``(None, None)`` when the
        response contains no items for this ID.
    """
    result = youtube.videos().list(part='snippet', id=video_id).execute()
    matches = result['items']
    if not matches:
        return None, None
    info = matches[0]['snippet']
    return info['title'], info['channelTitle']

# Function to get transcript for a video
def get_transcript(video_id):
    """Fetch a video's transcript and flatten it into a single string.

    Args:
        video_id: ID of the YouTube video whose transcript should be fetched.

    Returns:
        The text of all transcript entries joined with single spaces, or
        ``None`` when the transcript could not be retrieved. Failures are
        reported through ``logging`` rather than returned, so an error message
        can never be mistaken for transcript text in the collected data.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment['text'] for segment in segments)
    except Exception as exc:
        # Deliberately best-effort: one video without a usable transcript must
        # not abort the whole playlist run, so log it and mark the value missing.
        logging.getLogger(__name__).warning(
            "No transcript for video %s: %s", video_id, exc
        )
        return None

# Function to get transcripts and details for all videos in a playlist
def get_playlist_data(playlist_id):
    """Collect details and transcript for every video in a playlist.

    Args:
        playlist_id: ID of the YouTube playlist to process.

    Returns:
        A list with one dict per video, keyed by ``'Video ID'``, ``'Title'``,
        ``'Channel Name'`` and ``'Transcript'``, in playlist order.
    """
    def build_row(vid):
        # Details are fetched before the transcript, one video at a time.
        video_title, channel = get_video_details(vid)
        return {
            'Video ID': vid,
            'Title': video_title,
            'Channel Name': channel,
            'Transcript': get_transcript(vid),
        }

    return [build_row(vid) for vid in get_video_ids(playlist_id)]

# Run the collection for one playlist, load the rows into a DataFrame and
# persist them to CSV (the DataFrame index is not written).
playlist_id = 'PLTWBIoTLe6fK2OGkBzs_Mg0MCTtnKWTXh'
data = get_playlist_data(playlist_id=playlist_id)
df = pd.DataFrame(data=data)
df.to_csv(
    path_or_buf='youtube_playlist_data_with_transcripts.csv',
    index=False,
)


In [14]:
# Show the collected playlist data (one row per video) as this cell's output.
df

Unnamed: 0,Video ID,Title,Channel Name,Transcript
0,AXGNb76QgH0,NEW Fiat 600E Review: Fantastic or Flawed? | 4K,AutoTrader,foreign 500e and you should it's brilliant but...
1,MvgfB8kcS1s,I wasn't expecting to like THIS car so much?! ...,Auto Social UK,fats have always had this fabulous way of winn...
2,eZMrw98nHgU,Fiat 600e FIRST UK DRIVE. Does the 500e’s big ...,Electrifying,I know what you're thinking Nikki's lost her m...
3,HAZC8wrWQGM,All-new 600e Review: Fiat's family friendly EV,CarGurus UK,the Fiat 600e is the latest electric car from ...
4,Z6vFfBmTXuA,Is this car a bit *too* quirky? Fiat 600e Review,Electroheads,hello Electro heads Tish here and welcome back...
5,VokQJrHzBR8,FIRST DRIVE: New Fiat 600e - does the 500e’s b...,Electrifying,some interesting things about the Fiat 600e I ...
6,ejT6GwY6cSY,"Is This The Small, SIMPLE Electric Car We've B...",Fully Charged Show,hello this is the new Fiat 600 electric and it...
7,bxgXvFlt9aY,Fiat 600e review – Bigger but better than the ...,batchreviews,you don't need me to tell you the Italians are...
8,HLquNEYSKWk,A FUN Family Car?! New Fiat 600 Review - & Why...,cinch,[Music] the fat 500 has long been a hit with t...
9,1OWaEGStiA0,New Fiat 600e (2024) | FIRST LOOK,Planet Car News,[Music] thank you [Music] foreign


In [15]:
# Save the collected data under the project-specific filename.
csv_file = 'transcript_data_600e.csv'  # Name your file
df.to_csv(csv_file, index=False)

# google.colab is only importable inside a Colab runtime. Guard the import so the
# notebook still runs end-to-end on a local Jupyter kernel, where the CSV is
# already on disk and no browser download is needed.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Trigger a download to your local machine
    files.download(csv_file)
else:
    print(f"Not running in Colab; CSV saved locally as {csv_file}")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>