In [1]:
from googleapiclient.discovery import build
import json
import pandas as pd
from pytube import extract
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
from googletrans import Translator

In [2]:
# Read the API key from a local text file so the secret is not hard-coded in the notebook.
with open("youtube_api_key.txt","r") as file:
    api_key = file.read().strip()  # strip() removes surrounding whitespace such as a trailing newline

In [3]:
# ID of the YouTube channel whose uploads are fetched via get_channel_videos().
channel_id = "UC-Lq6oBPTgTXT_K-ylWL6hg"

In [4]:
# Build the YouTube API client ("youtube" service, version "v3"), using api_key as the developer key
youtube = build("youtube", "v3", developerKey=api_key)

In [5]:
def get_channel_videos(channel_id, client=None):
    """Return every playlist item from a channel's "uploads" playlist.

    Parameters
    ----------
    channel_id : str
        ID of the channel to look up.
    client : optional
        API client exposing ``channels()`` and ``playlistItems()``.
        Defaults to the notebook-level ``youtube`` client, so existing
        single-argument calls behave as before.

    Returns
    -------
    list
        The raw ``items`` entries of every ``playlistItems().list`` page,
        concatenated in the order the pages were returned.

    Raises
    ------
    ValueError
        If the channel lookup returns no items (for example an unknown or
        mistyped channel ID).
    """
    if client is None:
        client = youtube

    # Retrieve the uploads playlist ID
    channel_response = client.channels().list(
        part="contentDetails",
        id=channel_id
    ).execute()
    channel_items = channel_response.get("items") or []
    if not channel_items:
        # Fail with an actionable message instead of an opaque KeyError/IndexError
        raise ValueError(f"No channel found for channel_id={channel_id!r}")
    uploads_playlist_id = channel_items[0]["contentDetails"]["relatedPlaylists"]["uploads"]

    # Retrieve all videos in the uploads playlist, following nextPageToken
    # until the API stops returning one
    videos = []
    next_page_token = None
    while True:
        page = client.playlistItems().list(
            part="snippet",
            playlistId=uploads_playlist_id,
            maxResults=50,
            pageToken=next_page_token
        ).execute()
        videos.extend(page.get("items", []))
        next_page_token = page.get("nextPageToken")
        if next_page_token is None:
            break

    return videos

In [6]:
def get_video_descriptions(videos):
    """Summarise playlist items as title / description / video-ID records.

    Parameters
    ----------
    videos : iterable of dict
        Playlist items, each carrying a ``"snippet"`` with ``"title"``,
        ``"description"`` and ``"resourceId" -> "videoId"``.

    Returns
    -------
    list of dict
        One ``{"title", "description", "video_id"}`` dict per input item,
        in input order.
    """
    def _summarise(item):
        snippet = item["snippet"]
        vid = snippet["resourceId"]["videoId"]
        heading = snippet["title"]
        body = snippet["description"]
        return {"title": heading, "description": body, "video_id": vid}

    return [_summarise(item) for item in videos]

In [7]:
# Get all videos from the channel (the raw playlist items returned by the API)
videos = get_channel_videos(channel_id)

# Get descriptions of all videos (title, description and video ID per item)
descriptions = get_video_descriptions(videos)

In [8]:
# Preview only the first few raw playlist items; displaying the whole list floods the notebook output
videos[:3]

[{'kind': 'youtube#playlistItem',
  'etag': 'jj3Unimg3qK0ihejyFKWUbcZa7w',
  'id': 'VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLkhHOUptZXdWRTA0',
  'snippet': {'publishedAt': '2024-06-03T06:30:07Z',
   'channelId': 'UC-Lq6oBPTgTXT_K-ylWL6hg',
   'title': 'Lunch At A Highway Eatery! Namma Hallimane, Byrapura #hassan #foodvlog #mangalore #highway',
   'description': 'Only about three years old, however Namma HalliMane is fast emerging as ‘the stop’ for its wood fire-cooked biryani and mutton delicacies on this stretch of Mangalore highway. Watch the complete video at https://youtu.be/7DeoFR_RHRU \n\nSubscribe for more like this!\n\nADDRESS - Namma Hallimane, NH 75, Near Petrol Bunk, Bangalore - Mangalore Highway, Byrapura, Alur Taluk, Hassan District, Karnataka 573218; tel: +91 95382 28237\nLOCATION TAG - https://maps.app.goo.gl/5bcyGF2Xys5U8cJH6    \nTIMINGS: 12pm onwards, open for lunch only.\n\n#gourmetontheroad #kripalamanna #foodloverstv #nammahallimane #byrapura #nonveg #biryani #mutton #karn

In [9]:
# Preview only the first few records; displaying the whole list floods the notebook output
descriptions[:3]

[{'title': 'Lunch At A Highway Eatery! Namma Hallimane, Byrapura #hassan #foodvlog #mangalore #highway',
  'description': 'Only about three years old, however Namma HalliMane is fast emerging as ‘the stop’ for its wood fire-cooked biryani and mutton delicacies on this stretch of Mangalore highway. Watch the complete video at https://youtu.be/7DeoFR_RHRU \n\nSubscribe for more like this!\n\nADDRESS - Namma Hallimane, NH 75, Near Petrol Bunk, Bangalore - Mangalore Highway, Byrapura, Alur Taluk, Hassan District, Karnataka 573218; tel: +91 95382 28237\nLOCATION TAG - https://maps.app.goo.gl/5bcyGF2Xys5U8cJH6    \nTIMINGS: 12pm onwards, open for lunch only.\n\n#gourmetontheroad #kripalamanna #foodloverstv #nammahallimane #byrapura #nonveg #biryani #mutton #karnatakafoodtour #supportsmallbusiness #supportlocal #foodloversindia #truthinfood',
  'video_id': 'HG9JmewVE04'},
 {'title': 'Tasting Bengaluru’s Oldest, Most Popular Biryanis! TAJ Shaadi Biryani, SHIVAJI Donne Biryani!',
  'description

In [10]:
def get_video_details(videos):
    """Turn playlist items into Title / Link / Description records.

    Parameters
    ----------
    videos : iterable of dict
        Playlist items, each carrying a ``"snippet"`` with ``"title"``,
        ``"description"`` and ``"resourceId" -> "videoId"``.

    Returns
    -------
    list of dict
        One ``{"Title", "Link", "Description"}`` dict per input item, where
        ``"Link"`` is the watch URL built from the item's video ID.
    """
    def _to_record(item):
        snippet = item["snippet"]
        video_id = snippet["resourceId"]["videoId"]
        heading = snippet["title"]
        body = snippet["description"]
        watch_url = f"https://www.youtube.com/watch?v={video_id}"
        return {"Title": heading, "Link": watch_url, "Description": body}

    return list(map(_to_record, videos))

In [11]:
# Flatten the raw playlist items into Title / Link / Description records
video_details = get_video_details(videos)

In [12]:
# One row per video record; preview the first rows
df = pd.DataFrame(video_details)
df.head()

Unnamed: 0,Title,Link,Description
0,"Lunch At A Highway Eatery! Namma Hallimane, By...",https://www.youtube.com/watch?v=HG9JmewVE04,"Only about three years old, however Namma Hall..."
1,"Tasting Bengaluru’s Oldest, Most Popular Birya...",https://www.youtube.com/watch?v=m2G-fnkoaOY,#tajhotelbiryani #shivajimilitaryhotelbiryani ...
2,Tasted This Unbelievably Tasty Root Soup! Muda...,https://www.youtube.com/watch?v=ERWIan4ZYr4,Watch the complete video at https://youtu.be/5...
3,What We Ate At Erodu Amman Mess! Full Menu (al...,https://www.youtube.com/watch?v=EWWe60lHEKw,Watch full video at Watch at https://youtu.be/...
4,Found This Delicious Dosa Chicken Curry For 50...,https://www.youtube.com/watch?v=APg_flJyXHs,Watch the complete episode at Watch at https:/...


In [13]:
df["Description"][:10]

0    Only about three years old, however Namma Hall...
1    #tajhotelbiryani #shivajimilitaryhotelbiryani ...
2    Watch the complete video at https://youtu.be/5...
3    Watch full video at Watch at https://youtu.be/...
4    Watch the complete episode at Watch at https:/...
5    The earliest customers for the Hoskote biryani...
6    A delightful Tamil Nadu meal which in addition...
7    #manidumbiryani #hoskote #biryani \n\nhoskote ...
8    #markwiens #kripalamanna #bangalore \n\nmark w...
9    #sangeethavegrestaurant #lunch #chennai\n \nsa...
Name: Description, dtype: object

In [14]:
print(df["Description"][468])

#friedchicken #biryani #bangalore

gowda fried chicken, fried chicken, gowda fried chicken, gfc chicken, chicken kebab, mandya recipe, mutton biryani, biryani, chicken 65, mysore road, bengaluru, bangalore

Fed up with labour problems in his vegetarian eatery of 35 years, Krishnappa Gowda closed doors 10 years ago to open a shop selling fried chicken, cooked by him to a secret recipe influenced by his Mandya roots. Such is the tantalising taste of this chicken kebab, that its fans drive across the city to stand and eat at this eatery located in a nook of Mysore road! An astute entrepreneur, and mindful of the popularity of his spice mix, Gowda has also developed packaged versions of the masala for those who would like to satisfy their fried chicken cravings at home. I make the trek to GFC and return convinced that this is perhaps the best local fried chicken I have tasted to date. Hope you enjoy this episode, take care and stay safe.

Gowda’s Fried Chicken
Tender chicken, marinated for

In [15]:
pd.DataFrame(videos[:5])

Unnamed: 0,kind,etag,id,snippet
0,youtube#playlistItem,jj3Unimg3qK0ihejyFKWUbcZa7w,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLkhHOUptZXdWRTA0,"{'publishedAt': '2024-06-03T06:30:07Z', 'chann..."
1,youtube#playlistItem,DgEhsbZcSS0ytApaO3oa8C1VtjU,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLm0yRy1mbmtvYU9Z,"{'publishedAt': '2024-06-01T05:30:12Z', 'chann..."
2,youtube#playlistItem,jMZtUSlvy1xHaynsKbBAsCsEPS8,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLkVSV0lhbjRaWXI0,"{'publishedAt': '2024-05-31T12:00:15Z', 'chann..."
3,youtube#playlistItem,q55-pEdlun7prublUeoZdzWWGRg,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLkVXV2U2MGxIRUt3,"{'publishedAt': '2024-05-30T06:30:07Z', 'chann..."
4,youtube#playlistItem,4aoSBlU6sqfcJLEygWTDxLlLdUs,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLkFQZ19mbEp5WEhz,"{'publishedAt': '2024-05-29T06:30:05Z', 'chann..."


In [16]:
import re
def extract_links(text):
    """Return every http(s) URL found in ``text``, in order of appearance.

    Trailing sentence punctuation and quotes are removed from each URL, and
    a trailing closing bracket is removed only when it has no matching
    opening bracket inside the URL. This keeps links such as
    ``.../Dosa_(food)`` intact while cleaning up ``(see https://x/y).``.

    Parameters
    ----------
    text : str
        Free-form text. Non-string values (e.g. ``None`` or ``NaN``) are
        treated as containing no links.

    Returns
    -------
    list of str
        The cleaned URLs; an empty list when there are none.
    """
    if not isinstance(text, str):
        return []

    punctuation = ".,;:!?'\""
    openers = {")": "(", "]": "[", "}": "{", ">": "<"}

    links = []
    # Regular expression to find URLs: scheme followed by a run of non-whitespace
    for url in re.findall(r'https?://\S+', text):
        while url:
            last = url[-1]
            if last in punctuation:
                url = url[:-1]
            elif last in openers and url.count(last) > url.count(openers[last]):
                # Unbalanced closer: it belongs to the surrounding prose, not the URL
                url = url[:-1]
            else:
                break
        # Skip matches that were nothing but a scheme plus punctuation (e.g. "https://...")
        if url and not url.endswith("://"):
            links.append(url)
    return links

In [17]:
# Collect the URLs found in each video description (one list per row)
df["Links"] = df["Description"].apply(extract_links)

In [18]:
df.head()

Unnamed: 0,Title,Link,Description,Links
0,"Lunch At A Highway Eatery! Namma Hallimane, By...",https://www.youtube.com/watch?v=HG9JmewVE04,"Only about three years old, however Namma Hall...","[https://youtu.be/7DeoFR_RHRU, https://maps.ap..."
1,"Tasting Bengaluru’s Oldest, Most Popular Birya...",https://www.youtube.com/watch?v=m2G-fnkoaOY,#tajhotelbiryani #shivajimilitaryhotelbiryani ...,[https://www.youtube.com/playlist?list=PLri_37...
2,Tasted This Unbelievably Tasty Root Soup! Muda...,https://www.youtube.com/watch?v=ERWIan4ZYr4,Watch the complete video at https://youtu.be/5...,[https://youtu.be/5s-CtOb09FY]
3,What We Ate At Erodu Amman Mess! Full Menu (al...,https://www.youtube.com/watch?v=EWWe60lHEKw,Watch full video at Watch at https://youtu.be/...,[https://youtu.be/Swvj1J9sJCk]
4,Found This Delicious Dosa Chicken Curry For 50...,https://www.youtube.com/watch?v=APg_flJyXHs,Watch the complete episode at Watch at https:/...,"[https://youtu.be/fUR70aUEMEw, https://maps.ap..."


In [19]:
def filter_gmaps_links(links):
    """Keep only the links that point at Google Maps.

    Recognised forms (http or https, with or without a leading ``www.``):
    ``goo.gl/maps...``, ``maps.google...``, ``google.<tld>/maps...``,
    ``maps.app.goo.gl...`` and ``g.page...``. Host matching is
    case-insensitive.

    Parameters
    ----------
    links : list of str or None
        Candidate URLs. ``None`` is treated as an empty list.

    Returns
    -------
    list of str
        The matching links, in their original order.
    """
    gmaps_pattern = re.compile(
        # Optional "www." so the common https://www.google.com/maps/... form is not dropped
        r'https?://(?:www\.)?'
        r'(?:goo\.gl/maps|maps\.google|google\.[a-z.]+/maps|maps\.app\.goo\.gl|g\.page)\S+',
        re.IGNORECASE,
    )
    return [link for link in (links or []) if gmaps_pattern.match(link)]

In [20]:
# Keep only the Google Maps links from each row's extracted URLs, then preview the result
df['gmaps_links'] = df['Links'].apply(filter_gmaps_links)
df.head()

Unnamed: 0,Title,Link,Description,Links,gmaps_links
0,"Lunch At A Highway Eatery! Namma Hallimane, By...",https://www.youtube.com/watch?v=HG9JmewVE04,"Only about three years old, however Namma Hall...","[https://youtu.be/7DeoFR_RHRU, https://maps.ap...",[https://maps.app.goo.gl/5bcyGF2Xys5U8cJH6]
1,"Tasting Bengaluru’s Oldest, Most Popular Birya...",https://www.youtube.com/watch?v=m2G-fnkoaOY,#tajhotelbiryani #shivajimilitaryhotelbiryani ...,[https://www.youtube.com/playlist?list=PLri_37...,"[https://maps.app.goo.gl/aH7rvnS5itWGF2dy6, ht..."
2,Tasted This Unbelievably Tasty Root Soup! Muda...,https://www.youtube.com/watch?v=ERWIan4ZYr4,Watch the complete video at https://youtu.be/5...,[https://youtu.be/5s-CtOb09FY],[]
3,What We Ate At Erodu Amman Mess! Full Menu (al...,https://www.youtube.com/watch?v=EWWe60lHEKw,Watch full video at Watch at https://youtu.be/...,[https://youtu.be/Swvj1J9sJCk],[]
4,Found This Delicious Dosa Chicken Curry For 50...,https://www.youtube.com/watch?v=APg_flJyXHs,Watch the complete episode at Watch at https:/...,"[https://youtu.be/fUR70aUEMEw, https://maps.ap...",[https://maps.app.goo.gl/Pwu2EkJbnj59r3wV8]


In [21]:
# Replace empty link lists with None so rows without a maps link count as missing values
df['gmaps_links'] = df['gmaps_links'].apply(lambda found_links: found_links or None)

In [22]:
df['gmaps_links']

0             [https://maps.app.goo.gl/5bcyGF2Xys5U8cJH6]
1       [https://maps.app.goo.gl/aH7rvnS5itWGF2dy6, ht...
2                                                    None
3                                                    None
4             [https://maps.app.goo.gl/Pwu2EkJbnj59r3wV8]
                              ...                        
1035                                                 None
1036                                                 None
1037                                                 None
1038                                                 None
1039                                                 None
Name: gmaps_links, Length: 1040, dtype: object

In [23]:
df.head()

Unnamed: 0,Title,Link,Description,Links,gmaps_links
0,"Lunch At A Highway Eatery! Namma Hallimane, By...",https://www.youtube.com/watch?v=HG9JmewVE04,"Only about three years old, however Namma Hall...","[https://youtu.be/7DeoFR_RHRU, https://maps.ap...",[https://maps.app.goo.gl/5bcyGF2Xys5U8cJH6]
1,"Tasting Bengaluru’s Oldest, Most Popular Birya...",https://www.youtube.com/watch?v=m2G-fnkoaOY,#tajhotelbiryani #shivajimilitaryhotelbiryani ...,[https://www.youtube.com/playlist?list=PLri_37...,"[https://maps.app.goo.gl/aH7rvnS5itWGF2dy6, ht..."
2,Tasted This Unbelievably Tasty Root Soup! Muda...,https://www.youtube.com/watch?v=ERWIan4ZYr4,Watch the complete video at https://youtu.be/5...,[https://youtu.be/5s-CtOb09FY],
3,What We Ate At Erodu Amman Mess! Full Menu (al...,https://www.youtube.com/watch?v=EWWe60lHEKw,Watch full video at Watch at https://youtu.be/...,[https://youtu.be/Swvj1J9sJCk],
4,Found This Delicious Dosa Chicken Curry For 50...,https://www.youtube.com/watch?v=APg_flJyXHs,Watch the complete episode at Watch at https:/...,"[https://youtu.be/fUR70aUEMEw, https://maps.ap...",[https://maps.app.goo.gl/Pwu2EkJbnj59r3wV8]


In [24]:
# Function to extract video ID
def get_video_id(link: str) -> str:
    """Return the video ID contained in ``link``.

    Thin wrapper that delegates to pytube's ``extract.video_id`` so the
    lookup can be passed to ``Series.apply``.
    """
    return extract.video_id(link)

# Apply the function to the Link column
df['video_id'] = df['Link'].apply(get_video_id)

In [25]:
# Export the descriptions of videos with no Google Maps link, for manual inspection
df.loc[df["gmaps_links"].isna(), ["Description"]].to_csv("no_gmaps_links.csv", index=False)

In [30]:
def extract_and_translate_transcript(youtube_video_url):
    """Fetch a video's transcript and return it as a single English string.

    Transcript lookups are tried in this order, and the first hit wins:

    1. ``find_transcript(['en'])``
    2. ``find_generated_transcript(['en'])``
    3. ``find_transcript(['hi', 'kn'])``

    A transcript whose language code is not ``'en'`` is translated to
    English with ``Translator``.

    NOTE(review): the original comments describe ``find_transcript`` as
    "manually created only"; confirm against the youtube-transcript-api docs
    whether it also falls back to generated transcripts.

    Parameters
    ----------
    youtube_video_url : str
        URL of the video. The ID is parsed with pytube's
        ``extract.video_id``, the same parser ``get_video_id`` uses.

    Returns
    -------
    str
        The transcript text, or ``"No transcript available"`` when none of
        the lookups succeeds, or ``"Error: <message>"`` when anything else
        fails. The function deliberately never raises, so a bulk
        ``df.apply`` keeps going past individual failures.
    """
    try:
        # Naive url.split("=")[1] broke on URLs with extra query parameters
        # (…&t=10s) and on URLs without "=", so use the pytube parser instead.
        video_id = extract.video_id(youtube_video_url)
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Ordered fallbacks; each entry is (lookup method, language codes)
        lookups = (
            (transcript_list.find_transcript, ['en']),
            (transcript_list.find_generated_transcript, ['en']),
            (transcript_list.find_transcript, ['hi', 'kn']),
        )
        transcript = None
        for find, language_codes in lookups:
            try:
                transcript = find(language_codes)
                break
            except NoTranscriptFound:
                continue
        if transcript is None:
            return "No transcript available"

        segments = transcript.fetch()
        # join() avoids rebuilding the string once per segment
        full_transcript = " ".join(segment["text"] for segment in segments).strip()

        # If the transcript is not in English, translate it
        if transcript.language_code != 'en':
            translator = Translator()
            translated_transcript = translator.translate(
                full_transcript, src=transcript.language_code, dest='en'
            )
            return translated_transcript.text.strip()

        return full_transcript
    except Exception as e:
        # Best-effort by design: report the failure in-band instead of aborting the batch
        return f"Error: {e}"

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1040 entries, 0 to 1039
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Title        1040 non-null   object
 1   Link         1040 non-null   object
 2   Description  1040 non-null   object
 3   Links        1040 non-null   object
 4   gmaps_links  596 non-null    object
 5   video_id     1040 non-null   object
dtypes: object(6)
memory usage: 48.9+ KB


In [33]:
def process_row(row):
    """Log the row's index label, then return the transcript for its ``Link``."""
    row_label = row.name
    print(f"Processing row {row_label}")
    video_url = row['Link']
    return extract_and_translate_transcript(video_url)

df['transcript'] = df.apply(process_row, axis=1)

Processing row 0
Processing row 1
Processing row 2
Processing row 3
Processing row 4
Processing row 5
Processing row 6
Processing row 7
Processing row 8
Processing row 9
Processing row 10
Processing row 11
Processing row 12
Processing row 13
Processing row 14
Processing row 15
Processing row 16
Processing row 17
Processing row 18
Processing row 19
Processing row 20
Processing row 21
Processing row 22
Processing row 23
Processing row 24
Processing row 25
Processing row 26
Processing row 27
Processing row 28
Processing row 29
Processing row 30
Processing row 31
Processing row 32
Processing row 33
Processing row 34
Processing row 35
Processing row 36
Processing row 37
Processing row 38
Processing row 39
Processing row 40
Processing row 41
Processing row 42
Processing row 43
Processing row 44
Processing row 45
Processing row 46
Processing row 47
Processing row 48
Processing row 49
Processing row 50
Processing row 51
Processing row 52
Processing row 53
Processing row 54
Processing row 55
Pr

In [34]:
df.head()

Unnamed: 0,Title,Link,Description,Links,gmaps_links,video_id,transcript
0,"Lunch At A Highway Eatery! Namma Hallimane, By...",https://www.youtube.com/watch?v=HG9JmewVE04,"Only about three years old, however Namma Hall...","[https://youtu.be/7DeoFR_RHRU, https://maps.ap...",[https://maps.app.goo.gl/5bcyGF2Xys5U8cJH6],HG9JmewVE04,M Kaa M Kaa M [Music] mut Chicken Chops one by...
1,"Tasting Bengaluru’s Oldest, Most Popular Birya...",https://www.youtube.com/watch?v=m2G-fnkoaOY,#tajhotelbiryani #shivajimilitaryhotelbiryani ...,[https://www.youtube.com/playlist?list=PLri_37...,"[https://maps.app.goo.gl/aH7rvnS5itWGF2dy6, ht...",m2G-fnkoaOY,so the third Biryani that we about to taste is...
2,Tasted This Unbelievably Tasty Root Soup! Muda...,https://www.youtube.com/watch?v=ERWIan4ZYr4,Watch the complete video at https://youtu.be/5...,[https://youtu.be/5s-CtOb09FY],,ERWIan4ZYr4,mudat Kang so Kanga means tuba is it tuba and ...
3,What We Ate At Erodu Amman Mess! Full Menu (al...,https://www.youtube.com/watch?v=EWWe60lHEKw,Watch full video at Watch at https://youtu.be/...,[https://youtu.be/Swvj1J9sJCk],,EWWe60lHEKw,[Music] well I thought we said only a few dish...
4,Found This Delicious Dosa Chicken Curry For 50...,https://www.youtube.com/watch?v=APg_flJyXHs,Watch the complete episode at Watch at https:/...,"[https://youtu.be/fUR70aUEMEw, https://maps.ap...",[https://maps.app.goo.gl/Pwu2EkJbnj59r3wV8],APg_flJyXHs,this is about how much 50 50 rupees and the do...


In [35]:
# Persist the full table, including transcripts, to CSV without the index column
df.to_csv("youtube_videos.csv", index=False)