In [1]:
from googleapiclient.discovery import build
import json
import pandas as pd
from pytube import extract
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
# from googletrans import Translator

In [2]:
with open("youtube_api_key.txt","r") as file:
    api_key = file.read().strip()

In [3]:
channel_id = "UC-Lq6oBPTgTXT_K-ylWL6hg"

In [4]:
# Build the YouTube API client
youtube = build("youtube", "v3", developerKey=api_key)

In [5]:
def get_channel_videos(channel_id):
    # Retrieve the uploads playlist ID
    request = youtube.channels().list(
        part="contentDetails",
        id=channel_id
    )
    response = request.execute()
    uploads_playlist_id = response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]

    # Retrieve all videos in the uploads playlist
    videos = []
    next_page_token = None
    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId=uploads_playlist_id,
            maxResults=50,
            pageToken=next_page_token
        )
        response = request.execute()
        videos += response["items"]
        next_page_token = response.get("nextPageToken")
        if next_page_token is None:
            break

    return videos

In [6]:
def get_video_descriptions(videos):
    descriptions = []
    for video in videos:
        video_id = video["snippet"]["resourceId"]["videoId"]
        title = video["snippet"]["title"]
        description = video["snippet"]["description"]
        descriptions.append({"title": title, "description": description, "video_id": video_id})
    return descriptions

In [7]:
# Get all videos from the channel
videos = get_channel_videos(channel_id)

# Get descriptions of all videos
descriptions = get_video_descriptions(videos)

In [8]:
videos[0]

{'kind': 'youtube#playlistItem',
 'etag': '2jU_GLtY6oDQGA_b-9wlk4A9iho',
 'id': 'VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLlNwSkx6M3gzMC1N',
 'snippet': {'publishedAt': '2024-06-29T05:30:07Z',
  'channelId': 'UC-Lq6oBPTgTXT_K-ylWL6hg',
  'title': 'From Working In Top Chinese Restaurants 40 Years To Their Own Eatery! MY LITTLE CHINA, Bengaluru',
  'description': '#mylittlechina #chinesefood #bangalore\n \nmy little china, chinese food, chinese restaurant, indo chinese cuisine, kothanur, bengaluru, bangalore, chinese food in bangalore, chicken fried rice, noodles, support small business, bengaluru food tour, bangalore food walk, bangalore food trail, karnataka food tour, support local, gourmet on the road, kripal amanna, food lovers tv, food lovers india, truth in food \n\nPradeep moved from his hometown in Nepal to Bengaluru over 40 years ago to seek employment in Chinese owned kitchens. SInce then, he has worked in some of the top Chinese restaurants in the city and also in Mumbai. Joined by hi

In [9]:
descriptions[0]

{'title': 'From Working In Top Chinese Restaurants 40 Years To Their Own Eatery! MY LITTLE CHINA, Bengaluru',
 'description': '#mylittlechina #chinesefood #bangalore\n \nmy little china, chinese food, chinese restaurant, indo chinese cuisine, kothanur, bengaluru, bangalore, chinese food in bangalore, chicken fried rice, noodles, support small business, bengaluru food tour, bangalore food walk, bangalore food trail, karnataka food tour, support local, gourmet on the road, kripal amanna, food lovers tv, food lovers india, truth in food \n\nPradeep moved from his hometown in Nepal to Bengaluru over 40 years ago to seek employment in Chinese owned kitchens. SInce then, he has worked in some of the top Chinese restaurants in the city and also in Mumbai. Joined by his brother, also a service staff veteran in Chinese restaurants, the duo opened My Little China, a modest restaurant in Kothanur, East Bengaluru nine years ago. I dropped into their restaurant impromptu one afternoon and tasted a 

In [10]:
def get_video_details(videos):
    video_details = []
    for video in videos:
        video_id = video["snippet"]["resourceId"]["videoId"]
        title = video["snippet"]["title"]
        description = video["snippet"]["description"]
        link = f"https://www.youtube.com/watch?v={video_id}"
        video_details.append({"Title": title, "Link": link, "Description": description})
    return video_details

In [11]:
video_details = get_video_details(videos)

In [12]:
df = pd.DataFrame(video_details)
df.head()

Unnamed: 0,Title,Link,Description
0,From Working In Top Chinese Restaurants 40 Yea...,https://www.youtube.com/watch?v=SpJLz3x30-M,#mylittlechina #chinesefood #bangalore\n \nmy ...
1,Try This Hidden Udupi Style Seafood Eatery In ...,https://www.youtube.com/watch?v=Sc3ej2j_atk,Watch the full video at https://youtu.be/Rn-UW...
2,Tasted This 'Smokey' Biryani! #biryani #foodvl...,https://www.youtube.com/watch?v=NHmKRUMSQv4,Watch complete video at https://youtu.be/Swvj1...
3,From A Job To Push Cart To His Own Shop! Coorg...,https://www.youtube.com/watch?v=xmnw9Vd5JTY,#coorgstyleporkcorner #coorgfood #bangalore\n ...
4,This Tiny Idli Vade Shop Is An Inspiration! St...,https://www.youtube.com/watch?v=muG-YG7JuDw,Watch the complete video at https://youtu.be/3...


In [13]:
df["Description"][:10]

0    #mylittlechina #chinesefood #bangalore\n \nmy ...
1    Watch the full video at https://youtu.be/Rn-UW...
2    Watch complete video at https://youtu.be/Swvj1...
3    #coorgstyleporkcorner #coorgfood #bangalore\n ...
4    Watch the complete video at https://youtu.be/3...
5    This Couple Brings Pune Street Food To Bengalu...
6    #subbannasbiryani #biryani #bangalore\n \nsubb...
7    If you love a good vegetarian lunch, the meal ...
8    #srivinayakacondiments #idlichutney #bangalore...
9    Watch the complete video at https://youtu.be/Q...
Name: Description, dtype: object

In [14]:
print(df["Description"][468])

#pizza #yelahanka #bangalore

best pizzas, north bangalore, bengaluru, siddys pizza, pizza recipe, pizza, pizzeria, margherita, smoked chicken, garlic bread, pepperoni, handmade pizzas, italian, authentic sicilian style pizzas, siddy, siddhanth sawkar, yelahanka, bangalore

When Siddhanth Sawkar was forced to close his café and home catering-turned-food truck brand, Spitfire Barbeque, owing to the pandemic, he spent a couple of months mulling his next move, making pizzas. Not just a generic version as dished out by fast food chains! Instead, he dug deep into his experience as a culinary student in Southern Italy, to open a pizzeria serving Sicilian style pizzas in his erstwhile North Bengaluru café in Yelahanka New Town. Siddy’s Pizza, as I discovered during a leisurely visit, is a one-man passion enterprise, where Siddy, as Siddhanth is known, does everything from taking the order to putting together the pizzas to serving customers who quite often chat with him as the cheese-laden fla

In [15]:
pd.DataFrame(videos[:5])

Unnamed: 0,kind,etag,id,snippet
0,youtube#playlistItem,2jU_GLtY6oDQGA_b-9wlk4A9iho,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLlNwSkx6M3gzMC1N,"{'publishedAt': '2024-06-29T05:30:07Z', 'chann..."
1,youtube#playlistItem,3r6rRcv4gEfNXbFvG_fWIuXFcZc,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLlNjM2VqMmpfYXRr,"{'publishedAt': '2024-06-28T06:30:10Z', 'chann..."
2,youtube#playlistItem,0sm7hBjtXGMzIMvg5Ntylu1LUUM,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLk5IbUtSVU1TUXY0,"{'publishedAt': '2024-06-27T12:00:33Z', 'chann..."
3,youtube#playlistItem,PrMfGxKl9aYALeWxTESmYmUj8no,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLnhtbnc5VmQ1SlRZ,"{'publishedAt': '2024-06-26T06:30:18Z', 'chann..."
4,youtube#playlistItem,JLm0EXAS3gHMN0oFugDLMdh_3jI,VVUtTHE2b0JQVGdUWFRfSy15bFdMNmhnLm11Ry1ZRzdKdUR3,"{'publishedAt': '2024-06-25T11:30:22Z', 'chann..."


In [16]:
import re
def extract_links(text):
    # Regular expression to find URLs
    url_pattern = re.compile(r'(https?://\S+)')
    return url_pattern.findall(text)

In [17]:
df["Links"] = df["Description"].apply(extract_links)

In [18]:
df.head()

Unnamed: 0,Title,Link,Description,Links
0,From Working In Top Chinese Restaurants 40 Yea...,https://www.youtube.com/watch?v=SpJLz3x30-M,#mylittlechina #chinesefood #bangalore\n \nmy ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ..."
1,Try This Hidden Udupi Style Seafood Eatery In ...,https://www.youtube.com/watch?v=Sc3ej2j_atk,Watch the full video at https://youtu.be/Rn-UW...,"[https://youtu.be/Rn-UWM54GNA, https://maps.ap..."
2,Tasted This 'Smokey' Biryani! #biryani #foodvl...,https://www.youtube.com/watch?v=NHmKRUMSQv4,Watch complete video at https://youtu.be/Swvj1...,"[https://youtu.be/Swvj1J9sJCk, https://maps.ap..."
3,From A Job To Push Cart To His Own Shop! Coorg...,https://www.youtube.com/watch?v=xmnw9Vd5JTY,#coorgstyleporkcorner #coorgfood #bangalore\n ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ..."
4,This Tiny Idli Vade Shop Is An Inspiration! St...,https://www.youtube.com/watch?v=muG-YG7JuDw,Watch the complete video at https://youtu.be/3...,"[https://youtu.be/3T8u-N77W7o, https://maps.ap..."


In [19]:
def filter_gmaps_links(links):
    gmaps_pattern = re.compile(r'(https?://(?:goo\.gl/maps|maps\.google|google\.com/maps|maps\.app\.goo\.gl|g\.page)\S+)')
    return [link for link in links if gmaps_pattern.match(link)]

In [20]:
df['gmaps_links'] = df['Links'].apply(filter_gmaps_links)
df.head()

Unnamed: 0,Title,Link,Description,Links,gmaps_links
0,From Working In Top Chinese Restaurants 40 Yea...,https://www.youtube.com/watch?v=SpJLz3x30-M,#mylittlechina #chinesefood #bangalore\n \nmy ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ...",[https://maps.app.goo.gl/EfSxRJfSabJ6HmU58]
1,Try This Hidden Udupi Style Seafood Eatery In ...,https://www.youtube.com/watch?v=Sc3ej2j_atk,Watch the full video at https://youtu.be/Rn-UW...,"[https://youtu.be/Rn-UWM54GNA, https://maps.ap...",[https://maps.app.goo.gl/CmCEQq2G1MBfZtpf9]
2,Tasted This 'Smokey' Biryani! #biryani #foodvl...,https://www.youtube.com/watch?v=NHmKRUMSQv4,Watch complete video at https://youtu.be/Swvj1...,"[https://youtu.be/Swvj1J9sJCk, https://maps.ap...",[https://maps.app.goo.gl/gGSmVCU9w5efBReT8]
3,From A Job To Push Cart To His Own Shop! Coorg...,https://www.youtube.com/watch?v=xmnw9Vd5JTY,#coorgstyleporkcorner #coorgfood #bangalore\n ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ...",[https://maps.app.goo.gl/MLJhu8fyUkSBSSwj6]
4,This Tiny Idli Vade Shop Is An Inspiration! St...,https://www.youtube.com/watch?v=muG-YG7JuDw,Watch the complete video at https://youtu.be/3...,"[https://youtu.be/3T8u-N77W7o, https://maps.ap...",[https://maps.app.goo.gl/NtsKYRsWHKu7L3Wr5]


In [21]:
df['gmaps_links'] = df['gmaps_links'].apply(lambda x: x if x else None)

In [22]:
df.head()

Unnamed: 0,Title,Link,Description,Links,gmaps_links
0,From Working In Top Chinese Restaurants 40 Yea...,https://www.youtube.com/watch?v=SpJLz3x30-M,#mylittlechina #chinesefood #bangalore\n \nmy ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ...",[https://maps.app.goo.gl/EfSxRJfSabJ6HmU58]
1,Try This Hidden Udupi Style Seafood Eatery In ...,https://www.youtube.com/watch?v=Sc3ej2j_atk,Watch the full video at https://youtu.be/Rn-UW...,"[https://youtu.be/Rn-UWM54GNA, https://maps.ap...",[https://maps.app.goo.gl/CmCEQq2G1MBfZtpf9]
2,Tasted This 'Smokey' Biryani! #biryani #foodvl...,https://www.youtube.com/watch?v=NHmKRUMSQv4,Watch complete video at https://youtu.be/Swvj1...,"[https://youtu.be/Swvj1J9sJCk, https://maps.ap...",[https://maps.app.goo.gl/gGSmVCU9w5efBReT8]
3,From A Job To Push Cart To His Own Shop! Coorg...,https://www.youtube.com/watch?v=xmnw9Vd5JTY,#coorgstyleporkcorner #coorgfood #bangalore\n ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ...",[https://maps.app.goo.gl/MLJhu8fyUkSBSSwj6]
4,This Tiny Idli Vade Shop Is An Inspiration! St...,https://www.youtube.com/watch?v=muG-YG7JuDw,Watch the complete video at https://youtu.be/3...,"[https://youtu.be/3T8u-N77W7o, https://maps.ap...",[https://maps.app.goo.gl/NtsKYRsWHKu7L3Wr5]


In [23]:
# Function to extract video ID
def get_video_id(link):
    return extract.video_id(link)

# Apply the function to the Link column
df['video_id'] = df['Link'].apply(get_video_id)

In [24]:
df[df["gmaps_links"].isnull()][["Description"]].to_csv("no_gmaps_links.csv", index=False)

In [25]:
def extract_and_translate_transcript(youtube_video_url):
    try:
        video_id = youtube_video_url.split("=")[1]
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        
        # Attempt to get a manually created transcript in English
        try:
            transcript = transcript_list.find_transcript(['en'])
        except NoTranscriptFound:
            # Attempt to get an auto-generated transcript in English
            try:
                transcript = transcript_list.find_generated_transcript(['en'])
            except NoTranscriptFound:
                # Attempt to get a manually created transcript in Hindi or Kannada
                try:
                    transcript = transcript_list.find_transcript(['hi', 'kn'])
                except NoTranscriptFound:
                    return "No transcript available"
        
        transcript_text = transcript.fetch()
        
        full_transcript = ""
        for i in transcript_text:
            full_transcript += " " + i["text"]
        
        # If the transcript is not in English, translate it
        if transcript.language_code not in ['en']:
            translator = Translator()
            translated_transcript = translator.translate(full_transcript, src=transcript.language_code, dest='en')
            return translated_transcript.text.strip()
        
        return full_transcript.strip()
    except Exception as e:
        return f"Error: {e}"

In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1061 entries, 0 to 1060
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Title        1061 non-null   object
 1   Link         1061 non-null   object
 2   Description  1061 non-null   object
 3   Links        1061 non-null   object
 4   gmaps_links  612 non-null    object
 5   video_id     1061 non-null   object
dtypes: object(6)
memory usage: 49.9+ KB


In [27]:
def process_row(row):
    print(f"Processing row {row.name}")
    return extract_and_translate_transcript(row['Link'])

df['transcript'] = df.apply(process_row, axis=1)

Processing row 0
Processing row 1
Processing row 2
Processing row 3
Processing row 4
Processing row 5
Processing row 6
Processing row 7
Processing row 8
Processing row 9
Processing row 10
Processing row 11
Processing row 12
Processing row 13
Processing row 14
Processing row 15
Processing row 16
Processing row 17
Processing row 18
Processing row 19
Processing row 20
Processing row 21
Processing row 22
Processing row 23
Processing row 24
Processing row 25
Processing row 26
Processing row 27
Processing row 28
Processing row 29
Processing row 30
Processing row 31
Processing row 32
Processing row 33
Processing row 34
Processing row 35
Processing row 36
Processing row 37
Processing row 38
Processing row 39
Processing row 40
Processing row 41
Processing row 42
Processing row 43
Processing row 44
Processing row 45
Processing row 46
Processing row 47
Processing row 48
Processing row 49
Processing row 50
Processing row 51
Processing row 52
Processing row 53
Processing row 54
Processing row 55
Pr

In [28]:
df.head()

Unnamed: 0,Title,Link,Description,Links,gmaps_links,video_id,transcript
0,From Working In Top Chinese Restaurants 40 Yea...,https://www.youtube.com/watch?v=SpJLz3x30-M,#mylittlechina #chinesefood #bangalore\n \nmy ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ...",[https://maps.app.goo.gl/EfSxRJfSabJ6HmU58],SpJLz3x30-M,Error: name 'Translator' is not defined
1,Try This Hidden Udupi Style Seafood Eatery In ...,https://www.youtube.com/watch?v=Sc3ej2j_atk,Watch the full video at https://youtu.be/Rn-UW...,"[https://youtu.be/Rn-UWM54GNA, https://maps.ap...",[https://maps.app.goo.gl/CmCEQq2G1MBfZtpf9],Sc3ej2j_atk,Error: name 'Translator' is not defined
2,Tasted This 'Smokey' Biryani! #biryani #foodvl...,https://www.youtube.com/watch?v=NHmKRUMSQv4,Watch complete video at https://youtu.be/Swvj1...,"[https://youtu.be/Swvj1J9sJCk, https://maps.ap...",[https://maps.app.goo.gl/gGSmVCU9w5efBReT8],NHmKRUMSQv4,well we had almost forgotten the [Music] Birya...
3,From A Job To Push Cart To His Own Shop! Coorg...,https://www.youtube.com/watch?v=xmnw9Vd5JTY,#coorgstyleporkcorner #coorgfood #bangalore\n ...,"[https://www.youtube.com/c/KripalAmannaVlogs, ...",[https://maps.app.goo.gl/MLJhu8fyUkSBSSwj6],xmnw9Vd5JTY,[Music] green garc this is basically the Kaa M...
4,This Tiny Idli Vade Shop Is An Inspiration! St...,https://www.youtube.com/watch?v=muG-YG7JuDw,Watch the complete video at https://youtu.be/3...,"[https://youtu.be/3T8u-N77W7o, https://maps.ap...",[https://maps.app.goo.gl/NtsKYRsWHKu7L3Wr5],muG-YG7JuDw,Error: name 'Translator' is not defined


In [29]:
df.to_csv("youtube_videos.csv", index=False)