In [1]:
from googleapiclient.discovery import build
from IPython.display import JSON, display
import re
import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
import os
from kaggle_secrets import UserSecretsClient
import git
from git import Repo
import shutil
from pytz import timezone

In [2]:
# --- Notebook-wide configuration -------------------------------------------
# A single Kaggle secrets client is created once and reused for every lookup.
user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("api_key2")

# YouTube Data API client shared by every request in this notebook.
api_service_name = "youtube"
api_version = "v3"
youtube = build(api_service_name, api_version, developerKey=api_key)

# All dates/timestamps in this notebook are taken in Indian Standard Time.
ist = timezone('Asia/Kolkata')
# Captured once at start-up so every file written during a run (and the commit
# message) carries the same stamp.
# NOTE(review): the ':' characters end up in the JSON file names written by
# RawFile and pushed to git; such names cannot be checked out on Windows.
timestamp = datetime.datetime.now(ist).strftime("%Y-%m-%d_%H:%M:%S")

In [3]:
# Search query sent as the `q` parameter of youtube.search().list in
# VideoDetailExtraction. Despite the name, this is a single string, not a list.
kw_list =  "donut recipes"

In [4]:
def VideoDetailExtraction(publishedAfter, maxResults=5):
    """Fetch the most-viewed videos for ``kw_list`` published after a date.

    Runs a ``search.list`` query (ordered by view count, English, US region,
    video category 26) and then a ``videos.list`` query to enrich every hit
    with its statistics and tags.

    Args:
        publishedAfter: Lower bound for the publish date, as the timestamp
            string passed straight to the API (e.g. ``"2024-01-01T00:00:00Z"``).
        maxResults: Maximum number of videos to request and return.

    Returns:
        pandas.DataFrame with one row per video and the columns ``videoId``,
        ``channelId``, ``channelName``, ``videoTitle``, ``description``,
        ``publishYear``, ``publishMonth``, ``publishDay``, ``publishTime``,
        ``publishedOn``, ``viewCount``, ``likeCount``, ``favoriteCount``,
        ``commentCount`` and ``videoTags``. The frame is empty when the
        search returns no items.
    """
    placeholder_time = '0000-00-00T00:00:00Z'

    search_response = youtube.search().list(
        part='snippet',
        order='viewCount',
        q=kw_list,
        relevanceLanguage='en',
        type='video',
        videoCategoryId=26,
        regionCode='US',
        # Honour the caller's limit instead of a hard-coded page size.
        maxResults=maxResults,
        publishedAfter=publishedAfter,
    ).execute()

    videoDetails = []
    # Slice rather than index by range(maxResults): the API may return fewer
    # items than requested.
    for item in search_response.get('items', [])[:maxResults]:
        snippet = item.get('snippet', {})
        # Read the timestamp of *this* item (not always the first one).
        published_on = (snippet.get('publishTime')
                        or snippet.get('publishedAt')
                        or placeholder_time)
        # "YYYY-MM-DDThh:mm:ssZ" -> [YYYY, MM, DD, hh:mm:ss, ''];
        # padded so a malformed value cannot raise IndexError below.
        parts = (re.split(r'[TZ-]', published_on) + [''] * 4)[:4]

        videoDetails.append({
            'videoId': item['id']['videoId'],
            'channelId': snippet.get('channelId'),
            'channelName': snippet.get('channelTitle'),
            'videoTitle': snippet.get('title'),
            'description': snippet.get('description'),
            'publishYear': parts[0],   # year
            'publishMonth': parts[1],  # month
            'publishDay': parts[2],    # day
            'publishTime': parts[3],   # hh:mm:ss
            'publishedOn': published_on,
        })

    if not videoDetails:
        # Nothing to enrich; skip the second API call entirely.
        return pd.DataFrame(videoDetails)

    stats_response = youtube.videos().list(
        part='statistics,snippet',
        id=','.join(detail['videoId'] for detail in videoDetails),
    ).execute()

    # Join on the video id: the videos endpoint may omit unavailable videos
    # or return them in a different order than requested.
    stats_by_id = {item['id']: item for item in stats_response.get('items', [])}
    for detail in videoDetails:
        video = stats_by_id.get(detail['videoId'], {})
        statistics = video.get('statistics', {})
        detail['viewCount'] = statistics.get('viewCount', 0)
        detail['likeCount'] = statistics.get('likeCount', 0)
        detail['favoriteCount'] = statistics.get('favoriteCount', 0)
        detail['commentCount'] = statistics.get('commentCount', 0)
        detail['videoTags'] = video.get('snippet', {}).get('tags', [])

    return pd.DataFrame(videoDetails)

In [5]:
def PushToGithub():
    """Copy the newest output file of each period into the repo and push it.

    For each of the prefixes ``Yesterday``, ``Last30Days``, ``Last1Year`` and
    ``Last5Year`` the most recently created matching file in
    ``/kaggle/working`` is selected. The repository under
    ``/kaggle/working/YouTubeFoodChannelAnalysis`` is pulled if it already
    exists, otherwise cloned from the URL stored in the
    ``repo_url_youtube_analysis`` secret. Each selected file is copied to
    ``Source/<prefix>/`` inside the repository, everything is staged,
    committed and pushed to ``origin``.

    Raises:
        ValueError: If no file exists for one of the prefixes.
    """
    working_dir = '/kaggle/working'
    repo_dir = os.path.join(working_dir, 'YouTubeFoodChannelAnalysis')
    # Each prefix doubles as the destination folder name under Source/.
    # "Last5Year" also matches files written with the "Last5Years" prefix.
    prefixes = ("Yesterday", "Last30Days", "Last1Year", "Last5Year")

    output_files = os.listdir(working_dir)
    LatestFiles = []
    for prefix in prefixes:
        candidates = [name for name in output_files if name.startswith(prefix)]
        if not candidates:
            raise ValueError(
                f"No output file starting with '{prefix}' found in {working_dir}")
        # Resolve against working_dir so the lookup does not depend on the
        # current working directory.
        LatestFiles.append(max(
            candidates,
            key=lambda name: os.path.getctime(os.path.join(working_dir, name))))
    print(LatestFiles)

    if os.path.exists(repo_dir):
        print("cloned and the repo file exist")
        repo = git.Repo(repo_dir)
        repo.remote(name='origin').pull()
        print("successfully pulled the git repo")
    else:
        repo_url = UserSecretsClient().get_secret("repo_url_youtube_analysis")
        repo = git.Repo.clone_from(repo_url, repo_dir)
        print("successfully cloned the git repo")

    print(os.listdir(repo_dir))

    # LatestFiles is ordered like `prefixes`, so zip pairs each file with its
    # destination folder.
    for prefix, name in zip(prefixes, LatestFiles):
        target_dir = os.path.join(repo_dir, 'Source', prefix)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(os.path.join(working_dir, name),
                        os.path.join(target_dir, name))

    repo.git.add(all=True)
    repo.index.commit(f"{timestamp} Added files from Kaggle notebook, {LatestFiles}")
    repo.remote(name="origin").push()
    print("Output files successfully pushed to GitHub!")

    

In [6]:
def RawFile(dataframe, prefix):
    """Dump ``dataframe`` to ``<prefix>_<timestamp>.json`` in the current directory.

    The file name uses the module-level ``timestamp``; rows are written as a
    list of records with 4-space indentation.

    Args:
        dataframe: DataFrame to serialise.
        prefix: Leading part of the output file name.

    Returns:
        True once the file has been written.
    """
    target = f"{prefix}_{timestamp}.json"
    dataframe.to_json(target, orient="records", indent=4)
    print(f"DataFrame saved as {target}")
    return True

In [7]:
def TopVideosBasedOnTime():
    """Save the top video for each look-back window as a JSON file.

    Windows are measured back from today's date in IST: 1 day, 30 days,
    1 year and 5 years. For each window the cutoff is formatted as
    ``<date>T00:00:00Z``, the single top result is fetched with
    ``VideoDetailExtraction`` and written with ``RawFile``.

    Returns:
        True after all four windows have been processed.
    """
    today_ist = datetime.datetime.now(ist).date()  # Current date in IST
    print(today_ist)

    # (file prefix, offset applied to today's date)
    windows = (
        ('Yesterday', relativedelta(days=-1)),
        ('Last30Days', relativedelta(days=-30)),
        ('Last1Year', relativedelta(years=-1)),
        ('Last5Years', relativedelta(years=-5)),
    )
    for file_prefix, offset in windows:
        cutoff = "{}T00:00:00Z".format(today_ist + offset)
        top_videos = VideoDetailExtraction(cutoff, 1)
        print(cutoff)
        RawFile(top_videos, file_prefix)

    return True


In [8]:
def main():
    """Run the pipeline: collect the per-window JSON files, then push them to GitHub."""
    pipeline = (TopVideosBasedOnTime, PushToGithub)
    for step in pipeline:
        step()

In [9]:
# Entry point of the notebook: runs the whole pipeline. main() has no return
# statement, so display() renders None once the run has finished.
if __name__=="__main__":
    display(main())

2025-01-19
2025-01-18T00:00:00Z
DataFrame saved as Yesterday_2025-01-19_14:21:59.json
2024-12-20T00:00:00Z
DataFrame saved as Last30Days_2025-01-19_14:21:59.json
2024-01-19T00:00:00Z
DataFrame saved as Last1Year_2025-01-19_14:21:59.json
2020-01-19T00:00:00Z
DataFrame saved as Last5Years_2025-01-19_14:21:59.json
['Yesterday_2025-01-19_14:21:59.json', 'Last30Days_2025-01-19_14:21:59.json', 'Last1Year_2025-01-19_14:21:59.json', 'Last5Years_2025-01-19_14:21:59.json']
successfully cloned the git repo
['Source', 'README.md', '.git', 'YoutubeAnalysis.ipynb']
Output files successfully pushed to GitHub!


None

In [10]:
# Ad-hoc exploration, independent of the pipeline above: newest-first search
# for "donut recipes" videos in category 26 that have closed captions,
# published between 2009-01-01 and 2024-12-31, up to 50 results in one page.
# `response` is kept as a global because the next cell prints it.
request = youtube.search().list(part = 'snippet',
                                    order = 'date',
                                    q = "donut recipes",
                                    relevanceLanguage = 'en',
                                    type = 'video',
                                    videoCategoryId = 26,
                                    maxResults=50,
                                    # regionCode = 'IN',
                                    videoCaption = 'closedCaption',
                                    publishedAfter = '2009-01-01T00:00:00Z',
                                    publishedBefore = '2024-12-31T00:00:00Z')
response = request.execute()
# Last expression of the cell: renders the response as an IPython JSON object.
JSON(response)

<IPython.core.display.JSON object>

In [11]:
# Plain-text dump of the entire search response obtained in the previous cell.
print(response)

{'kind': 'youtube#searchListResponse', 'etag': 'QcvqfOHyI2jTmldpShnZOTon9Q4', 'nextPageToken': 'CDIQAA', 'regionCode': 'TW', 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 50}, 'items': [{'kind': 'youtube#searchResult', 'etag': '4RbPRvdgkJGP-AM05IBsDZp0Q9U', 'id': {'kind': 'youtube#video', 'videoId': 'aL213kqeVJ8'}, 'snippet': {'publishedAt': '2024-12-24T12:03:33Z', 'channelId': 'UCQ5eWCiIqR92C34IDEpDgUQ', 'title': 'Donut Recipe | टेस्टी डोनट बनविण्याची एकदम परफेक्ट व सोपी पद्धत | Donuts | Christmas Special Recipes', 'description': 'Donut Recipe | टेस्टी डोनट बनविण्याची एकदम परफेक्ट व सोपी पद्धत | Donuts ...', 'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/aL213kqeVJ8/default.jpg', 'width': 120, 'height': 90}, 'medium': {'url': 'https://i.ytimg.com/vi/aL213kqeVJ8/mqdefault.jpg', 'width': 320, 'height': 180}, 'high': {'url': 'https://i.ytimg.com/vi/aL213kqeVJ8/hqdefault.jpg', 'width': 480, 'height': 360}}, 'channelTitle': "Priya's Tasty Recipes", 'liveBroadcastConten

In [12]:
# !pwd  # To print the current working directory


In [13]:
# !ls  # List files in the current directory


In [14]:
# %cd /YouTubeFoodChannelAnalysis  # Change directory to the cloned repository


In [15]:
# %cd YouTubeFoodChannelAnalysis  # Change to the existing directory
# !git pull                       # Pull the latest changes


In [16]:
# !ls /kaggle/working



In [17]:
# !rm -rf /kaggle/working/YouTubeFoodChannelAnalysis  # Remove the existing folder if it's not a Git repo


In [18]:
# !ls -al ~/.ssh