In [471]:
import requests
import numpy as np
import pandas as pd
import time

In [525]:
# Trending-type label -> value sent as the `params` field of the browse request
# (None for the 'Now' entry). Insertion order matters: add_other_sections uses
# each key's position in this mapping as the index of the tab it reads.
TRENDING_TYPE_DICT = dict(
    Now=None,
    Music="4gINGgt5dG1hX2NoYXJ0cw%3D%3D",
    Gaming="4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D",
    Movie="4gIKGgh0cmFpbGVycw%3D%3D",
)

# Country-code lists; post_request_ytb sends each code as the client `gl` field.
ENGLISH_COUNTRY_CODE_LIST = ["US", "CA", "AU"]
FRENCH_COUNTRY_CODE_LIST = ["FR"]

In [526]:
def post_request_ytb(country_code_list, trending_type_dict, timeout=30, delay=5):
    """Request the trending feed once per (country, trending type) pair.

    Parameters
    ----------
    country_code_list : iterable of str
        Codes sent one at a time as the client ``gl`` field.
    trending_type_dict : dict
        Maps a trending-type label to the value sent as the request ``params``.
    timeout : float, optional
        Seconds handed to ``requests.post`` so a stalled connection raises
        instead of blocking the notebook indefinitely.
    delay : float, optional
        Seconds to sleep after each request.

    Returns
    -------
    dict
        ``{country_code: {trending_type: decoded JSON body}}``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no answer arrives within ``timeout`` seconds.
    """
    responses_dict = {}
    for country_code in country_code_list:
        country_responses = responses_dict[country_code] = {}
        for trending_type, trending_type_url in trending_type_dict.items():
            json_data = {
                'context': {
                    'client': {
                        'gl': country_code,
                        'clientName': 'WEB',
                        'clientVersion': '2.20231115.01.01',
                        'originalUrl': 'https://www.youtube.com/feed/trending',
                    },
                },
                'browseId': 'FEtrending',
                'params': trending_type_url,
            }

            response = requests.post(
                'https://www.youtube.com/youtubei/v1/browse',
                json=json_data,
                timeout=timeout,
            )
            # Fail here with a clear HTTP error rather than later with a
            # KeyError while walking an error payload.
            response.raise_for_status()
            country_responses[trending_type] = response.json()
            time.sleep(delay)
    return responses_dict

In [527]:
def collect_video_data(video_list, data_dictionary, trending_type, country_code):
    """Append the listing fields of each video in ``video_list`` to ``data_dictionary``.

    The update is all-or-nothing: every new column is built before any list in
    ``data_dictionary`` is touched, so a missing field raises without leaving
    the columns with different lengths.

    Parameters
    ----------
    video_list : sequence of dict
        Items that each hold a ``"videoRenderer"`` mapping.
    data_dictionary : dict of list
        Column name -> list of values; extended in place.
    trending_type : object
        Value stored in ``"videoType"`` for every item.
    country_code : object
        Value stored in ``"trendingCountry"`` for every item.

    Returns
    -------
    None

    Raises
    ------
    KeyError, IndexError
        If an item lacks a required field (only the description snippet is
        optional and falls back to NaN), or if ``data_dictionary`` lacks one
        of the target columns.
    """
    renderers = [entry["videoRenderer"] for entry in video_list]

    # The description snippet is the only optional field.
    description_snippets = []
    for renderer in renderers:
        try:
            description_snippets.append(renderer["descriptionSnippet"]["runs"][0]['text'])
        except KeyError:
            description_snippets.append(np.nan)

    new_columns = {
        "videoTitle": [r["title"]["runs"][0]["text"] for r in renderers],
        "videoId": [r["videoId"] for r in renderers],
        "videoThumbnailUrl": [r["thumbnail"]["thumbnails"][2]["url"] for r in renderers],
        "videoDescriptionSnippet": description_snippets,
        "videoRelativePublishedTimeText": [r["publishedTimeText"]['simpleText'] for r in renderers],
        "videoLength": [r["lengthText"]["simpleText"] for r in renderers],
        "videoViewCountText": [r["viewCountText"]["simpleText"] for r in renderers],
        "videoCreatorName": [r["ownerText"]["runs"][0]["text"] for r in renderers],
        "videoType": [trending_type for _ in renderers],
        "trendingCountry": [country_code for _ in renderers],
    }

    # Resolve every destination list first so a missing column also raises
    # before anything has been modified.
    destinations = {name: data_dictionary[name] for name in new_columns}
    for name, values in new_columns.items():
        destinations[name].extend(values)
    return None

In [528]:
def collect_short_data(shorts_list, data_dictionary, country_code):
    """Append the listing fields of each Short in ``shorts_list`` to ``data_dictionary``.

    Fields this function does not read from a Short (description snippet,
    relative publish time, length, creator name) are filled with NaN, and
    ``"videoType"`` is always ``'Short'``.

    The update is all-or-nothing: every new column is built before any list in
    ``data_dictionary`` is touched, so a missing field raises without leaving
    the columns with different lengths.

    Parameters
    ----------
    shorts_list : sequence of dict
        Items that each hold a ``"reelItemRenderer"`` mapping.
    data_dictionary : dict of list
        Column name -> list of values; extended in place.
    country_code : object
        Value stored in ``"trendingCountry"`` for every item.

    Returns
    -------
    None

    Raises
    ------
    KeyError, IndexError
        If an item lacks a required field, or if ``data_dictionary`` lacks one
        of the target columns.
    """
    renderers = [entry["reelItemRenderer"] for entry in shorts_list]
    missing = [np.nan for _ in renderers]

    new_columns = {
        "videoTitle": [r["headline"]["simpleText"] for r in renderers],
        "videoId": [r["videoId"] for r in renderers],
        "videoThumbnailUrl": [r["thumbnail"]["thumbnails"][0]["url"] for r in renderers],
        "videoDescriptionSnippet": list(missing),
        "videoRelativePublishedTimeText": list(missing),
        "videoLength": list(missing),
        "videoViewCountText": [r["viewCountText"]["simpleText"] for r in renderers],
        "videoCreatorName": list(missing),
        "videoType": ['Short' for _ in renderers],
        "trendingCountry": [country_code for _ in renderers],
    }

    # Resolve every destination list first so a missing column also raises
    # before anything has been modified.
    destinations = {name: data_dictionary[name] for name in new_columns}
    for name, values in new_columns.items():
        destinations[name].extend(values)
    return None

In [529]:
def add_now_videos_shorts(response_dict, data_dictionary):
    """Fill ``data_dictionary`` in place from each country's 'Now' response.

    For every country the first four sections of the first tab are read by
    position: sections 0 and 2 are stored with type 'Now', section 3 with type
    'Recently Trending', and section 1 is handed to ``collect_short_data``.
    Returns None.
    """
    def expanded_shelf_items(section):
        # Items of a regular video shelf.
        return section["itemSectionRenderer"]["contents"][0]["shelfRenderer"]["content"]["expandedShelfContentsRenderer"]["items"]

    for country_code, country_responses in response_dict.items():
        print("Current country : ", country_code)
        first_tab = country_responses['Now']["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][0]
        section_now = first_tab["tabRenderer"]["content"]["sectionListRenderer"]["contents"]

        # Extract everything before collecting, so a layout mismatch raises
        # before the shared dictionary is modified for this country.
        list_videos_before = expanded_shelf_items(section_now[0])
        shorts_list = section_now[1]["itemSectionRenderer"]["contents"][0]["reelShelfRenderer"]["items"]
        list_videos_after = expanded_shelf_items(section_now[2])
        list_videos_recently = expanded_shelf_items(section_now[3])

        labelled_shelves = (
            (list_videos_before, 'Now'),
            (list_videos_after, 'Now'),
            (list_videos_recently, 'Recently Trending'),
        )
        for shelf, label in labelled_shelves:
            collect_video_data(shelf, data_dictionary, label, country_code)
        collect_short_data(shorts_list, data_dictionary, country_code)
    return None

In [530]:
def add_other_sections(response_dict, data_dictionary):
    """Fill ``data_dictionary`` in place from every response except the first one per country.

    The position of a key inside a country's response mapping doubles as the
    index of the tab that is read, so the mapping must keep its insertion
    order (dicts do in recent Python versions). Position 0 is skipped here.
    Returns None.
    """
    for country_code, country_responses in response_dict.items():
        for idx, (key, payload) in enumerate(country_responses.items()):
            if idx == 0:
                continue
            selected_tab = payload["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][idx]
            sections = selected_tab["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
            first_shelf = sections[0]["itemSectionRenderer"]["contents"][0]["shelfRenderer"]
            video_list = first_shelf["content"]["expandedShelfContentsRenderer"]["items"]
            collect_video_data(video_list, data_dictionary, key, country_code)
    return None

In [531]:
def update_video_data(data_dictionary, timeout=30, delay=5):
    """Append per-video watch-page fields to ``data_dictionary`` in place.

    One request is sent for every id in ``data_dictionary["videoId"]``; one
    value is then appended to each of ``exactViewNumber``, ``numberLikes``,
    ``videoDate``, ``creatorSubscriberNumber``, ``videoVerboseDescription``,
    ``numberOfComments`` and ``isCreatorVerified``. All values for a video are
    extracted before any is appended, so a failure never leaves those columns
    with different lengths.

    Every call processes the whole id list again, so calling this twice on the
    same dictionary appends a second set of values.

    Parameters
    ----------
    data_dictionary : dict of list
        Column name -> list of values.
    timeout : float, optional
        Seconds handed to ``requests.post`` so a stalled connection raises
        instead of blocking.
    delay : float, optional
        Seconds to sleep after each video.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError, IndexError
        If a required field is absent from the response.
    """
    for video_id in data_dictionary["videoId"]:
        print("Currently processing video_id:", video_id)
        json_data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20231208.01.00',
                }
            },
            'videoId': video_id,
        }

        response = requests.post(
            'https://www.youtube.com/youtubei/v1/next',
            json=json_data,
            timeout=timeout,
        )
        response.raise_for_status()
        video_response = response.json()
        individual_video_data = video_response["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"]
        primary_info = individual_video_data[0]["videoPrimaryInfoRenderer"]
        secondary_info = individual_video_data[1]["videoSecondaryInfoRenderer"]
        owner = secondary_info["owner"]["videoOwnerRenderer"]

        try:
            verbose_description = secondary_info["attributedDescription"]["content"]
        except KeyError:  # no description available
            verbose_description = np.nan

        # The comment header is looked up at position 2, then at position 3
        # (an extra "merchandise" section can shift it). Either lookup may fail
        # with KeyError or IndexError; if both fail the count stays NaN
        # (comments are turned off).
        comment_count = np.nan
        for position in (2, 3):
            try:
                comment_count = individual_video_data[position]["itemSectionRenderer"]["contents"][0]["commentsEntryPointHeaderRenderer"]["commentCount"]["simpleText"]
                break
            except (KeyError, IndexError):
                continue

        try:
            is_verified = owner["badges"][0]["metadataBadgeRenderer"]["tooltip"] == "Verified"
        except (KeyError, IndexError):  # no badge list, or an empty one
            is_verified = False

        row = {
            "exactViewNumber": primary_info["viewCount"]["videoViewCountRenderer"]["viewCount"]["simpleText"],
            "numberLikes": primary_info["videoActions"]["menuRenderer"]["topLevelButtons"][0]["segmentedLikeDislikeButtonViewModel"]["likeButtonViewModel"]["likeButtonViewModel"]["toggleButtonViewModel"]["toggleButtonViewModel"]["defaultButtonViewModel"]["buttonViewModel"]["title"],
            "videoDate": primary_info["dateText"]["simpleText"],
            "creatorSubscriberNumber": owner["subscriberCountText"]["simpleText"],
            "videoVerboseDescription": verbose_description,
            "numberOfComments": comment_count,
            "isCreatorVerified": is_verified,
        }
        # Commit the whole row at once, after every lookup has succeeded.
        for column, value in row.items():
            data_dictionary[column].append(value)
        time.sleep(delay)
    return None

In [532]:
def update_meta_data(data_dictionary, timeout=30, delay=5):
    """Append per-video player metadata to ``data_dictionary`` in place.

    One request is sent for every id in ``data_dictionary["videoId"]``; one
    value is then appended to each of ``videoKeywords``, ``videoLengthSeconds``,
    ``videoIsLiveContent``, ``videoCategory``, ``isFamilySafe``, ``creatorUrl``
    and ``videoExactPublishDate``. All values for a video are extracted before
    any is appended, so a failure never leaves those columns with different
    lengths.

    Every call processes the whole id list again, so calling this twice on the
    same dictionary appends a second set of values.

    Parameters
    ----------
    data_dictionary : dict of list
        Column name -> list of values.
    timeout : float, optional
        Seconds handed to ``requests.post`` so a stalled connection raises
        instead of blocking.
    delay : float, optional
        Seconds to sleep after each video.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If a required field is absent from the response.
    """
    for video_id in data_dictionary["videoId"]:
        print("Currently processing video_id:", video_id)
        json_data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20231208.01.00',
                },
            },
            'videoId': video_id,
        }

        response = requests.post(
            'https://www.youtube.com/youtubei/v1/player',
            json=json_data,
            timeout=timeout,
        )
        response.raise_for_status()
        meta_response = response.json()

        video_details = meta_response["videoDetails"]
        microformat = meta_response["microformat"]["playerMicroformatRenderer"]
        try:
            keywords = video_details["keywords"]
        except KeyError:  # no keywords
            keywords = np.nan

        row = {
            "videoKeywords": keywords,
            "videoLengthSeconds": video_details["lengthSeconds"],
            "videoIsLiveContent": video_details["isLiveContent"],
            "videoCategory": microformat["category"],
            "isFamilySafe": microformat["isFamilySafe"],
            "creatorUrl": microformat["ownerProfileUrl"],
            "videoExactPublishDate": microformat["uploadDate"],
        }
        # Commit the whole row at once, after every lookup has succeeded.
        for column, value in row.items():
            data_dictionary[column].append(value)
        time.sleep(delay)
    return None

#### Populate data_dictionary with trending videos

In [533]:
# One empty list per output column. Each key gets its own list object, and the
# key order below becomes the column order of the DataFrame built later.
french_data_dictionary = {
    column_name: []
    for column_name in (
        # filled by collect_video_data / collect_short_data
        "videoTitle",
        "videoId",
        "videoThumbnailUrl",
        "videoDescriptionSnippet",
        "videoRelativePublishedTimeText",
        "videoLength",
        "videoViewCountText",
        "videoCreatorName",
        "videoType",
        "trendingCountry",
        # filled by update_video_data
        "exactViewNumber",
        "numberLikes",
        "videoDate",
        "creatorSubscriberNumber",
        "videoVerboseDescription",
        "numberOfComments",
        "isCreatorVerified",
        # filled by update_meta_data
        "videoKeywords",
        "videoLengthSeconds",
        "videoIsLiveContent",
        "videoCategory",
        "isFamilySafe",
        "videoExactPublishDate",
        "creatorUrl",
    )
}

# Fetch one response per (country, trending type) for the French country list,
# then fill the listing columns of french_data_dictionary in place: first from
# the 'Now' response, then from the remaining trending types.
response_dict = post_request_ytb(FRENCH_COUNTRY_CODE_LIST, TRENDING_TYPE_DICT)
add_now_videos_shorts(response_dict, french_data_dictionary)
add_other_sections(response_dict, french_data_dictionary)

Current country :  FR


#### Update individual video data inside data_dictionary

In [534]:
# One request per collected video id, with a pause after each one.
# The function appends for every id on each call, so re-running this cell
# without rebuilding french_data_dictionary appends a second set of values.
update_video_data(french_data_dictionary)

Currently processing video_id: zePFAZMUnbU
Currently processing video_id: F8lPTlpThEo
Currently processing video_id: WB9wpwPVzyY
Currently processing video_id: P4hkzfYaz3Q
Currently processing video_id: 0_9I9Jn_YGE
Currently processing video_id: _Gv62FX9fFc
Currently processing video_id: dh8XxY717Gw
Currently processing video_id: s5GeFygLEig
Currently processing video_id: aVWrDes6A0Q
Currently processing video_id: tAcKfnf0zv4
Currently processing video_id: KlMrRUbmwPs
Currently processing video_id: XE_IK7ouDuc
Currently processing video_id: tVL9QrLQ0G0
Currently processing video_id: RwJ-OLVSNks
Currently processing video_id: OXBmgmCkIUc
Currently processing video_id: F_j29MEd2mY
Currently processing video_id: tmSm-tnifVk
Currently processing video_id: pBeB1s-58H8
Currently processing video_id: 22FwMc_V_Y4
Currently processing video_id: YfjpIw2pzbI
Currently processing video_id: GFIVMqa87QQ
Currently processing video_id: sEU5BTWnYk8
Currently processing video_id: FkY26NUZJyA
Currently p

#### Update individual video metadata inside data_dictionary

In [535]:
# One request per collected video id, with a pause after each one.
# The function appends for every id on each call, so re-running this cell
# without rebuilding french_data_dictionary appends a second set of values.
update_meta_data(french_data_dictionary)

Currently processing video_id: zePFAZMUnbU
Currently processing video_id: F8lPTlpThEo
Currently processing video_id: WB9wpwPVzyY
Currently processing video_id: P4hkzfYaz3Q
Currently processing video_id: 0_9I9Jn_YGE
Currently processing video_id: _Gv62FX9fFc
Currently processing video_id: dh8XxY717Gw
Currently processing video_id: s5GeFygLEig
Currently processing video_id: aVWrDes6A0Q
Currently processing video_id: tAcKfnf0zv4
Currently processing video_id: KlMrRUbmwPs
Currently processing video_id: XE_IK7ouDuc
Currently processing video_id: tVL9QrLQ0G0
Currently processing video_id: RwJ-OLVSNks
Currently processing video_id: OXBmgmCkIUc
Currently processing video_id: F_j29MEd2mY
Currently processing video_id: tmSm-tnifVk
Currently processing video_id: pBeB1s-58H8
Currently processing video_id: 22FwMc_V_Y4
Currently processing video_id: YfjpIw2pzbI
Currently processing video_id: GFIVMqa87QQ
Currently processing video_id: sEU5BTWnYk8
Currently processing video_id: FkY26NUZJyA
Currently p

In [537]:
# Build the table from the column lists; pandas raises ValueError here if the
# lists do not all have the same length.
df = pd.DataFrame(french_data_dictionary)

In [539]:
# Write the table next to the notebook; with the default arguments to_csv also
# writes the row index as the first (unnamed) column.
df.to_csv("./french_youtube_10_12_23.csv")