# **Dependencies**

## Imports

In [None]:
# !pip install --upgrade google-api-python-client

In [None]:
import numpy as np
import pandas as pd

In [None]:
import requests

In [None]:
import os
from googleapiclient.discovery import build

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

## Constants

In [None]:
# Default number of results requested per YouTube API search call; used as
# the default `max_results` argument of the search helpers in this notebook.
MAX_YT_SEARCH_RESULTS = 2

In [None]:
# API usage counters, incremented by the search helpers in this notebook.
# A `global` statement at module level does not create a variable, so the
# counters are initialised here; otherwise the first `+= 1` inside a helper
# raises NameError unless some other cell has assigned them first.
SEARCH_LIST_API_CALL_COUNT = 0  # number of youtube.search().list requests
VIDEO_LIST_API_CALL_COUNT = 0   # number of youtube.videos().list requests

CHANNEL_SEARCH_CALL_COUNT = 0   # number of completed channel-based lookups
TOPIC_SEARCH_CALL_COUNT = 0     # number of completed topic-based lookups

# **Country Code and Language**

In [None]:
def get_country_code(timeout=5):
    """Look up the country code for the machine's public IP via ipinfo.io.

    Args:
        timeout: Seconds to wait for ipinfo.io before giving up. Without a
            timeout `requests.get` may block indefinitely.

    Returns:
        The "country" field of the JSON response ("" when the field is
        absent), or None when the request fails, returns an HTTP error
        status, or the body cannot be parsed.
    """
    try:
        response = requests.get("https://ipinfo.io", timeout=timeout)
        # Treat 4xx/5xx answers (e.g. rate limiting) as a failed lookup
        # instead of reading a country out of an error payload.
        response.raise_for_status()
        data = response.json()
        return data.get("country", "")
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller fall back.
        print(f"An error occurred: {e}")
        return None

In [None]:
# Country code -> language code(s) spoken there.
# When more than one language is listed, the codes are comma-separated
# inside a single string (e.g. 'en, fr').
country_language_mapping = dict(
    US='en',        # English
    CA='en, fr',    # English, French
    GB='en',        # English
    FR='fr',        # French
    ES='es',        # Spanish
    DE='de',        # German
    IT='it',        # Italian
    JP='ja',        # Japanese
    CN='zh',        # Mandarin (Chinese)
    RU='ru',        # Russian
    IN='hi, en',    # Hindi, English, and many regional languages
    BR='pt',        # Portuguese
    MX='es',        # Spanish
    AU='en',        # English
    AR='es',        # Spanish
    EG='ar',        # Arabic
    SA='ar',        # Arabic
    KR='ko',        # Korean
    ID='id',        # Indonesian
    NG='en',        # English
    KE='sw, en',    # Swahili, English
    TR='tr',        # Turkish
    IR='fa',        # Persian
    # Add more country-code to language mappings as needed
)

In [None]:
country_code = get_country_code()
print(country_code)

# The mapping may list several languages for one country (e.g. 'hi, en'),
# but the value is later sent as the YouTube `relevanceLanguage` parameter,
# which takes a single language code. Keep only the first listed code.
# English is the fallback when the country is not in the mapping, which
# includes the case where the lookup failed and country_code is None.
language_spoken = country_language_mapping.get(country_code, 'en').split(',')[0].strip()

print(language_spoken)

US
en


# **YouTube API Init**

In [None]:
# Never commit an API key to the notebook: anyone who can read the file can
# spend the project's quota. The key is read from the YOUTUBE_API_KEY
# environment variable instead. Any key that was previously hardcoded here
# should be treated as leaked and revoked.
API_KEY = os.environ.get('YOUTUBE_API_KEY')
if not API_KEY:
    print('YOUTUBE_API_KEY is not set; YouTube Data API requests will be rejected.')

API_VERSION = 'v3'
API_NAME = 'youtube'

In [None]:
# Shared YouTube Data API client used by every search helper in this notebook.
youtube = build(serviceName=API_NAME, version=API_VERSION, developerKey=API_KEY)

# **YouTube API Search Functions**

## Basic YouTube Search

In [None]:
def search_youtube(query, max_results=MAX_YT_SEARCH_RESULTS):
    """Run a plain keyword search and return the matching videos.

    Issues one search.list request (counted in SEARCH_LIST_API_CALL_COUNT),
    restricted to videos and biased by the module-level `language_spoken`
    and `country_code`.

    Args:
        query: Search terms.
        max_results: Value passed as `maxResults` to the API.

    Returns:
        A list of {'title': ..., 'video_id': ...} dicts, one per result
        whose id kind is 'youtube#video'.
    """
    global SEARCH_LIST_API_CALL_COUNT

    SEARCH_LIST_API_CALL_COUNT += 1
    request = youtube.search().list(
        q=query,
        type='video',
        part='id,snippet',
        maxResults=max_results,
        relevanceLanguage=language_spoken,
        regionCode=country_code,
    )
    response = request.execute()

    return [
        {
            'title': item['snippet']['title'],
            'video_id': item['id']['videoId'],
        }
        for item in response.get('items', [])
        if item['id']['kind'] == 'youtube#video'
    ]

## Search By Channel ID

### Most Recent Videos

In [None]:
def get_most_recent_videos_by_channel_id(channel_id, max_results=MAX_YT_SEARCH_RESULTS):
    """Fetch the newest videos of a channel with their topics and tags.

    Issues one search.list request (ordered by date) to collect video IDs
    and, when any were found, one videos.list request for the details. The
    module-level call counters are updated accordingly.

    Args:
        channel_id: ID of the channel to search in.
        max_results: Value passed as `maxResults` to search.list.

    Returns:
        A list of dicts with the keys 'title', 'video_id', 'topic_details'
        (list of topic names; empty when the API returned no topic
        categories for the video) and 'video_tags' (list of tags, or None
        when the video has none). Empty list when the search found nothing.
    """
    global SEARCH_LIST_API_CALL_COUNT, VIDEO_LIST_API_CALL_COUNT, CHANNEL_SEARCH_CALL_COUNT

    # Call the search.list method to search for videos from the specified channel
    SEARCH_LIST_API_CALL_COUNT += 1
    search_response = youtube.search().list(
        channelId=channel_id,
        type='video',
        part='id',
        maxResults=max_results,
        order='date',  # Sort by date (most recent)
        relevanceLanguage=language_spoken,
        regionCode=country_code
    ).execute()

    video_ids = [result['id']['videoId'] for result in search_response.get('items', [])]
    if not video_ids:
        # Nothing matched: do not call videos.list with an empty id filter.
        CHANNEL_SEARCH_CALL_COUNT += 1
        return []

    # Call the videos.list method to retrieve video details
    VIDEO_LIST_API_CALL_COUNT += 1
    video_details = youtube.videos().list(
        part='snippet, topicDetails',
        id=','.join(video_ids)
    ).execute()

    videos = []
    for item in video_details.get('items', []):
        snippet = item['snippet']
        # topicDetails (and topicCategories inside it) is not present on
        # every video, so fall back to an empty list instead of a KeyError.
        topic_urls = item.get('topicDetails', {}).get('topicCategories', [])
        videos.append({
            'title': snippet['title'],
            'video_id': item['id'],
            # [30:] drops the leading 'https://en.wikipedia.org/wiki/'
            # (30 characters) from each topic category URL.
            'topic_details': [url[30:] for url in topic_urls],
            'video_tags': snippet.get('tags'),
        })

    CHANNEL_SEARCH_CALL_COUNT += 1
    return videos

### Most Popular Videos

In [None]:
def get_most_popular_videos_by_channel_id(channel_id, max_results=MAX_YT_SEARCH_RESULTS):
    """Fetch the most viewed videos of a channel with their topics and tags.

    Issues one search.list request (ordered by view count) to collect video
    IDs and, when any were found, one videos.list request for the details.
    The module-level call counters are updated accordingly.

    Args:
        channel_id: ID of the channel to search in.
        max_results: Value passed as `maxResults` to search.list.

    Returns:
        A list of dicts with the keys 'title', 'video_id', 'topic_details'
        (list of topic names; empty when the API returned no topic
        categories for the video) and 'video_tags' (list of tags, or None
        when the video has none). Empty list when the search found nothing.
    """
    global SEARCH_LIST_API_CALL_COUNT, VIDEO_LIST_API_CALL_COUNT, CHANNEL_SEARCH_CALL_COUNT

    # Call the search.list method to search for videos from the specified channel
    SEARCH_LIST_API_CALL_COUNT += 1
    search_response = youtube.search().list(
        channelId=channel_id,
        type='video',
        part='id',
        maxResults=max_results,
        order='viewCount',  # Sort by viewCount (most viewed)
        relevanceLanguage=language_spoken,
        regionCode=country_code
    ).execute()

    video_ids = [result['id']['videoId'] for result in search_response.get('items', [])]
    if not video_ids:
        # Nothing matched: do not call videos.list with an empty id filter.
        CHANNEL_SEARCH_CALL_COUNT += 1
        return []

    # Call the videos.list method to retrieve video details
    VIDEO_LIST_API_CALL_COUNT += 1
    video_details = youtube.videos().list(
        part='snippet, topicDetails',
        id=','.join(video_ids)
    ).execute()

    videos = []
    for item in video_details.get('items', []):
        snippet = item['snippet']
        # topicDetails (and topicCategories inside it) is not present on
        # every video, so fall back to an empty list instead of a KeyError.
        topic_urls = item.get('topicDetails', {}).get('topicCategories', [])
        videos.append({
            'title': snippet['title'],
            'video_id': item['id'],
            # [30:] drops the leading 'https://en.wikipedia.org/wiki/'
            # (30 characters) from each topic category URL.
            'topic_details': [url[30:] for url in topic_urls],
            'video_tags': snippet.get('tags'),
        })

    CHANNEL_SEARCH_CALL_COUNT += 1
    return videos

## Search By Topic

### Most Recent Videos

In [None]:
def get_most_recent_videos_by_topic(topic, max_results=MAX_YT_SEARCH_RESULTS):
    """Fetch the newest videos matching a topic query, with topics and tags.

    Issues one search.list request (ordered by date) to collect video IDs
    and, when any were found, one videos.list request for the details. The
    module-level call counters are updated accordingly.

    Args:
        topic: Search query describing the topic.
        max_results: Value passed as `maxResults` to search.list.

    Returns:
        A list of dicts that always contain 'title' and 'video_id';
        'topic_details' is added only when the video has topicDetails and
        'video_tags' only when the video has tags. Empty list when the
        search found nothing.
    """
    global SEARCH_LIST_API_CALL_COUNT, VIDEO_LIST_API_CALL_COUNT, TOPIC_SEARCH_CALL_COUNT

    # Call the search.list method to search for videos on the specified topic
    SEARCH_LIST_API_CALL_COUNT += 1
    search_response = youtube.search().list(
        q=topic,
        type='video',
        part='id',
        maxResults=max_results,
        order='date',  # Sort by date (most recent)
        relevanceLanguage=language_spoken,
        regionCode=country_code
    ).execute()

    video_ids = [result['id']['videoId'] for result in search_response.get('items', [])]
    if not video_ids:
        # Nothing matched: do not call videos.list with an empty id filter.
        TOPIC_SEARCH_CALL_COUNT += 1
        return []

    # Call the videos.list method to retrieve video details
    VIDEO_LIST_API_CALL_COUNT += 1
    video_details = youtube.videos().list(
        part='snippet, topicDetails',
        id=','.join(video_ids)
    ).execute()

    videos = []
    # .get keeps this consistent with the other helpers when 'items' is absent.
    for video in video_details.get('items', []):
        dict_to_append = {
            'title': video['snippet']['title'],
            'video_id': video['id']
        }
        if 'topicDetails' in video:
            # [30:] drops the leading 'https://en.wikipedia.org/wiki/'
            # (30 characters) from each topic category URL.
            topic_urls = video['topicDetails'].get('topicCategories', [])
            dict_to_append['topic_details'] = [s[30:] for s in topic_urls]
        if 'tags' in video['snippet']:
            dict_to_append['video_tags'] = video['snippet']['tags']

        videos.append(dict_to_append)

    TOPIC_SEARCH_CALL_COUNT += 1
    return videos

### Most Popular Videos

In [None]:
def get_most_popular_videos_by_topic(topic, max_results=MAX_YT_SEARCH_RESULTS):
    """Fetch the most viewed videos matching a topic query, with topics and tags.

    Issues one search.list request (ordered by view count) to collect video
    IDs and, when any were found, one videos.list request for the details.
    The module-level call counters are updated accordingly.

    Args:
        topic: Search query describing the topic.
        max_results: Value passed as `maxResults` to search.list.

    Returns:
        A list of dicts that always contain 'title' and 'video_id';
        'topic_details' is added only when the video has topicDetails and
        'video_tags' only when the video has tags. Empty list when the
        search found nothing.
    """
    global SEARCH_LIST_API_CALL_COUNT, VIDEO_LIST_API_CALL_COUNT, TOPIC_SEARCH_CALL_COUNT

    # Call the search.list method to search for videos on the specified topic
    SEARCH_LIST_API_CALL_COUNT += 1
    search_response = youtube.search().list(
        q=topic,
        type='video',
        part='id',
        maxResults=max_results,
        order='viewCount',  # Sort by viewCount (most viewed)
        relevanceLanguage=language_spoken,
        regionCode=country_code
    ).execute()

    video_ids = [result['id']['videoId'] for result in search_response.get('items', [])]
    if not video_ids:
        # Nothing matched: do not call videos.list with an empty id filter.
        TOPIC_SEARCH_CALL_COUNT += 1
        return []

    # Call the videos.list method to retrieve video details
    VIDEO_LIST_API_CALL_COUNT += 1
    video_details = youtube.videos().list(
        part='snippet, topicDetails',
        id=','.join(video_ids)
    ).execute()

    videos = []
    for video in video_details.get('items', []):
        dict_to_append = {
            'title': video['snippet']['title'],
            'video_id': video['id']
        }
        if 'topicDetails' in video:
            # [30:] drops the leading 'https://en.wikipedia.org/wiki/'
            # (30 characters) from each topic category URL.
            topic_urls = video['topicDetails'].get('topicCategories', [])
            dict_to_append['topic_details'] = [s[30:] for s in topic_urls]
        if 'tags' in video['snippet']:
            dict_to_append['video_tags'] = video['snippet']['tags']

        videos.append(dict_to_append)

    TOPIC_SEARCH_CALL_COUNT += 1
    return videos

# **Utility Functions**

## Function to check if a video is a Shorts video

In [None]:
def is_not_shorts_video(video, timeout=5):
    """Tell whether a video is a regular video rather than a YouTube Short.

    Sends a HEAD request to the video's /shorts/ URL and treats any status
    other than 200 (e.g. a 303 redirect) as "not a Short".

    Args:
        video: Dict with a 'video_id' key.
        timeout: Seconds to wait for YouTube before giving up. Without a
            timeout a stalled connection would block the whole pipeline.

    Returns:
        True when the /shorts/ URL does not answer with status 200.

    Raises:
        requests.RequestException: When the request fails or times out.
    """
    url = 'https://www.youtube.com/shorts/' + video['video_id']
    # requests.head() does not follow redirects by default, so a redirect
    # away from /shorts/ is visible here as a non-200 status code.
    ret = requests.head(url, timeout=timeout)
    # whether 303 or other values, it's not short
    return ret.status_code != 200

def filter_shorts(videos):
    return [v for v in videos if is_not_shorts_video(v)]

## Function to stem the tags to their base/root words

In [None]:
# Shared NLTK Porter stemmer instance used by stem_tags().
ps = PorterStemmer()

def stem_tags(videos):
    """Stem every tag of every video in place.

    Videos without a 'video_tags' key, or whose 'video_tags' is None, are
    left untouched.

    Args:
        videos: List of video dicts; mutated in place.

    Returns:
        The same `videos` list that was passed in.
    """
    for entry in videos:
        tags = entry.get('video_tags')
        if tags is None:
            continue
        entry['video_tags'] = [ps.stem(tag) for tag in tags]
    return videos

## Function to remove any stopwords that might occur in tags

In [None]:
# English stop words from the NLTK corpus, used by remove_stopword_tags().
stop_words = set(stopwords.words('english'))

def remove_stopword_tags(videos):
    """Drop, in place, every tag that is exactly one of the `stop_words`.

    Videos without a 'video_tags' key, or whose 'video_tags' is None, are
    left untouched. The comparison is an exact, case-sensitive membership
    test against `stop_words`.

    Args:
        videos: List of video dicts; mutated in place.

    Returns:
        The same `videos` list that was passed in.
    """
    for entry in videos:
        tags = entry.get('video_tags')
        if tags is not None:
            entry['video_tags'] = [tag for tag in tags if tag not in stop_words]
    return videos

# **Implementations**

## Recommendation System Utilities

### Recommend videos by **Channel ID**

In [None]:
def get_recommendation_by_channel_id(watched_channel_ids) :
    """Recommend videos from the channels watched most often.

    The (up to) five most frequent channel IDs are picked; for each one,
    either its most recent or its most popular videos are fetched (chosen
    at random), and Shorts are filtered out of the combined result.

    Args:
        watched_channel_ids: Channel IDs from the watch history; repeated
            IDs express how often a channel was watched.

    Returns:
        A list of video dicts, or None when `watched_channel_ids` is empty.
    """
    from collections import Counter

    if len(watched_channel_ids) == 0 :
        return None

    # Channel IDs are opaque, case-sensitive identifiers and must reach the
    # API unchanged. A text vectorizer with default settings lower-cases its
    # input and splits it on non-word characters such as '-', so its feature
    # names are not reliable channel IDs. Count the IDs verbatim instead;
    # ranking by occurrence count is what the vectorizer-based score reduced
    # to for single-token inputs.
    top_channel_ids = [channel_id for channel_id, _ in Counter(watched_channel_ids).most_common(5)]

    videos_by_channel_id = []
    for watched_channel_id in top_channel_ids :
        # Coin flip between "newest" and "most viewed" to vary the results.
        if np.random.rand() > 0.5 :
            videos_to_add = get_most_recent_videos_by_channel_id(watched_channel_id)
        else :
            videos_to_add = get_most_popular_videos_by_channel_id(watched_channel_id)
        videos_by_channel_id.extend(videos_to_add)
    return filter_shorts(videos_by_channel_id)

### Recommend videos by **Relevant Tags**

In [None]:
def get_recommendation_by_tags(watched_tags) :
    """Recommend videos for the highest-scoring words of the watched tags.

    The tags are TF-IDF vectorized (English stop words removed, at most 250
    features). Each word is scored, the ten best words are used as search
    topics (most recent or most popular videos, chosen at random), and
    Shorts are filtered out of the combined result.

    Args:
        watched_tags: Tag strings collected from the watch history.

    Returns:
        A list of video dicts, or None when `watched_tags` is empty or
        yields no usable words.
    """
    if len(watched_tags) == 0 :
        return None

    vectorizer = TfidfVectorizer(max_features=250, stop_words='english')

    try :
        vectorized_tags = vectorizer.fit_transform(watched_tags)
    except ValueError as e :
        # scikit-learn raises ValueError ("empty vocabulary") when no token
        # survives tokenization and stop-word removal; there is then nothing
        # to base a recommendation on.
        print(f"Could not vectorize tags: {e}")
        return None

    vectorized_tags_DF = pd.DataFrame(vectorized_tags.toarray(), columns=vectorizer.get_feature_names_out())

    # NOTE(review): |sum - 1| is not monotonic in the summed weight (a word
    # with sum 0.2 scores 0.8 and outranks one with sum 1.5, score 0.5).
    # The formula is kept unchanged here; confirm that this is intended.
    tag_scores = (vectorized_tags_DF.sum() - 1).abs()
    reduced_sorted_tags = tag_scores.sort_values(ascending=False).head(10)

    videos_by_top_tags = []
    for single_tag in reduced_sorted_tags.index :
        # Coin flip between "newest" and "most viewed" to vary the results.
        if np.random.rand() > 0.5 :
            videos_to_add = get_most_recent_videos_by_topic(single_tag)
        else :
            videos_to_add = get_most_popular_videos_by_topic(single_tag)
        videos_by_top_tags.extend(videos_to_add)

    return filter_shorts(videos_by_top_tags)

### Recommend videos by **Relevant Topics**

In [None]:
def get_recommendation_by_topics(watched_topics) :
    """Recommend videos for the topics that occur most often in the history.

    The (up to) five most frequent topic labels are used as search queries
    (most recent or most popular videos, chosen at random), and Shorts are
    filtered out of the combined result.

    Args:
        watched_topics: Topic labels from the watch history, e.g.
            'Role-playing_video_game'; repeated labels express frequency.

    Returns:
        A list of video dicts, or None when `watched_topics` is empty.
    """
    from collections import Counter

    if len(watched_topics) == 0 :
        return None

    # Topic labels are atomic. A text vectorizer with default settings
    # lower-cases them and splits on '-', turning
    # 'Role-playing_video_game' into the two search terms 'role' and
    # 'playing_video_game'. Count whole labels instead; for single-token
    # inputs the TF-IDF based score ranked by occurrence count as well.
    top_topics = [topic for topic, _ in Counter(watched_topics).most_common(5)]

    videos_by_top_topics = []
    for single_topic in top_topics :
        # Coin flip between "newest" and "most viewed" to vary the results.
        if np.random.rand() > 0.5 :
            videos_to_add = get_most_recent_videos_by_topic(single_topic)
        else :
            videos_to_add = get_most_popular_videos_by_topic(single_topic)
        videos_by_top_topics.extend(videos_to_add)

    return filter_shorts(videos_by_top_topics)

## Recommendation System

In [None]:
def get_recommendations(watched_channel_ids, watched_topics, watched_tags):
    """Collect recommendations from the channel, tag and topic recommenders.

    Args:
        watched_channel_ids: Input for get_recommendation_by_channel_id().
        watched_topics: Input for get_recommendation_by_topics().
        watched_tags: Input for get_recommendation_by_tags().

    Returns:
        A dict with the per-source results under 'videos_by_channel_id',
        'videos_by_top_tags' and 'videos_by_top_topics' (each may be None),
        and under 'videos' the concatenation of the non-None results in the
        order channel, tags, topics.
    """
    by_channel = get_recommendation_by_channel_id(watched_channel_ids)
    by_tags = get_recommendation_by_tags(watched_tags)
    by_topics = get_recommendation_by_topics(watched_topics)

    combined = [
        video
        for source in (by_channel, by_tags, by_topics)
        if source is not None
        for video in source
    ]

    return {
        'videos': combined,
        'videos_by_channel_id': by_channel,
        'videos_by_top_tags': by_tags,
        'videos_by_top_topics': by_topics,
    }

# **Testing Data**

In [None]:
# Reset all API usage counters before the test run below.
SEARCH_LIST_API_CALL_COUNT = VIDEO_LIST_API_CALL_COUNT = 0
CHANNEL_SEARCH_CALL_COUNT = TOPIC_SEARCH_CALL_COUNT = 0

In [None]:
# Sample watched-video tags used as the `watched_tags` input of the test run
# below. Entries are raw tag strings; duplicates are kept as they are.
tags = ['gamers',
   'vctth',
   'valorant',
   'vct',
   'vct masters',
   'xerxia',
   'sScary',
   'foxz',
   'Sushiboys',
   'Crws',
   'Surf',
   'Zeus',
   'XIA',
   'BLEED',
   'Esports',
   'Pro player',
   'วาโลแรนต์',
   'crazyguy',
   'Deryeon',
   'Juicy',
   'LEGIJA',
   'Bleed',
   'Aim',
   'routine',
   'games',
   'valorant moment',
   'MickiePP',
   'Superbuss',
   'Boomburapa',
   'Viperdemon',
   'Mith',
   'nephh',
   'Fullsense',
    'yourenotjustin',
   'Justin',
   'valorant',
   'overdrive bundle',
   'what does overdrive bundle have',
   'what skins are in overdrive bundle',
   'overdrive',
   'when does overdrive come out',
   'overdrive reveal valorant',
   'new skins valorant',
   'overdrive price valorant',
   'how much is overdrive valorant',
   'when does overdrive valorant',
   'valorant update',
   'is overdrive bundle worth it',
   'all upgrades',
   'valorant overdrive phantom',
   'overdrive blade',
   'overdrive bundle showcase',
   'review',
   'overdrive sheriff',
   'upgraded',
   'overdrive',
   'valorant overdrive vandal',
   'valorant skins',
   'valorant new skin bundle',
   'bundle overdrive',
   'overdrive katana',
   'overdrive knife',
   'valorant katana',
   'valorant skin bundle',
   'valorant new',
   'valorant new skins',
   'valorant new aimbot',
   'valorant aimbot skin',
   'valorant points',
   'free valorant skins',
   'valorant points free',
   'valorant',
   'dark and darker is better game',
   'valorant overdrive skin',
   'valorant overdrive gameplay',
   'valorant gameplay',
   'valorant yoru',
   'valorant aimbot',
   'valorant',
   'valorant highlights',
   'horcus',
   'gaming',
   'radiant',
   'vlorant',
   'valorant live',
   'live valorant',
   'valorant español',
   'vvalorant',
   'alorant',
   'vaorant',
   'valorat',
   'valorant españa',
   'valorant latam',
   'valornt',
   'valoant',
   'valorant gameplay',
   'valorant competir',
   'competir valorant',
   'competir en valorant',
   'competitivo valorant',
   'valorant competitivo',
   'no competir en valorant',
   'nunca valorant',
   'valorant nunca',
   'compito valorant',
   'compito en valorant',
   'valorant compito',
   'no competir valorant',
   'valorant no competir']
# Sample watched-video topic labels (repeats express frequency), used as the
# `watched_topics` input of the test run below.
topics = [
    'Action_game', 'Role-playing_video_game', 'Video_game_culture',
    'Action_game', 'Strategy_video_game', 'Video_game_culture',
    'Action_game', 'Role-playing_video_game', 'Video_game_culture',
]
# Sample channel-ID history (repeats express frequency). The values look
# like placeholders rather than real channel IDs.
channelIDs = [
    'abc0', 'abc1', 'abc2', 'abc11', 'abc3', 'abc4', 'abc6', 'abc5',
    'abc5', 'abc6', 'abc7', 'abc11', 'abc0', 'abc8', 'abc9', 'abc10',
    'abc6', 'abc6', 'abc6', 'abc6', 'abc0', 'abc11', 'abc12', 'abc11',
    'abc0', 'abc13', 'abc0', 'abc14', 'abc6', 'abc5', 'abc0', 'abc15',
    'abc16', 'abc17', 'abc11', 'abc18', 'abc19',
]
# Run the recommender on the sample topics and tags. The channel history is
# passed as an empty list, presumably because `channelIDs` only holds
# placeholder IDs (TODO confirm).
videos = get_recommendations(
    watched_channel_ids=[],
    watched_topics=topics,
    watched_tags=tags,
)
print('num videos found = ', len(videos['videos']))

num videos found =  21


In [None]:
SEARCH_LIST_API_CALL_COUNT

10

In [None]:
VIDEO_LIST_API_CALL_COUNT

10

In [None]:
CHANNEL_SEARCH_CALL_COUNT

0

In [None]:
TOPIC_SEARCH_CALL_COUNT

10

In [None]:
videos['videos']

[{'title': 'Die For You ft. Grabbitz // Official Music Video // VALORANT Champions 2021',
  'video_id': 'h7MYJghRWt0',
  'topic_details': ['Video_game_culture'],
  'video_tags': ['VALORANT',
   'VALORANT Champions Tour',
   'VALORANT Esports',
   'VALORANT Berlin',
   'VALORANT Tournament',
   'VALORANT Die For You',
   'VALORANT Grabbitz',
   'Grabbitz Die For You',
   'Die For You',
   'Die For You VALORANT',
   'VALORANT Cinematic',
   'VALORANT Music Video',
   'Champions Music Video',
   'VCT Music Video',
   'VALORANT song',
   'VALORANT anthem',
   'Champions anthem',
   'valorant esports music',
   'valorant champions music',
   'riot games music']},
 {'title': 'L’Accord - Chamber Agent Trailer // VALORANT',
  'video_id': 'FUoqAn5T4h4',
  'topic_details': ['Action_game',
   'Strategy_video_game',
   'Video_game_culture'],
  'video_tags': ['Chamber',
   'New Agent',
   'VALORANT Episode 3',
   'Episode III',
   'VALORANT trailer',
   'VALORANT',
   'VALORANT game',
   'valorant 