# **Dependencies**

# Imports

In [229]:
import numpy as np
import pandas as pd
import grequests
import os

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

from urllib import parse as url
import threading
import queue
import time

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dasan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Constants

In [230]:
# Maximum number of videos requested per YouTube search call.
MAX_YT_SEARCH_RESULTS = 5

# Credentials are read from the environment so they are never stored in the notebook.
# SECURITY: the key and token that used to be hardcoded in this cell have been
# exposed through the notebook and should be revoked and re-issued.
# An unset variable yields '' so the cell still runs; API calls will then fail.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
ACCESS_TOKEN = os.environ.get('YOUTUBE_ACCESS_TOKEN', '')

API_VERSION = 'v3'
API_NAME = 'youtube'

# Pieces of the request URL; NETLOC carries the host plus the versioned API prefix.
SCHEME = "https"
NETLOC = f'youtube.googleapis.com/{API_NAME}/{API_VERSION}'
YT_SEARCH_ENDPOINT = "/search"
YT_VIDEOS_ENDPOINT = "/videos"

# **Utility Functions**

## Function to remove any stopwords that might occur in tags

In [231]:
# Stop words gathered from every file id of the NLTK stopwords corpus, as a set.
stop_words = set(stopwords.words(stopwords.fileids()))

def remove_stopword_tags(videos):
    """Drop tags that are stop words from each video, modifying the videos in place.

    Args:
        videos: Iterable of video dicts. Entries without a 'video_tags' key,
            or whose 'video_tags' is None, are left untouched.

    Returns:
        The same `videos` object that was passed in.
    """
    for entry in videos:
        current_tags = entry.get('video_tags')
        if current_tags is None:
            continue
        # A tag is removed only when the whole tag string is in stop_words.
        entry['video_tags'] = [tag for tag in current_tags if tag not in stop_words]
    return videos

## Function to stem the tags to their base/root words

In [232]:
# Single stemmer instance shared by every call to stem_tags.
ps = PorterStemmer()

def stem_tags(videos):
    """Replace each video's tags with their Porter stems, modifying the videos in place.

    Args:
        videos: Iterable of video dicts. Entries without a 'video_tags' key,
            or whose 'video_tags' is None, are left untouched.

    Returns:
        The same `videos` object that was passed in.
    """
    for entry in videos:
        raw_tags = entry.get('video_tags')
        if raw_tags is None:
            continue
        entry['video_tags'] = list(map(ps.stem, raw_tags))
    return videos

## Function to generate a link that will get a **search response** from YouTube Data API

In [233]:
def prepare_link_for_search_request(region_code, order, channel_id=None, search_query=None): # order : viewCount | date
    """Build the URL of a YouTube Data API search request for videos.

    The request asks only for the 'id' part and for at most
    MAX_YT_SEARCH_RESULTS results of type 'video'.

    Args:
        region_code: Value sent as the 'regionCode' parameter.
        order: Value sent as the 'order' parameter ('viewCount' or 'date').
        channel_id: Optional value for 'channelId'; omitted from the URL when None.
        search_query: Optional value for 'q'; omitted from the URL when None.

    Returns:
        The complete request URL as a string, with the query string URL-encoded.
    """
    params = {
        'type' : 'video',
        'part' : 'id',
        'maxResults' : MAX_YT_SEARCH_RESULTS,
        'order' : order,
        'regionCode' : region_code,
        'key' : API_KEY
    }
    # Identity comparison is the idiomatic None check and cannot be fooled by
    # objects that override != (the original used `!= None`).
    if channel_id is not None:
        params['channelId'] = channel_id
    if search_query is not None:
        params['q'] = search_query

    return url.urlunparse((SCHEME, NETLOC, YT_SEARCH_ENDPOINT, None, url.urlencode(params), None))


## Function to generate a link that will get a **list of videos** from YouTube Data API

In [234]:
def prepare_link_for_video_request(video_ids):
    """Build the URL of a YouTube Data API videos request for the given IDs.

    Args:
        video_ids: Iterable of video ID strings; they are comma-joined into the
            'id' parameter.

    Returns:
        The complete request URL as a string. It asks for the 'snippet', 'id'
        and 'contentDetails' parts.
    """
    requested_parts = ('snippet', 'id', 'contentDetails')
    query_string = url.urlencode({
        'part' : ','.join(requested_parts),
        'id' : ','.join(video_ids),
        'key' : API_KEY,
    })
    return url.urlunparse((SCHEME, NETLOC, YT_VIDEOS_ENDPOINT, None, query_string, None))

# **Implementations**

## Recommendation System Utilities

### Recommend videos by **Relevant Tags**

In [235]:
def get_recommendation_by_tags(watched_tags, result_dict=None) :
    """Recommend videos by searching YouTube for the top-scoring terms of the watched tags.

    The tags are TF-IDF vectorized, every vocabulary term is scored as
    abs(column_sum - 1), and each of the ten best-scoring terms is used as a
    search query (region 'IN', order picked at random between 'date' and
    'viewCount'). The video IDs found are then resolved with one videos request.

    Args:
        watched_tags: List of tag strings. None or an empty list yields None.
        result_dict: Optional dict. When given, the outcome is also stored under
            'TAGS_RES' so a caller running this function in a thread can read it.

    Returns:
        The 'items' list of the videos response; an empty list when the searches
        returned no videos; or None when there is no usable input, a request
        failed, or the API answered with an error.
    """
    def _finish(videos):
        # Always publish the outcome so callers reading result_dict find the key.
        if result_dict is not None:
            result_dict['TAGS_RES'] = videos
        return videos

    def _fetch_json(links):
        # Return one parsed payload per link, or None if any request failed,
        # was not JSON, reported an API error, or carried no 'items'.
        payloads = []
        for response in grequests.map(grequests.get(link) for link in links):
            if response is None:  # grequests.map yields None for a request that raised
                return None
            try:
                payload = response.json()
            except ValueError:
                return None
            if not isinstance(payload, dict) or 'error' in payload or 'items' not in payload:
                return None
            payloads.append(payload)
        return payloads

    if not watched_tags:
        return _finish(None)

    # Pass the stop words themselves: stopwords.fileids() only lists the corpus
    # language names, so the original filtered words like 'english' instead.
    vectorizer = TfidfVectorizer(max_features=250, stop_words=stopwords.words(stopwords.fileids()))
    try:
        vectorized_tags = vectorizer.fit_transform(watched_tags)
    except ValueError:
        # Raised by scikit-learn when no vocabulary is left (e.g. only stop words).
        return _finish(None)

    # Column sums straight from the sparse matrix; same values as summing the
    # dense frame, without appending a row to a frame indexed by duplicate tags.
    term_totals = pd.Series(np.asarray(vectorized_tags.sum(axis=0)).ravel(),
                            index=vectorizer.get_feature_names_out())
    # NOTE(review): abs(total - 1) ranks a term with total weight 0.2 above one
    # with total weight 1.0. Kept unchanged - confirm this scoring is intended.
    term_scores = (term_totals - 1).abs().sort_values(ascending=False)
    top_terms = term_scores.index[:10]

    search_links = [
        prepare_link_for_search_request('IN', 'date' if np.random.rand() > 0.5 else 'viewCount', search_query=term)
        for term in top_terms
    ]
    search_payloads = _fetch_json(search_links)
    if search_payloads is None:
        return _finish(None)

    found_ids = [
        item['id']['videoId']
        for payload in search_payloads
        for item in payload['items']
        if 'videoId' in item.get('id', {})
    ]
    # Several searches can return the same video; keep the first occurrence only.
    video_ids = list(dict.fromkeys(found_ids))
    if not video_ids:
        return _finish([])

    video_payloads = _fetch_json([prepare_link_for_video_request(video_ids)])
    if video_payloads is None:
        return _finish(None)
    return _finish(video_payloads[0]['items'])


### Recommend videos by **Relevant Topics**

In [236]:
def get_recommendation_by_topics(watched_topics, result_dict=None) :
    """Recommend videos for the watched topics.

    The topic pipeline is the same as the tag pipeline (vectorize, pick the
    top terms, search, resolve the video IDs), so this function delegates to
    get_recommendation_by_tags instead of repeating it, and only differs in the
    key under which the outcome is published.

    Args:
        watched_topics: List of topic strings. None or an empty list yields None.
        result_dict: Optional dict. When given, the outcome is stored under
            'TOPICS_RES' so a caller running this function in a thread can read it.

    Returns:
        Whatever get_recommendation_by_tags returns for the topics, or None when
        there are no topics.
    """
    videos = None
    try:
        if watched_topics:
            # result_dict is deliberately not forwarded: the tag function would
            # store the outcome under 'TAGS_RES'.
            videos = get_recommendation_by_tags(watched_topics)
    finally:
        # Publish even when the pipeline raises, so a threaded caller still
        # finds the key (holding None) while the exception propagates.
        if result_dict is not None:
            result_dict['TOPICS_RES'] = videos
    return videos


### Recommend videos by **Relevant Channels**

In [237]:
def get_recommendation_by_channel_ids(watched_channel_ids, result_dict=None) :
    """Recommend videos from the channels that occur most often in the watch history.

    The ten most frequent channel IDs are each searched (region 'IN', order
    picked at random between 'date' and 'viewCount'), and the video IDs found
    are then resolved with one videos request.

    Args:
        watched_channel_ids: List of channel ID strings, one per watched video.
            None or an empty list yields None.
        result_dict: Optional dict. When given, the outcome is also stored under
            'CHANNELS_RES' so a caller running this function in a thread can read it.

    Returns:
        The 'items' list of the videos response; an empty list when the searches
        returned no videos; or None when there is no usable input, a request
        failed, or the API answered with an error.
    """
    def _finish(videos):
        # Always publish the outcome so callers reading result_dict find the key.
        if result_dict is not None:
            result_dict['CHANNELS_RES'] = videos
        return videos

    def _fetch_json(links):
        # Return one parsed payload per link, or None if any request failed,
        # was not JSON, reported an API error, or carried no 'items'.
        payloads = []
        for response in grequests.map(grequests.get(link) for link in links):
            if response is None:  # grequests.map yields None for a request that raised
                return None
            try:
                payload = response.json()
            except ValueError:
                return None
            if not isinstance(payload, dict) or 'error' in payload or 'items' not in payload:
                return None
            payloads.append(payload)
        return payloads

    if not watched_channel_ids:
        return _finish(None)

    # Count the IDs verbatim. The original ran them through CountVectorizer,
    # whose defaults lowercase the text and split it on non-word characters
    # such as '-', so the ID sent back as channelId no longer matched the input.
    # Ranking by count is the same order the original abs(count - 1) score gave.
    channel_counts = pd.Series(list(watched_channel_ids), dtype='object').value_counts()
    top_channels = channel_counts.index[:10]

    search_links = [
        prepare_link_for_search_request('IN', 'date' if np.random.rand() > 0.5 else 'viewCount', channel_id=channel)
        for channel in top_channels
    ]
    search_payloads = _fetch_json(search_links)
    if search_payloads is None:
        return _finish(None)

    found_ids = [
        item['id']['videoId']
        for payload in search_payloads
        for item in payload['items']
        if 'videoId' in item.get('id', {})
    ]
    # Keep the first occurrence of each video ID only.
    video_ids = list(dict.fromkeys(found_ids))
    if not video_ids:
        return _finish([])

    video_payloads = _fetch_json([prepare_link_for_video_request(video_ids)])
    if video_payloads is None:
        return _finish(None)
    return _finish(video_payloads[0]['items'])

## Recommendation System

In [238]:
def get_recommendations(watched_channel_ids, watched_topics, watched_tags):
    """Run the channel, tag and topic recommenders in parallel and merge their results.

    Each recommender runs in its own thread and writes its outcome into a
    shared dict under its own key.

    Args:
        watched_channel_ids: Channel IDs passed to get_recommendation_by_channel_ids.
        watched_topics: Topics passed to get_recommendation_by_topics.
        watched_tags: Tags passed to get_recommendation_by_tags.

    Returns:
        A dict with:
            'videos': the channel, tag and topic results concatenated in that
                order, skipping any recommender whose result is None;
            'videos_by_channel_id', 'videos_by_top_tags', 'videos_by_top_topics':
                the individual results (None when a recommender produced nothing).
    """
    # Pre-seed every key: a worker that dies from an exception never writes its
    # key, and the lookups below would otherwise fail with KeyError.
    result = {'CHANNELS_RES' : None, 'TAGS_RES' : None, 'TOPICS_RES' : None}

    workers = [
        threading.Thread(name='channel_thread', target=get_recommendation_by_channel_ids, args=(watched_channel_ids, result, )),
        threading.Thread(name='tags_thread', target=get_recommendation_by_tags, args=(watched_tags, result, )),
        threading.Thread(name='topics_thread', target=get_recommendation_by_topics, args=(watched_topics, result, )),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    videos = []
    for key in ('CHANNELS_RES', 'TAGS_RES', 'TOPICS_RES'):
        if result[key] is not None:
            videos = videos + result[key]

    return {'videos' : videos,
            'videos_by_channel_id' : result['CHANNELS_RES'],
            'videos_by_top_tags' : result['TAGS_RES'],
            'videos_by_top_topics' : result['TOPICS_RES']
            }

In [239]:
# Sample watch history used to exercise the recommender.
# Tags of the watched videos; repeated entries are kept on purpose.
tags = [
    'gamers',
    'vctth',
    'valorant',
    'vct',
    'vct masters',
    'xerxia',
    'sScary',
    'foxz',
    'Sushiboys',
    'Crws',
    'Surf',
    'Zeus',
    'XIA',
    'BLEED',
    'Esports',
    'Pro player',
    'วาโลแรนต์',
    'crazyguy',
    'Deryeon',
    'Juicy',
    'LEGIJA',
    'Bleed',
    'Aim',
    'routine',
    'games',
    'valorant moment',
    'MickiePP',
    'Superbuss',
    'Boomburapa',
    'Viperdemon',
    'Mith',
    'nephh',
    'Fullsense',
    'yourenotjustin',
    'Justin',
    'valorant',
    'overdrive bundle',
    'what does overdrive bundle have',
    'what skins are in overdrive bundle',
    'overdrive',
    'when does overdrive come out',
    'overdrive reveal valorant',
    'new skins valorant',
    'overdrive price valorant',
    'how much is overdrive valorant',
    'when does overdrive valorant',
    'valorant update',
    'is overdrive bundle worth it',
    'all upgrades',
    'valorant overdrive phantom',
    'overdrive blade',
    'overdrive bundle showcase',
    'review',
    'overdrive sheriff',
    'upgraded',
    'overdrive',
    'valorant overdrive vandal',
    'valorant skins',
    'valorant new skin bundle',
    'bundle overdrive',
    'overdrive katana',
    'overdrive knife',
    'valorant katana',
    'valorant skin bundle',
    'valorant new',
    'valorant new skins',
    'valorant new aimbot',
    'valorant aimbot skin',
    'valorant points',
    'free valorant skins',
    'valorant points free',
    'valorant',
    'dark and darker is better game',
    'valorant overdrive skin',
    'valorant overdrive gameplay',
    'valorant gameplay',
    'valorant yoru',
    'valorant aimbot',
    'valorant',
    'valorant highlights',
    'horcus',
    'gaming',
    'radiant',
    'vlorant',
    'valorant live',
    'live valorant',
    'valorant español',
    'vvalorant',
    'alorant',
    'vaorant',
    'valorat',
    'valorant españa',
    'valorant latam',
    'valornt',
    'valoant',
    'valorant gameplay',
    'valorant competir',
    'competir valorant',
    'competir en valorant',
    'competitivo valorant',
    'valorant competitivo',
    'no competir en valorant',
    'nunca valorant',
    'valorant nunca',
    'compito valorant',
    'compito en valorant',
    'valorant compito',
    'no competir valorant',
    'valorant no competir',
]

# Topic labels of the watched videos.
topics = [
    'Action_game',
    'Role-playing_video_game',
    'Video_game_culture',
    'Action_game',
    'Strategy_video_game',
    'Video_game_culture',
    'Action_game',
    'Role-playing_video_game',
    'Video_game_culture',
]

# Channel IDs of the watched videos, one entry per watched video.
channelIDs = [
    'abc0', 'abc1', 'abc2', 'abc11', 'abc3', 'abc4', 'abc6', 'abc5',
    'abc5', 'abc6', 'abc7', 'abc11', 'abc0', 'abc8', 'abc9', 'abc10',
    'abc6', 'abc6', 'abc6', 'abc6', 'abc0', 'abc11', 'abc12', 'abc11',
    'abc0', 'abc13', 'abc0', 'abc14', 'abc6', 'abc5', 'abc0', 'abc15',
    'abc16', 'abc17', 'abc11', 'abc18', 'abc19',
]

# Time one end-to-end recommendation run. perf_counter() is monotonic, so the
# measured interval cannot be skewed by system clock adjustments the way a
# time.time() difference can.
# NOTE(review): an empty channel list is passed instead of channelIDs,
# presumably because those sample IDs are placeholders - confirm.
start_time = time.perf_counter()
videos = get_recommendations([], topics, tags)
print('Exceution Time:', (time.perf_counter() - start_time))

Exceution Time: 5.821445941925049


  with loop.timer(seconds, ref=ref) as t:


In [240]:
# Display the dict returned by get_recommendations: the merged 'videos' list
# plus the per-source results.
videos

{'videos': [{'kind': 'youtube#video',
   'etag': 'ZLk5x92S1lty0_JEK_4FmC2ghWs',
   'id': 'AzjpMbFvJpA',
   'snippet': {'publishedAt': '2024-03-12T17:12:27Z',
    'channelId': 'UCCuzDCoI3EUOo_nhCj4noSw',
    'title': 'VALORANT NIGHTO WIT JOT, GURAISU, DANAN!@#$%^',
    'description': "Sociabuzz: https://sociabuzz.com/ybrap\nSaweria:https://saweria.co/yb\n\nI'm still alive.",
    'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/AzjpMbFvJpA/default_live.jpg',
      'width': 120,
      'height': 90},
     'medium': {'url': 'https://i.ytimg.com/vi/AzjpMbFvJpA/mqdefault_live.jpg',
      'width': 320,
      'height': 180},
     'high': {'url': 'https://i.ytimg.com/vi/AzjpMbFvJpA/hqdefault_live.jpg',
      'width': 480,
      'height': 360},
     'standard': {'url': 'https://i.ytimg.com/vi/AzjpMbFvJpA/sddefault_live.jpg',
      'width': 640,
      'height': 480},
     'maxres': {'url': 'https://i.ytimg.com/vi/AzjpMbFvJpA/maxresdefault_live.jpg',
      'width': 1280,
      'height': 72