In [1]:
import pandas as pd 
import numpy as np 
import matplotlib as plt

In [5]:
# Load the video details CSV into a DataFrame and preview the first rows.
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns shown in the preview
# are presumably index columns written out by earlier saves -- confirm.
videos = pd.read_csv("video_details.csv")
videos.head()

Unnamed: 0.1,Unnamed: 0,Video Link,Video Title,Views,Uploaded Date,Likes on Video,Dislikes on Video,Comments
0,0,https://www.youtube.com/watch?v=bWWTk-pZX5g,8 fiction books you need to read📚(& that will ...,"382,446 views",23 Mar 2020,18K,341,"Yoo Se Na - ""You know what sexy?....\nBooks.""\..."
1,1,https://www.youtube.com/watch?v=gyK9USvrvDQ,Top 10 BEST SELLING Books In History,"575,824 views",25 Aug 2018,12K,992,"- Hello Aluxers, what's your favorite book se..."
2,2,https://www.youtube.com/watch?v=GihybX7JyG4,(Full Audiobook) This Book Will Change Everyth...,"4,692,014 views",26 Oct 2017,61K,4.3K,Brian Perkins - I love audiobooks. I use youtu...
3,3,https://www.youtube.com/watch?v=4f2OCYaqLdE,I Read Every Book Joe from You Recommended,"1,977,658 views",25 Jan 2020,99K,734,Ishaan Vohra - Watching a smart guy do smart t...
4,4,https://www.youtube.com/watch?v=imA5NPX4ucU,SUPER BIG BOOKHAUL aka *leo has a book buying ...,"2,450 views",23 Jul 2020,233,0,Sabine's Book Nook - Us both having a book buy...


In [6]:
def strip_views(string):
    """Convert a view-count label such as ``"382,446 views"`` to an int.

    The first whitespace-delimited token is taken as the number and its
    thousands separators (commas) are removed.

    Args:
        string: Text whose first token is the view count, e.g. ``"2,450 views"``.

    Returns:
        The view count as an ``int``.

    Raises:
        ValueError: If the text is blank or its first token is not an integer.
    """
    # split() with no argument tolerates leading whitespace, tabs and
    # non-breaking spaces; split(' ') would yield an empty or fused first token.
    tokens = string.split()
    if not tokens:
        raise ValueError(f"no view count found in {string!r}")
    return int(tokens[0].replace(",", ""))

def strip_likes(string):
    """Convert an abbreviated count label (``"18K"``, ``"4.3K"``, ``"233"``) to an int.

    The first whitespace-delimited token is parsed. Thousands separators are
    removed and a trailing ``K``/``M``/``B`` suffix (case-insensitive) scales
    the value by 1e3/1e6/1e9.

    Args:
        string: Text whose first token is the count.

    Returns:
        The count as an ``int``, or ``None`` when the token cannot be parsed.
        Parsing is best-effort: the offending text and the error are printed
        rather than raised, so one bad row does not abort a column-wide
        ``apply``. Callers must be prepared for ``None`` in the result.
    """
    multipliers = {"K": 1000, "M": 1000000, "B": 1000000000}
    tokens = string.split()
    try:
        if not tokens:
            raise ValueError("empty count label")
        temp = tokens[0].replace(",", "").upper()
        suffix = temp[-1]
        if suffix in multipliers:
            # round(), not int(): a float product can land just below the
            # intended integer (e.g. 1000.9999999999999) and int() would
            # truncate it to the wrong value.
            return round(float(temp[:-1]) * multipliers[suffix])
        return int(temp)
    except (ValueError, OverflowError) as e:
        print(string)
        print("Error:", e)
        return None

def invert_dislikes(number):
    """Negate a dislike count so that it acts as a penalty.

    Dislikes lower a video's quality, so they are stored with the opposite
    sign; a count equal to zero is handed back as-is.
    """
    return number if number == 0 else -number

# strip_likes("233")

In [8]:
def preprocess():
    """Clean the module-level ``videos`` frame and rebind it to the result.

    Steps:
      1. Drop the ``Comments`` column.
      2. Drop rows containing missing values.
      3. Drop rows whose ``Likes on Video`` is the literal string ``'LIKE'``
         (presumably a placeholder for a hidden like count -- confirm).
      4. Parse ``Views``, ``Likes on Video`` and ``Dislikes on Video`` into
         integers; dislikes are stored negated.

    The global is rebound only after every step has succeeded, so a failure
    part-way through leaves ``videos`` exactly as it was. The function expects
    the freshly loaded frame: running it a second time on its own output
    fails because ``Comments`` is already gone.
    """
    global videos
    cleaned = videos.drop(columns=["Comments"]).dropna()
    # Explicit copy: assigning columns on a boolean-filtered frame otherwise
    # triggers pandas' SettingWithCopyWarning.
    cleaned = cleaned[cleaned['Likes on Video'] != 'LIKE'].copy()
    cleaned['Views'] = cleaned['Views'].apply(strip_views)
    cleaned['Likes on Video'] = cleaned['Likes on Video'].apply(strip_likes)
    cleaned['Dislikes on Video'] = (
        cleaned['Dislikes on Video'].apply(strip_likes).apply(invert_dislikes)
    )
    videos = cleaned

preprocess()

In [9]:
# Preview the frame after preprocess() has run: Comments is gone and the
# count columns are numeric.
videos.head()

Unnamed: 0.1,Unnamed: 0,Video Link,Video Title,Views,Uploaded Date,Likes on Video,Dislikes on Video
0,0,https://www.youtube.com/watch?v=bWWTk-pZX5g,8 fiction books you need to read📚(& that will ...,382446,23 Mar 2020,18000,-341
1,1,https://www.youtube.com/watch?v=gyK9USvrvDQ,Top 10 BEST SELLING Books In History,575824,25 Aug 2018,12000,-992
2,2,https://www.youtube.com/watch?v=GihybX7JyG4,(Full Audiobook) This Book Will Change Everyth...,4692014,26 Oct 2017,61000,-4300
3,3,https://www.youtube.com/watch?v=4f2OCYaqLdE,I Read Every Book Joe from You Recommended,1977658,25 Jan 2020,99000,-734
4,4,https://www.youtube.com/watch?v=imA5NPX4ucU,SUPER BIG BOOKHAUL aka *leo has a book buying ...,2450,23 Jul 2020,233,0


In [10]:
def score_att(view, likes, dislikes):
    """Score a video's quality as net reactions per view.

    Args:
        view: Number of views.
        likes: Number of likes.
        dislikes: Dislike count, already negated (zero or negative), so that
            adding it to ``likes`` gives the net reaction count.

    Returns:
        ``(likes + dislikes) / view``, or ``0.0`` when ``view`` is zero: a
        video nobody has watched carries no quality signal, and dividing
        would otherwise raise or produce inf/nan.
    """
    if view == 0:
        return 0.0
    return (likes + dislikes) / view

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def recommender(link, top_n=10, title_weight=0.7, quality_weight=0.3):
    """Recommend video titles for the video identified by ``link``.

    Every row of the module-level ``videos`` frame is ranked by a weighted
    sum of two terms:

    * the TF-IDF similarity between its title and the query video's title;
    * its own quality score from ``score_att`` (net reactions per view).
      This term does not depend on the query video.

    Args:
        link: Value of the ``Video Link`` column identifying the query video.
            If several rows share the link, the first one is used.
        top_n: Maximum number of recommendations to return.
        title_weight: Weight of the title-similarity term.
        quality_weight: Weight of the quality-score term.

    Returns:
        A ``Video Title`` Series holding up to ``top_n`` titles, best first.
        The query video itself is never included.

    Raises:
        IndexError: If ``link`` does not occur in ``videos``.
    """
    # Work with the row *position*, not the index label: after rows have been
    # dropped from ``videos`` the two differ, and the similarity vector below
    # is positional.
    matches = np.flatnonzero((videos['Video Link'] == link).to_numpy())
    if matches.size == 0:
        raise IndexError(f"link not found in videos: {link!r}")
    query_pos = int(matches[0])

    # Title similarity. TfidfVectorizer L2-normalises rows by default, so the
    # linear kernel equals cosine similarity. Only the query row is compared
    # against the corpus; the full n x n matrix is never needed.
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(videos['Video Title'])
    title_similarity = linear_kernel(
        tfidf_matrix[query_pos:query_pos + 1], tfidf_matrix
    ).ravel()

    # Quality score per video from views, likes and (negated) dislikes.
    quality = np.array(
        [
            score_att(view, likes, dislikes)
            for view, likes, dislikes in zip(
                videos['Views'].to_numpy(),
                videos['Likes on Video'].to_numpy(),
                videos['Dislikes on Video'].to_numpy(),
            )
        ],
        dtype=float,
    )
    # A zero view count can yield inf/nan; treat those as "no signal" so they
    # cannot poison the ranking.
    quality[~np.isfinite(quality)] = 0.0

    # Weighted combination of title similarity and quality score.
    combined = title_weight * title_similarity + quality_weight * quality

    # Best first; the stable sort keeps ties in frame order. The query video
    # is removed explicitly rather than assumed to sit at rank 0, because a
    # high quality score can lift another video above it.
    ranked = np.argsort(-combined, kind='stable')
    ranked = ranked[ranked != query_pos][:top_n]
    return videos['Video Title'].iloc[ranked]


In [12]:
# Example query: recommend titles for one video link present in the dataset.
recommender("https://www.youtube.com/watch?v=imA5NPX4ucU")

16                      Can You Name a Book? ANY Book???
34           I Read A Book A Week (Here's What Happened)
29                Meri Book Recommendations & Collection
13                       SAVE ONE DROP ONE: BOOK EDITION
140    Best Smartphones Under ₹15000⚡⚡⚡July 2020 | 90...
127                   Stop buying expensive smart phones
3             I Read Every Book Joe from You Recommended
17     World's Best Life Changing Book - By Sandeep M...
2      (Full Audiobook) This Book Will Change Everyth...
74     'BOY STORY Official Weibo' 1 million subscribe...
Name: Video Title, dtype: object