#Importing The Dependencies

In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#Data Collection And Pre-Processing

In [2]:
web_series_data = pd.read_csv('/content/All_Streaming_Shows.csv')

In [3]:
web_series_data.head()

Unnamed: 0,Series Title,Year Released,Content Rating,IMDB Rating,R Rating,Genre,Description,No of Seasons,Streaming Platform
0,Breaking Bad,2008,18+,9.5,100,"Crime,Drama","When Walter White, a New Mexico chemistry teac...",5Seasons,Netflix
1,Game of Thrones,2011,18+,9.3,99,"Action & Adventure,Drama",Seven noble families fight for control of the ...,8Seasons,"HBO MAX,HBO"
2,Rick and Morty,2013,18+,9.2,97,"Animation,Comedy",Rick is a mentally-unbalanced but scientifical...,4Seasons,"Free Services,HBO MAX,Hulu"
3,Stranger Things,2016,16+,8.8,96,"Drama,Fantasy","When a young boy vanishes, a small town uncove...",3Seasons,Netflix
4,The Boys,2019,18+,8.7,95,"Action & Adventure,Comedy",A group of vigilantes known informally as “The...,2Seasons,Prime Video


In [4]:
web_series_data.shape

(12353, 9)

In [5]:
selected_features = ['Content Rating','IMDB Rating','R Rating','Genre','Description','Streaming Platform']
print(selected_features)

['Content Rating', 'IMDB Rating', 'R Rating', 'Genre', 'Description', 'Streaming Platform']


In [9]:
for feature in selected_features:
  web_series_data[feature] = web_series_data[feature].fillna('')

In [11]:
combined_features = web_series_data['Content Rating'].astype(str)+' '+web_series_data['IMDB Rating'].astype(str)+' '+web_series_data['R Rating'].astype(str)+' '+web_series_data['Genre'].astype(str)+' '+web_series_data['Description'].astype(str)+' '+web_series_data['Streaming Platform'].astype(str)

In [12]:
print(combined_features)

0        18+ 9.5 100 Crime,Drama When Walter White, a N...
1        18+ 9.3 99 Action & Adventure,Drama Seven nobl...
2        18+ 9.2 97 Animation,Comedy Rick is a mentally...
3        16+ 8.8 96 Drama,Fantasy When a young boy vani...
4        18+ 8.7 95 Action & Adventure,Comedy A group o...
                               ...                        
12348      -1 2017,Prime Video A Fishing Story with Ron...
12349                                          -1 -1 -1 -1
12350      -1 2016,Prime Video Road to the NHL Outdoor ...
12351      -1 2018,Prime Video Addy Media has one or mo...
12352      -1 2014,Free Services My Dream Derelict Home...
Length: 12353, dtype: object


In [13]:
vectorizer = TfidfVectorizer()

In [14]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [15]:
print(feature_vectors)

  (0, 256)	0.05721753118520185
  (0, 117)	0.10796586723381307
  (0, 9788)	0.18019612062681198
  (0, 12159)	0.09273053071967656
  (0, 42722)	0.06601734920667847
  (0, 42307)	0.14094981519161914
  (0, 42760)	0.10607770577095663
  (0, 27645)	0.06308451820233883
  (0, 25763)	0.12888653750403972
  (0, 7807)	0.15177237806096944
  (0, 38991)	0.12203334924785361
  (0, 20363)	0.041277156880299135
  (0, 11287)	0.15975019979941793
  (0, 43030)	0.08186159572897687
  (0, 37267)	0.11958770101248102
  (0, 19365)	0.14423672615256647
  (0, 6942)	0.1618232447923916
  (0, 2813)	0.172140873612601
  (0, 16437)	0.11504446576848291
  (0, 31185)	0.19141706461155536
  (0, 28398)	0.09012578091568132
  (0, 28576)	0.0774953751925665
  (0, 40749)	0.06685766055983533
  (0, 43522)	0.07848245259862584
  (0, 23112)	0.10354706505190957
  :	:
  (12352, 28556)	0.10831112178959441
  (12352, 3951)	0.05997768197341551
  (12352, 15162)	0.08316179192349593
  (12352, 31538)	0.06019473005762175
  (12352, 20435)	0.03583559694795

#Cosine Similarity

In [16]:
similarity = cosine_similarity(feature_vectors)

In [17]:
print(similarity)

[[1.         0.12951651 0.10335865 ... 0.03838473 0.03691724 0.05298923]
 [0.12951651 1.         0.12648551 ... 0.0429817  0.03436602 0.04305147]
 [0.10335865 0.12648551 1.         ... 0.03488758 0.04034977 0.06866229]
 ...
 [0.03838473 0.0429817  0.03488758 ... 1.         0.1097578  0.10052896]
 [0.03691724 0.03436602 0.04034977 ... 0.1097578  1.         0.18402694]
 [0.05298923 0.04305147 0.06866229 ... 0.10052896 0.18402694 1.        ]]


In [18]:
print(similarity.shape)

(12353, 12353)


In [23]:
Web_Series_name = input(' Enter your favourite Web Series name : ')

 Enter your favourite Web Series name : Game of Thrones


In [24]:
list_of_all_titles = web_series_data['Series Title'].tolist()
print(list_of_all_titles)



In [25]:
find_close_match = difflib.get_close_matches(Web_Series_name, list_of_all_titles)
print(find_close_match)

['Game of Thrones', 'Game of Stones', 'Game of Crowns']


In [26]:
close_match = find_close_match[0]
print(close_match)

Game of Thrones


In [34]:
index_of_the_web_series = web_series_data[web_series_data['Series Title'] == close_match].index[0]
print(index_of_the_web_series)

1


In [35]:
similarity_score = list(enumerate(similarity[index_of_the_web_series]))
print(similarity_score)

[(0, 0.129516507495294), (1, 1.0), (2, 0.12648551015844564), (3, 0.10402442249922275), (4, 0.1302018945229472), (5, 0.13414094517990444), (6, 0.23984293496474535), (7, 0.13822639517411187), (8, 0.10832622711350952), (9, 0.14302893262803779), (10, 0.2060965350870879), (11, 0.09027382006330173), (12, 0.0954722977640145), (13, 0.28384238685806273), (14, 0.06493656490251094), (15, 0.10066451047753834), (16, 0.07789911448986846), (17, 0.2900606482683779), (18, 0.10909094823875051), (19, 0.08906101116358606), (20, 0.09485391262691763), (21, 0.10662430501787755), (22, 0.0990016672311163), (23, 0.08944531547826363), (24, 0.09035707581805477), (25, 0.10596685725474866), (26, 0.2082932938706191), (27, 0.11898476311893971), (28, 0.13933259997555927), (29, 0.10349620070749324), (30, 0.13013734807442964), (31, 0.09084772879553214), (32, 0.07147489056051815), (33, 0.0908479552027815), (34, 0.09173645799410378), (35, 0.13718410398008346), (36, 0.24684217639625078), (37, 0.22690205127414625), (38, 0.1

In [36]:
sorted_similar_web_series = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_web_series)

[(1, 1.0), (2057, 0.3054810080867889), (123, 0.299241017860076), (9732, 0.2941552799923807), (600, 0.2904526619939651), (17, 0.2900606482683779), (1516, 0.2889661831482772), (106, 0.28872539656585805), (8232, 0.2881087711291763), (13, 0.28384238685806273), (10712, 0.28382486084173525), (170, 0.280433645622461), (1961, 0.2735824854430672), (612, 0.27341944041504174), (143, 0.26861269955323597), (357, 0.26821362034916385), (10012, 0.26727913084820143), (1018, 0.26660047903578254), (7441, 0.2627134612422468), (596, 0.2622232442815631), (98, 0.2615356026629199), (10888, 0.26051355050935426), (616, 0.2592794781092421), (10217, 0.25920665398516674), (521, 0.25909504297046415), (1796, 0.25753526959686857), (3228, 0.2551536933256249), (1116, 0.2522542198114627), (172, 0.25156177342127445), (216, 0.24997033777025288), (210, 0.24912113816022513), (2123, 0.2484606821969567), (36, 0.24684217639625078), (2126, 0.24605521814678968), (8652, 0.24528345392117418), (316, 0.24475101168712632), (9095, 0.2

In [38]:
print('Web Series Suggested For You : \n')

i = 1

for web_series in sorted_similar_web_series:
  index = web_series[0]
  title_from_index = web_series_data[web_series_data.index==index]['Series Title'].values[0]
  if (i<6):
    print(i, '.',title_from_index)
    i+=1

Web Series Suggested For You : 

1 . Game of Thrones
2 . The World Between Us
3 . The Night Of
4 . The Weight of the Nation for Kids
5 . Looking


#Web Series Recommendation For Samy

In [42]:
Web_Series_name = input(' Enter your favourite Web Series name : ')

list_of_all_titles = web_series_data['Series Title'].tolist()

find_close_match = difflib.get_close_matches(Web_Series_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_web_series = web_series_data[web_series_data['Series Title'] == close_match].index[0]

similarity_score = list(enumerate(similarity[index_of_the_web_series]))

sorted_similar_web_series = sorted(similarity_score, key = lambda x:x[1], reverse = True)

print('Web Series Suggested For You : \n')

i = 1

for web_series in sorted_similar_web_series:
  index = web_series[0]
  title_from_index = web_series_data[web_series_data.index==index]['Series Title'].values[0]
  if (i<11):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite Web Series name : Game of Thrones
Web Series Suggested For You : 

1 . Game of Thrones
2 . The World Between Us
3 . The Night Of
4 . The Weight of the Nation for Kids
5 . Looking
6 . Band of Brothers
7 . The Brink
8 . Rome
9 . A Little Curious
10 . The Wire
