<a href="https://colab.research.google.com/github/dheeraj25406/ML-Projects/blob/main/movie_recommend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the dependencies

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


Data Collection and Preprocessing

In [None]:
# Read the Telugu movies CSV into a pandas DataFrame.
# NOTE(review): '/content/...' looks like the Colab working directory — adjust when running elsewhere.
dataset_path='/content/TeluguMovies_dataset.csv'
movies_data=pd.read_csv(dataset_path)

In [None]:
# Preview the first rows to confirm the columns were parsed as expected
movies_data.head()

Unnamed: 0,Index,Movie,Year,Certificate,Genre,Overview,Runtime,Rating,No.of.Ratings
0,0,Bahubali: The Beginning,2015.0,UA,"Action, Drama","In ancient India, an adventurous and darin...",159,8.1,99114
1,1,Baahubali 2: The Conclusion,2017.0,UA,"Action, Drama","When Shiva, the son of Bahubali, learns ab...",167,8.2,71458
2,2,1 - Nenokkadine,2014.0,UA,"Action, Thriller",A rock star must overcome his psychologica...,170,8.1,42372
3,3,Dhoom:3,2013.0,UA,"Action, Thriller","When Sahir, a circus entertainer trained i...",172,5.4,42112
4,4,Ra.One,2011.0,U,"Action, Adventure, Sci-Fi",When the titular antagonist of an action g...,156,4.6,37211


In [None]:
# Dataset size as a (number of rows, number of columns) tuple
movies_data.shape

(1400, 9)

In [None]:
# Text columns whose contents are merged and vectorized to compare movies
selected_features = ["Genre", "Overview"]

In [None]:
# Echo the chosen feature columns as a quick sanity check
print(selected_features)

['Genre', 'Overview']


In [None]:
# Swap missing (NaN) entries in the selected text columns for empty strings,
# so the columns can be concatenated as plain text afterwards.
movies_data[selected_features]=movies_data[selected_features].fillna('')

In [None]:
# Build one text string per movie by joining the selected feature columns with
# single spaces. The columns come from `selected_features`, so editing that
# list changes what gets vectorized.
# Assumes missing values were already replaced with '' (NaN would propagate through +).
combined_features=movies_data[selected_features[0]]
for feature in selected_features[1:]:
  combined_features=combined_features+' '+movies_data[feature]

In [None]:
# Inspect the combined text built for each movie
print(combined_features)

0       Action, Drama                 In ancient India...
1       Action, Drama                 When Shiva, the ...
2       Action, Thriller                 A rock star m...
3       Action, Thriller                 When Sahir, a...
4       Action, Adventure, Sci-Fi                 When...
                              ...                        
1395                                                     
1396    Comedy, Drama                 The movie is abo...
1397    Drama, Romance                 Muvva Gopaludu ...
1398         Hero Charan (Tarun) a middle class family...
1399    Drama                 Surendra marries Savitri...
Length: 1400, dtype: object


In [None]:
# Create a TF-IDF vectorizer with default settings; it is fitted on the combined text in a later cell
vectorizer=TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and transform it into a sparse
# TF-IDF matrix stored in feature_vectors (one row per entry of combined_features)
feature_vectors=vectorizer.fit_transform(combined_features)

In [None]:
# Show the sparse matrix summary and its stored (row, column) weights
print(feature_vectors)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 30372 stored elements and shape (1400, 5437)>
  Coords	Values
  (0, 117)	0.08236987903421875
  (0, 1422)	0.07691035351646491
  (0, 2273)	0.17581621681899193
  (0, 276)	0.30485129679550443
  (0, 2286)	0.20289565072959406
  (0, 266)	0.11830459504933362
  (0, 163)	0.3151030583410978
  (0, 277)	0.08010763987538654
  (0, 1174)	0.346947938326426
  (0, 2847)	0.14365964127076553
  (0, 531)	0.21956841838511332
  (0, 2374)	0.2575480577572813
  (0, 1205)	0.30485129679550443
  (0, 3282)	0.2367827536134717
  (0, 1723)	0.28939293774260944
  (0, 571)	0.19552911647629348
  (0, 5022)	0.1757804527187766
  (0, 5296)	0.3151030583410978
  (0, 3437)	0.21415717710767618
  (1, 117)	0.06800083761557615
  (1, 1422)	0.06349370087404173
  (1, 2273)	0.0725729486935782
  (1, 5331)	0.11951839834195103
  (1, 4335)	0.22176679516331324
  (1, 4835)	0.12235117240934917
  :	:
  (1399, 277)	0.053062479266743776
  (1399, 3270)	0.056137072556286624
  (1399, 4902)	

Cosine Similarity

In [None]:
# Compute the cosine similarity between every pair of rows of feature_vectors;
# similarity[i][j] is the score between the movie in row i and the movie in row j
similarity=cosine_similarity(feature_vectors)

In [None]:
# Peek at the similarity matrix (the diagonal is each movie compared with itself)
print(similarity)

[[1.         0.02324405 0.00735101 ... 0.02286047 0.01181832 0.04621893]
 [0.02324405 1.         0.05750428 ... 0.03434865 0.08728437 0.06353   ]
 [0.00735101 0.05750428 1.         ... 0.         0.04562549 0.01262951]
 ...
 [0.02286047 0.03434865 0.         ... 1.         0.05559826 0.0149011 ]
 [0.01181832 0.08728437 0.04562549 ... 0.05559826 1.         0.0312492 ]
 [0.04621893 0.06353    0.01262951 ... 0.0149011  0.0312492  1.        ]]


In [None]:
# The matrix should be square: one row and one column per movie
print(similarity.shape)

(1400, 1400)


In [None]:
# Ask the user for a movie title; the spelling need not be exact because the
# name is fuzzy-matched against the dataset titles in a later cell
movie_name=input('Enter your favourite movie: ')

Enter your favourite movie: Baahubali The Benigging


In [None]:
# Collect every movie title in the dataset; the fuzzy matching searches this list.
list_of_all_titles=movies_data['Movie'].tolist()
# Show only a short preview: printing every title floods the notebook output.
print(list_of_all_titles[:10])

['Bahubali: The Beginning', 'Baahubali 2: The Conclusion', '1 - Nenokkadine', 'Dhoom:3', 'Ra.One', 'Dhoom:2', 'Eega', 'Krrish 3', 'Arjun Reddy', 'Rangasthalam', 'Magadheera', 'War', 'Bharat Ane Nenu', 'Saaho', 'Theri', 'Dookudu', 'Pokiri', 'Sarkar', 'Athadu', 'The Ghazi Attack', 'Kabali', 'MSG: The Messenger of God', 'Nanban', 'Srimanthudu', 'Veer - Vivegam', 'Billa 2', 'Manam', '7 Aum Arivu', 'Bigil', 'Business Man', 'Geetha Govindam', 'Mahanati', 'Spyder', 'Nannaku Prematho', 'Dabangg 3', 'MSG 2 the Messenger', 'Manikarnika: The Queen of Jhansi', 'Race Gurram', 'Okkadu', 'Bommarillu', 'Atharintiki Daaredi', 'Khaleja', 'Yennai Arindhaal', 'Thalaivaa', 'Kaala', 'Bairavaa', 'Goodachari', 'Puli', 'Pulimurugan', 'Veeram', 'Vedam', 'Yevadu', 'Aravindha Sametha Veera Raghava', 'Billa', 'Jersey', 'Sye Raa Narasimha Reddy', 'Ala Vaikunthapurramuloo', 'Janatha Garage', 'Gabbar Singh', 'Temper', 'Game Over', 'Singam 2', 'Dhruva', 'Jalsa', 'Maharshi', 'Pelli Choopulu', 'Arya 2', 'Chekka Chivanth

In [None]:
# Fuzzy-match the typed name against the dataset titles so that small typos
# still resolve to real titles; the result is a list of candidate titles
find_close_match=difflib.get_close_matches(movie_name,list_of_all_titles)

In [None]:
# Candidate titles returned by the fuzzy match
print(find_close_match)

['Bahubali: The Beginning', 'Baahubali 2: The Conclusion']


In [None]:
# Take the best-ranked fuzzy-match candidate as the movie the user meant.
# difflib returns an empty list when no title is similar enough, so fail with a
# clear message instead of an IndexError from find_close_match[0].
if not find_close_match:
  raise ValueError(f'No movie title similar to {movie_name!r} was found in the dataset')
close_match=find_close_match[0]
print(close_match)

Bahubali: The Beginning


In [None]:
# Locate the row position of the matched title. `similarity` is ordered by
# DataFrame row, so take the row position itself rather than the 'Index' data
# column, which only equals the row position while the CSV rows stay in their
# original order. If a title occurs more than once, the first row is used.
index_of_the_movie=np.flatnonzero((movies_data.Movie==close_match).to_numpy())[0]
print(index_of_the_movie)

0


In [None]:
# Pair each movie's row position with its similarity to the chosen movie
similarity_score=[(position,score) for position,score in enumerate(similarity[index_of_the_movie])]

In [None]:
# Preview a few (row position, score) pairs; the full list has one entry per movie
print(similarity_score[:10])

[(0, np.float64(1.0000000000000002)), (1, np.float64(0.023244045031615514)), (2, np.float64(0.007351010592699853)), (3, np.float64(0.03600817542313371)), (4, np.float64(0.021803643389756537)), (5, np.float64(0.009669259336848292)), (6, np.float64(0.04302601549909576)), (7, np.float64(0.048683441224766894)), (8, np.float64(0.017409043936582852)), (9, np.float64(0.01925926283186412)), (10, np.float64(0.015075677378666368)), (11, np.float64(0.03698085846392866)), (12, np.float64(0.07086297098304602)), (13, np.float64(0.16142690811294846)), (14, np.float64(0.010173869908571859)), (15, np.float64(0.019633101048071387)), (16, np.float64(0.012599644407574755)), (17, np.float64(0.05242950200645333)), (18, np.float64(0.03448340389744975)), (19, np.float64(0.0051378353010509414)), (20, np.float64(0.009293265711139892)), (21, np.float64(0.05461806003334637)), (22, np.float64(0.036372770840818226)), (23, np.float64(0.09162967565432889)), (24, np.float64(0.020700896581761472)), (25, np.float64(0.03

In [None]:
# Number of (row position, score) pairs — one per row of the similarity matrix
len(similarity_score)

1400

In [None]:
# Rank all movies by similarity to the chosen movie, highest score first.
sorted_similar_movies=sorted(similarity_score,key=lambda x:x[1],reverse=True)
# Show only the top of the ranking: the full list has one tuple per movie.
print(sorted_similar_movies[:10])

[(0, np.float64(1.0000000000000002)), (701, np.float64(0.2045749076727073)), (916, np.float64(0.19931863434750124)), (283, np.float64(0.1979854231564656)), (741, np.float64(0.1947646623764026)), (430, np.float64(0.18308533622182116)), (13, np.float64(0.16142690811294846)), (31, np.float64(0.15281071585764722)), (595, np.float64(0.14732434063156305)), (1200, np.float64(0.1397493934530699)), (462, np.float64(0.13535015116466756)), (549, np.float64(0.13272135819626052)), (667, np.float64(0.1326148611024624)), (961, np.float64(0.1252588831773866)), (299, np.float64(0.12258521200400627)), (383, np.float64(0.11658186492916289)), (989, np.float64(0.11557097344145115)), (87, np.float64(0.11526277514063393)), (276, np.float64(0.11499396680730431)), (1316, np.float64(0.11387636204060347)), (638, np.float64(0.11269427425623467)), (675, np.float64(0.11269427425623467)), (685, np.float64(0.11269427425623467)), (922, np.float64(0.11269427425623467)), (1044, np.float64(0.11269427425623467)), (1072, n

In [None]:
# Print the titles of the 30 most similar movies, best match first.
# Only the top entries are visited, and each title is read directly by row
# position (the positions were produced by enumerate over a row of `similarity`).
# The first entry is normally the chosen movie itself, since it is most similar to itself.
print('Movies suggested for you: \n')
for i,movie in enumerate(sorted_similar_movies[:30],start=1):
  index=movie[0]
  title_from_index=movies_data['Movie'].iloc[index]
  print(i,'.',title_from_index)


Movies suggested for you: 

1 . Bahubali: The Beginning
2 . Assembly Rowdy
3 . Anthaka Mundu Aa Tarvatha
4 . Sree
5 . Rowdy Fellow
6 . Bobby
7 . Saaho
8 . Mahanati
9 . Chinnadana Nee Kosam
10 . Ramudochhadu
11 . Bhadra
12 . Andhrawala
13 . O Pitta Katha
14 . Guntur Talkies
15 . Oh My Friend
16 . Gayatri
17 . Tenali Ramakrishna BA.BL
18 . Sarrainodu
19 . Kaashmora
20 . Rajdooth
21 . Kondaveeti Raja
22 . Kondaveeti Simhasanam
23 . Eeshwar
24 . Athanokkade
25 . Rikshavodu
26 . Dhruvanakshatram
27 . Mugguru Kodukulu
28 . Vijay
29 . Varasudu
30 . Anna Thamudu


Movie Recommendation System

In [None]:
# End-to-end recommender: read a title, fuzzy-match it against the dataset,
# rank every movie by cosine similarity to the match and print the top 30.
movie_name=input('Enter your favourite movie: ')

list_of_all_titles=movies_data['Movie'].tolist()

find_close_match=difflib.get_close_matches(movie_name,list_of_all_titles)

# difflib returns an empty list when no title is similar enough; report that
# clearly instead of failing with an IndexError on find_close_match[0].
if not find_close_match:
  raise ValueError(f'No movie title similar to {movie_name!r} was found in the dataset')
close_match=find_close_match[0]

# `similarity` is ordered by DataFrame row, so use the row position of the
# matched title rather than the 'Index' data column.
index_of_the_movie=np.flatnonzero((movies_data.Movie==close_match).to_numpy())[0]

# (row position, score) pairs for every movie, then ranked highest score first
similarity_score=list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies=sorted(similarity_score,key=lambda x:x[1],reverse=True)

# Visit only the top 30 entries and read each title directly by row position
print('Movies suggested for you: \n')
for i,movie in enumerate(sorted_similar_movies[:30],start=1):
  index=movie[0]
  title_from_index=movies_data['Movie'].iloc[index]
  print(i,'.',title_from_index)


Enter your favourite movie: Dhruva
Movies suggested for you: 

1 . Dhruva
2 . Ayogya
3 . Temper
4 . Trinetrudu
5 . Sarileru Neekevvaru
6 . Ganesh
7 . Penguin
8 . Officer
9 . Yennai Arindhaal
10 . Nakshatram
11 . Bruce Lee: The Fighter
12 . Yaan
13 . Super Police
14 . Gudachari No.1
15 . Thaandavam
16 . Seenu
17 . Hare Ram
18 . Vinaya Vidheya Rama
19 . Legend
20 . Brahma
21 . State Rowdy
22 . Neeku Naaku Dash Dash
23 . Golimar
24 . Paisa
25 . Driver Ramudu
26 . Asthram
27 . Geethanjali
28 . Operation Duryodhana
29 . M Dharmaraju M.A.
30 . Allari Police
