## Importing Dependencies/Library Modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
df = pd.read_csv("bollywood_full.csv")
df.head()

Unnamed: 0,title_x,imdb_id,poster_path,wiki_link,title_y,original_title,is_adult,year_of_release,runtime,genres,imdb_rating,imdb_votes,story,summary,tagline,actors,wins_nominations,release_date
0,Uri: The Surgical Strike,tt8291224,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Uri:_The_Surgica...,Uri: The Surgical Strike,Uri: The Surgical Strike,0,2019,138,Action|Drama|War,8.4,35112,Divided over five chapters the film chronicle...,Indian army special forces execute a covert op...,,Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...,4 wins,11 January 2019 (USA)
1,Battalion 609,tt9472208,,https://en.wikipedia.org/wiki/Battalion_609,Battalion 609,Battalion 609,0,2019,131,War,4.1,73,The story revolves around a cricket match betw...,The story of Battalion 609 revolves around a c...,,Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen...,,11 January 2019 (India)
2,The Accidental Prime Minister (film),tt6986710,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/The_Accidental_P...,The Accidental Prime Minister,The Accidental Prime Minister,0,2019,112,Biography|Drama,6.1,5549,Based on the memoir by Indian policy analyst S...,Explores Manmohan Singh's tenure as the Prime ...,,Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S...,,11 January 2019 (USA)
3,Why Cheat India,tt8108208,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Why_Cheat_India,Why Cheat India,Why Cheat India,0,2019,121,Crime|Drama,6.0,1891,The movie focuses on existing malpractices in ...,The movie focuses on existing malpractices in ...,,Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ...,,18 January 2019 (USA)
4,Evening Shadows,tt6028796,,https://en.wikipedia.org/wiki/Evening_Shadows,Evening Shadows,Evening Shadows,0,2018,102,Drama,7.3,280,While gay rights and marriage equality has bee...,Under the 'Evening Shadows' truth often plays...,,Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva...,17 wins & 1 nomination,11 January 2019 (India)


In [4]:
df.shape

(4329, 18)

In [5]:
df[df['original_title'] == 'Battalion 609']['summary']

1    The story of Battalion 609 revolves around a c...
Name: summary, dtype: object

In [6]:
df1 = df.drop(['title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y', 'is_adult', 'year_of_release','runtime', 'imdb_rating', 'imdb_votes', 'story', 'tagline', 'wins_nominations', 'release_date'], axis = 1)
df1.head()

Unnamed: 0,original_title,genres,summary,actors
0,Uri: The Surgical Strike,Action|Drama|War,Indian army special forces execute a covert op...,Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...
1,Battalion 609,War,The story of Battalion 609 revolves around a c...,Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen...
2,The Accidental Prime Minister,Biography|Drama,Explores Manmohan Singh's tenure as the Prime ...,Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S...
3,Why Cheat India,Crime|Drama,The movie focuses on existing malpractices in ...,Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ...
4,Evening Shadows,Drama,Under the 'Evening Shadows' truth often plays...,Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva...


In [7]:
df1.describe()

Unnamed: 0,original_title,genres,summary,actors
count,4329,4329,4329,4320
unique,4046,263,4021,4271
top,Dushman,Drama,Add a Plot »,Asha Parekh|Dharmendra|Ravindra Kapoor|Nasir H...
freq,4,517,264,2


In [8]:
df1.shape

(4329, 4)

In [9]:
df1.isnull().any()

original_title    False
genres            False
summary           False
actors             True
dtype: bool

In [10]:
df1.isnull().sum()

original_title    0
genres            0
summary           0
actors            9
dtype: int64

In [11]:
df1[df1['actors'].isnull() == True]

Unnamed: 0,original_title,genres,summary,actors
474,Dee Saturday Night,Drama,Add a Plot »,
966,Bal Ganesh 2,Animation,Lord Ganesha evolves from a mischievous boy w...,
1442,Final Solution,Documentary,The Final Solution is a 2003 documentary direc...,
1456,Ganges: River to Heaven,Documentary,Add a Plot »,
1547,Jang Aur Aman,Documentary|War,Add a Plot »,
2429,Bahurani,Comedy|Drama|Family,Amit (Rakesh Roshan) is a big city boy but hi...,
3441,Ek Anek Aur Ekta,Animation|Short,A young girl teaches her younger sibling about...,
4196,26-Jan,Action,Add a Plot »,
4328,Meena Bazaar,Drama,Add a Plot »,


In [12]:
final_features = ['original_title', 'genres', 'summary', 'actors']

for feature in final_features:
    print(f"Checking for Null Features in {feature}...")
    is_null = df[feature].isnull().any()
    if is_null == True:
        print(f"Changing Null Feature Values for {feature}...")
        df1[feature] = df1[feature].fillna('unknown')
        print("Null values Removed Sucessfully, your Data is Clean !")
    else:
        print(f"No Null Values found in {feature}\n")

Checking for Null Features in original_title...
No Null Values found in original_title

Checking for Null Features in genres...
No Null Values found in genres

Checking for Null Features in summary...
No Null Values found in summary

Checking for Null Features in actors...
Changing Null Feature Values for actors...
Null values Removed Sucessfully, your Data is Clean !


In [13]:
df1['actors'].isnull().any() 

False

In [14]:
df1['genres'] = df1['genres'].apply(lambda x: x.replace('|', ' '))

df1['actors'] = df1['actors'].apply(lambda x: x.replace('|', ' '))

df1.head()

Unnamed: 0,original_title,genres,summary,actors
0,Uri: The Surgical Strike,Action Drama War,Indian army special forces execute a covert op...,Vicky Kaushal Paresh Rawal Mohit Raina Yami Ga...
1,Battalion 609,War,The story of Battalion 609 revolves around a c...,Vicky Ahuja Shoaib Ibrahim Shrikant Kamat Elen...
2,The Accidental Prime Minister,Biography Drama,Explores Manmohan Singh's tenure as the Prime ...,Anupam Kher Akshaye Khanna Aahana Kumra Atul S...
3,Why Cheat India,Crime Drama,The movie focuses on existing malpractices in ...,Emraan Hashmi Shreya Dhanwanthary Snighdadeep ...
4,Evening Shadows,Drama,Under the 'Evening Shadows' truth often plays...,Mona Ambegaonkar Ananth Narayan Mahadevan Deva...


In [15]:
df1.sort_values('genres')
df1.head()

Unnamed: 0,original_title,genres,summary,actors
0,Uri: The Surgical Strike,Action Drama War,Indian army special forces execute a covert op...,Vicky Kaushal Paresh Rawal Mohit Raina Yami Ga...
1,Battalion 609,War,The story of Battalion 609 revolves around a c...,Vicky Ahuja Shoaib Ibrahim Shrikant Kamat Elen...
2,The Accidental Prime Minister,Biography Drama,Explores Manmohan Singh's tenure as the Prime ...,Anupam Kher Akshaye Khanna Aahana Kumra Atul S...
3,Why Cheat India,Crime Drama,The movie focuses on existing malpractices in ...,Emraan Hashmi Shreya Dhanwanthary Snighdadeep ...
4,Evening Shadows,Drama,Under the 'Evening Shadows' truth often plays...,Mona Ambegaonkar Ananth Narayan Mahadevan Deva...


In [16]:
len(df1)

4329

In [17]:
df1['original_title'][4328]

'Meena Bazaar'

In [18]:
df1['index'] = range(0, len(df1))
df1.head()

Unnamed: 0,original_title,genres,summary,actors,index
0,Uri: The Surgical Strike,Action Drama War,Indian army special forces execute a covert op...,Vicky Kaushal Paresh Rawal Mohit Raina Yami Ga...,0
1,Battalion 609,War,The story of Battalion 609 revolves around a c...,Vicky Ahuja Shoaib Ibrahim Shrikant Kamat Elen...,1
2,The Accidental Prime Minister,Biography Drama,Explores Manmohan Singh's tenure as the Prime ...,Anupam Kher Akshaye Khanna Aahana Kumra Atul S...,2
3,Why Cheat India,Crime Drama,The movie focuses on existing malpractices in ...,Emraan Hashmi Shreya Dhanwanthary Snighdadeep ...,3
4,Evening Shadows,Drama,Under the 'Evening Shadows' truth often plays...,Mona Ambegaonkar Ananth Narayan Mahadevan Deva...,4


In [19]:
combined_features = df1['genres'] + df1['summary'] + df1['actors']
print(combined_features)

0       Action Drama WarIndian army special forces exe...
1       WarThe story of Battalion 609 revolves around ...
2       Biography DramaExplores Manmohan Singh's tenur...
3       Crime DramaThe movie focuses on existing malpr...
4       DramaUnder the 'Evening Shadows'  truth often ...
                              ...                        
4324    DramaThe story is based on the true incident a...
4325    DramaAfter the death of his wife  a policeman ...
4326    Drama FamilyAdd a Plot »Raj Kapoor Rehana Om P...
4327    DramaThakur Jaspal Singh lives in the prestigi...
4328                             DramaAdd a Plot »unknown
Length: 4329, dtype: object


In [20]:
vect = TfidfVectorizer()

feature_vectors = vect.fit_transform(combined_features)
print(feature_vectors)

  (0, 1721)	0.0992912308598386
  (0, 8625)	0.1960304472433644
  (0, 680)	0.16773842929616536
  (0, 626)	0.12894751018029593
  (0, 6047)	0.18037662208599592
  (0, 15712)	0.1520846041387969
  (0, 18480)	0.17121972137608993
  (0, 1192)	0.20351470892970705
  (0, 13688)	0.18687354653345836
  (0, 14564)	0.1520846041387969
  (0, 16445)	0.14640899550881548
  (0, 11903)	0.11672677282430413
  (0, 9921)	0.15968339081801597
  (0, 13731)	0.17121972137608993
  (0, 8109)	0.17121972137608993
  (0, 8722)	0.048098977948630335
  (0, 13004)	0.13341514661389056
  (0, 9164)	0.16206282066618394
  (0, 9030)	0.13810441089020192
  (0, 6560)	0.12333633439266757
  (0, 18447)	0.15556589621872147
  (0, 12957)	0.13810441089020192
  (0, 10550)	0.13725209479890949
  (0, 13234)	0.09478768077553933
  (0, 11908)	0.09440811392609863
  :	:
  (4327, 3552)	0.15784340732663601
  (4327, 9468)	0.09073498027151451
  (4327, 7553)	0.10812405078812375
  (4327, 10523)	0.15162405083364688
  (4327, 17408)	0.08554196077351368
  (4327, 

In [21]:
simi = cosine_similarity(feature_vectors)
simi

array([[1.        , 0.07086042, 0.00890284, ..., 0.0119156 , 0.00268543,
        0.        ],
       [0.07086042, 1.        , 0.04335727, ..., 0.        , 0.01656873,
        0.        ],
       [0.00890284, 0.04335727, 1.        , ..., 0.        , 0.03754549,
        0.        ],
       ...,
       [0.0119156 , 0.        , 0.        , ..., 1.        , 0.        ,
        0.09786375],
       [0.00268543, 0.01656873, 0.03754549, ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.09786375, 0.        ,
        1.        ]])

In [22]:
simi.shape

(4329, 4329)

In [23]:
title_to_list = df1['original_title'].tolist()
title_to_list

['Uri: The Surgical Strike',
 'Battalion 609',
 'The Accidental Prime Minister',
 'Why Cheat India',
 'Evening Shadows',
 'Soni',
 'Fraud Saiyyan',
 'Bombairiya',
 'Manikarnika: The Queen of Jhansi',
 'Thackeray',
 'Amavas',
 'Gully Boy',
 'Hum chaar',
 'Total Dhamaal',
 'Sonchiriya',
 'Badla',
 'Mard Ko Dard Nahin Hota',
 'Hamid',
 'Photograph',
 'Risknamaa',
 'Mere Pyare Prime Minister',
 '22 Yards',
 'Kesari',
 'Notebook',
 'Junglee',
 'Gone Kesh',
 'Albert Pinto Ko Gussa Kyun Aata Hai?',
 'The Tashkent Files',
 'Kalank',
 'Setters',
 'Student of the Year 2',
 'PM Narendra Modi',
 'De De Pyaar De',
 "India's Most Wanted",
 'Yeh Hai India',
 'Khamoshi',
 'Kabir Singh',
 'Article 15',
 'One Day: Justice Delivered',
 'Hume Tumse Pyaar Kitna',
 'Super 30',
 'Family of Thakurganj',
 'Batla House',
 'Jhootha Kahin Ka',
 'Judgementall Hai Kya',
 'Chicken Curry Law',
 'Arjun Patiala',
 'Jabariya Jodi',
 'Pranaam',
 'The Sky Is Pink',
 'Mission Mangal',
 'Saaho',
 'Dream Girl',
 'Section 375

In [24]:
user_input = input("Enter your Favorite Movie Name: ")

find_match = difflib.get_close_matches(user_input, title_to_list)
print(find_match)

Enter your Favorite Movie Name: Housefull
['Housefull', 'Housefull 4', 'Housefull 2']


In [25]:
len(find_match)

3

In [26]:
closest_match = find_match[0]
print(closest_match)

Housefull


In [27]:
closest_match_movie_index = df1[df1.original_title == closest_match]['index'].values[0]
print(closest_match_movie_index)

857


In [28]:
df1['original_title'][857]

'Housefull'

In [29]:
simi_output = list(enumerate(simi[closest_match_movie_index]))
print(simi_output)

[(0, 0.029154508310161645), (1, 0.012939591495438362), (2, 0.017129721179811923), (3, 0.00387750355336335), (4, 0.0062492150948044774), (5, 0.003772506823832812), (6, 0.006967380003226716), (7, 0.05634816474714833), (8, 0.01106449912164487), (9, 0.001995555121394202), (10, 0.012614076098278859), (11, 0.010150348692679577), (12, 0.03474854051526661), (13, 0.11829273874692367), (14, 0.007148421339663678), (15, 0.010497116247593304), (16, 0.010835399466158023), (17, 0.016282204544848623), (18, 0.012550791685545211), (19, 0.0071035848723589355), (20, 0.0103939016912338), (21, 0.0054926639534198475), (22, 0.04190179131840186), (23, 0.004337372594466519), (24, 0.004376242916779494), (25, 0.07369304566497045), (26, 0.016466705363360847), (27, 0.025156883865655504), (28, 0.030432917938023916), (29, 0.01704566652271033), (30, 0.008094425279700286), (31, 0.04059828884554606), (32, 0.013641774231397865), (33, 0.030741898379880627), (34, 0.004433738861814987), (35, 0.002648702775477329), (36, 0.05

In [30]:
len(simi_output)

4329

In [31]:
sorted_simi_output = sorted(simi_output, key = lambda x: x[1], reverse = True)
sorted_simi_output

[(857, 0.9999999999999998),
 (736, 0.19186800822563221),
 (847, 0.18351190966843092),
 (1085, 0.15374233032829435),
 (549, 0.15314916894627187),
 (1114, 0.15267781148223733),
 (1137, 0.14023389965868846),
 (1168, 0.13703403119657906),
 (379, 0.1343138276672695),
 (1333, 0.1294092988642788),
 (1831, 0.12755895694653058),
 (1178, 0.12494723996084584),
 (1370, 0.12190608221293017),
 (2337, 0.1213583026094234),
 (577, 0.12039813833155365),
 (1249, 0.12002723916388418),
 (13, 0.11829273874692367),
 (895, 0.11782212525698915),
 (669, 0.11629529532489111),
 (1196, 0.11369099917622537),
 (700, 0.11360233955714452),
 (4102, 0.11273758227656822),
 (1317, 0.11240148907115279),
 (713, 0.11189464692504895),
 (698, 0.11163546372459333),
 (1246, 0.10969260763200026),
 (924, 0.10644102710792842),
 (968, 0.1061714193937845),
 (514, 0.1051866602506731),
 (427, 0.10494124900801376),
 (530, 0.10464319725375669),
 (1829, 0.10449804379194541),
 (705, 0.10430277113963744),
 (632, 0.1034668457422716),
 (1170,

In [32]:
df1['original_title'][736]

'Housefull 2'

In [33]:
print("Best Movie Recommendations for You: \n")

i = 1
for index in sorted_simi_output:
    movie_index = index[0]
    title_from_movie_index = df1[df1['index'] == movie_index]['original_title'].values[0]
    
    if i<11:
        print(title_from_movie_index)
        i+=1

Best Movie Recommendations for You: 

Housefull
Housefull 2
Jaane Kahan Se Aayi Hai
Heyy Babyy
Happy New Year
Om Shanti Om
Welcome
Don
Roy
Bardaasht


In [54]:
df1[df1['original_title'] == "Jaane Kahan Se Aayi Hai"]

Unnamed: 0,original_title,genres,summary,actors,index
847,Jaane Kahan Se Aayi Hai,Comedy Drama Fantasy,Rejected by the woman he loves a man must fin...,Priyanka Chopra Akshay Kumar Jacqueline Fernan...,847


In [35]:
df1[df1['original_title'] == "Housefull"]

Unnamed: 0,original_title,genres,summary,actors,index
857,Housefull,Comedy,Believing himself to be jinxed a man attempts...,Akshay Kumar Deepika Padukone Riteish Deshmukh...,857


In [36]:
df1[df1['original_title'] == "Housefull 2"]

Unnamed: 0,original_title,genres,summary,actors,index
736,Housefull 2,Action Comedy Musical,A comedy of errors wherein four men help each ...,Akshay Kumar John Abraham Asin Jacqueline Fern...,736


In [66]:
def movie_rec():
    user_input = input("Enter your Favorite Movie Name: ")
    find_match = difflib.get_close_matches(user_input, title_to_list)
    closest_match = find_match[0]
    closest_match_movie_index = df1[df1.original_title == closest_match]['index'].values[0]
    simi_output = list(enumerate(simi[closest_match_movie_index]))
    sorted_simi_output = sorted(simi_output, key = lambda x: x[1], reverse = True)
    
    print("Best Movie Recommendations for You: \n")

    i = 1
    for index in sorted_simi_output:
        movie_index = index[0]
        title_from_movie_index = df1[df1['index'] == movie_index]['original_title'].values[0]

        if i<11:
            print(title_from_movie_index)
            i+=1

In [67]:
movie_rec()

Enter your Favorite Movie Name: Housefull
Best Movie Recommendations for You: 

Housefull
Housefull 2
Jaane Kahan Se Aayi Hai
Heyy Babyy
Happy New Year
Om Shanti Om
Welcome
Don
Roy
Bardaasht
