In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np


In [55]:
credits = pd.read_csv("tmdb_5000_credits.csv")
movies = pd.read_csv("tmdb_5000_movies.csv")
print(movies.head(10))
print(credits.head(10))

      budget                                             genres  \
0  237000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
1  300000000  [{"id": 12, "name": "Adventure"}, {"id": 14, "...   
2  245000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
3  250000000  [{"id": 28, "name": "Action"}, {"id": 80, "nam...   
4  260000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
5  258000000  [{"id": 14, "name": "Fantasy"}, {"id": 28, "na...   
6  260000000  [{"id": 16, "name": "Animation"}, {"id": 10751...   
7  280000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
8  250000000  [{"id": 12, "name": "Adventure"}, {"id": 14, "...   
9  250000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   

                                            homepage      id  \
0                        http://www.avatarmovie.com/   19995   
1       http://disney.go.com/disneypictures/pirates/     285   
2        http://www.sonypictures.com/movies/spectre/  206647   
3     

In [56]:
movies = movies.merge(credits, on='title')
movies = movies[['movie_id', 'title', 'cast', 'crew', 'genres', 'budget', 'revenue', 'vote_average']]


In [57]:
def extract_names(data):
    parsed_data = eval(data)
    names = [entry['name'] for entry in parsed_data if 'name' in entry]
    return names

def extract_top_3_cast(data):
    parsed_data = eval(data)
    names = [entry['name'] for entry in parsed_data[:3] if 'name' in entry]
    return names

def extract_director(data):
    parsed_data = eval(data)
    names = [entry['name'] for entry in parsed_data if entry.get('job') == 'Director']
    return names

# ye 3no functions cast crew and genres se name ki value extract krne k lye bnye h

In [58]:
movies['genres'] = movies['genres'].apply(extract_names)
movies['cast'] = movies['cast'].apply(extract_top_3_cast)
movies['crew'] = movies['crew'].apply(extract_director)


In [59]:
movies.fillna({'budget': 0, 'revenue': 0, 'vote_average': movies['vote_average'].mean()}, inplace=True)
# Filling NULL values

In [77]:
# Regression
def combine_features(row):
    return " ".join(row['genres'] + row['cast'] + row['crew']).lower()

movies['tags'] = movies.apply(combine_features, axis=1)  

In [78]:
X = movies[['budget', 'revenue']]
y = movies['vote_average']

In [79]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


In [73]:
reg_model = LinearRegression()
reg_model.fit(X_train, y_train)
y_pred = reg_model.predict(X_test)

In [80]:
# mean squared error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Regression RMSE : ",rmse)


Regression RMSE :  1.1807791979316857


In [85]:
def recommend_movies(movie_name):
    movie = movies[movies['title'].str.lower() == movie_name.lower()]
    if movie.empty:
        print("Movie '{movie_name}' not found in the dataset")

    budget = movie.iloc[0]['budget']
    revenue = movie.iloc[0]['revenue']

    predicted_rating = reg_model.predict([[budget, revenue]])

    recommendations = movies[(movies['budget'] <= budget * 1.2) &(movies['budget'] >= budget * 0.8) &(movies['revenue'] <= revenue * 1.2) &(movies['revenue'] >= revenue * 0.8) &
        (movies['vote_average'] >= predicted_rating[0] - 1)].sort_values('vote_average', ascending=False).head(10)
    # vote average k hisaab se ascending order m movies show hongi
    
    return recommendations[['title', 'vote_average', 'genres']]


print(recommend_movies("The Dark Knight Rises"))

                                         title  vote_average  \
3                        The Dark Knight Rises           7.6   
22         The Hobbit: The Desolation of Smaug           7.6   
42                                 Toy Story 3           7.6   
8       Harry Potter and the Half-Blood Prince           7.4   
19   The Hobbit: The Battle of the Five Armies           7.1   
26                  Captain America: Civil War           7.1   
12  Pirates of the Caribbean: Dead Man's Chest           7.0   
98           The Hobbit: An Unexpected Journey           7.0   
1     Pirates of the Caribbean: At World's End           6.9   
29                                     Skyfall           6.9   

                                  genres  
3       [Action, Crime, Drama, Thriller]  
22                  [Adventure, Fantasy]  
42           [Animation, Family, Comedy]  
8           [Adventure, Fantasy, Family]  
19          [Action, Adventure, Fantasy]  
26  [Adventure, Action, Science Ficti



In [86]:
print(recommend_movies("Man of steel"))

                                                title  vote_average  \
46                         X-Men: Days of Future Past           7.5   
6                                             Tangled           7.4   
34                                Monsters University           7.0   
81                                         Maleficent           7.0   
30                                       Spider-Man 2           6.7   
45                                        World War Z           6.7   
55                                              Brave           6.7   
63  The Chronicles of Narnia: The Lion, the Witch ...           6.7   
24                                          King Kong           6.6   
79                                         Iron Man 2           6.6   

                                               genres  
46      [Action, Adventure, Fantasy, Science Fiction]  
6                                 [Animation, Family]  
34                                [Animation, Fami



In [87]:
print(recommend_movies("Batman"))

                                     title  vote_average  \
1543                             Toy Story           7.7   
1153            Back to the Future Part II           7.4   
1357                          The Hangover           7.2   
1699  Indiana Jones and the Temple of Doom           7.1   
1121                          Notting Hill           7.0   
1360                                Batman           7.0   
1361                                Batman           7.0   
1467                       The Maze Runner           7.0   
1544                                 Speed           6.8   
1338                              Twilight           5.8   

                                            genres  
1543                   [Animation, Comedy, Family]  
1153  [Adventure, Comedy, Family, Science Fiction]  
1357                                      [Comedy]  
1699                           [Adventure, Action]  
1121                      [Romance, Comedy, Drama]  
1360                 



In [88]:
print(recommend_movies("Rain Man"))

                                     title  vote_average  \
1543                             Toy Story           7.7   
1814                              Rain Man           7.6   
2280                    Dances with Wolves           7.6   
1699  Indiana Jones and the Temple of Doom           7.1   
1813                        Mrs. Doubtfire           7.0   
1544                                 Speed           6.8   
2315                              The Mask           6.6   
2020          There's Something About Mary           6.5   
1819                         The Bodyguard           6.1   

                                         genres  
1543                [Animation, Comedy, Family]  
1814                                    [Drama]  
2280                [Adventure, Drama, Western]  
1699                        [Adventure, Action]  
1813                    [Comedy, Drama, Family]  
1544                 [Action, Adventure, Crime]  
2315          [Romance, Comedy, Crime, Fantasy]  

