In [7]:
#importing necessary Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib
import nltk
import pickle


In [8]:
# Ensure NLTK resources are downloaded
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
# Load dataset
df = pd.read_csv(r"C:\Users\Abhishek\Desktop\project done\movie_rm_s\updated_bollywood_movies.csv")

# Text Cleaning
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Abhishek\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Abhishek\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Abhishek\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [9]:
def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]
    return ' '.join(tokens)

df.fillna('', inplace=True)
df['movie_name'] = df['movie_name'].apply(preprocess_text)
df['genre'] = df['genre'].apply(preprocess_text)
df['cast'] = df['cast'].apply(preprocess_text)
df['director'] = df['director'].apply(preprocess_text)
df['overview'] = df['overview'].apply(preprocess_text)

# Tokenization
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['overview'])

# Save the tokenizer for future use
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(df['overview'])
padded_sequences = pad_sequences(sequences, maxlen=200)

# Encode labels for multi-class classification (genre, cast, etc.)
label_encoder = LabelEncoder()
df['genre_encoded'] = label_encoder.fit_transform(df['genre'])


In [10]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, df['genre_encoded'], test_size=0.3, random_state=42)

In [11]:
# Model architecture: LSTM
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=200))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(df['genre_encoded'].unique()), activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.3)

# Save model for future use
model.save('movie_recommendation_system_lstm.keras')




Epoch 1/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 1s/step - accuracy: 0.0236 - loss: 5.2816 - val_accuracy: 0.0801 - val_loss: 4.5936
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 551ms/step - accuracy: 0.1003 - loss: 4.4724 - val_accuracy: 0.0693 - val_loss: 4.4094
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 581ms/step - accuracy: 0.0726 - loss: 4.3565 - val_accuracy: 0.0801 - val_loss: 4.4018
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 581ms/step - accuracy: 0.0905 - loss: 4.2650 - val_accuracy: 0.0801 - val_loss: 4.4128
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 628ms/step - accuracy: 0.0935 - loss: 4.2606 - val_accuracy: 0.0801 - val_loss: 4.4206
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 672ms/step - accuracy: 0.0862 - loss: 4.3069 - val_accuracy: 0.0801 - val_loss: 4.4256
Epoch 7/10
[1m34/34[0m

In [12]:
# Function to handle spelling mistakes
def correct_spelling(input_string, possible_values):
    close_matches = difflib.get_close_matches(input_string.lower(), possible_values, n=1)
    return close_matches[0] if close_matches else input_string


In [14]:
def recommend_movies(movie_name, genre, casting, year_of_release, year_range, top_n):
    # Ensure the 'year_of_release' is treated as numeric
    df['year_of_release'] = pd.to_numeric(df['year_of_release'], errors='coerce')

    # Initialize the filtered DataFrame based on initial filters
    df_filtered = df

    # Filter based on movie name if provided
    if movie_name:
        df_filtered = df_filtered[df_filtered['movie_name'].str.contains(movie_name, case=False, na=False)]

    # Filter based on genre if provided
    if genre:
        df_filtered = df_filtered[df_filtered['genre'].str.contains(genre, case=False, na=False)]

    # Filter based on casting if provided
    if casting:
        df_filtered = df_filtered[df_filtered['cast'].str.contains(casting, case=False, na=False)]

    # Filter based on year of release if provided
    if year_of_release:
        df_filtered = df_filtered[df_filtered['year_of_release'] == year_of_release]

    # Filter based on year range if provided
    if year_range:
        df_filtered = df_filtered[(df_filtered['year_of_release'] >= year_range[0]) & 
                                  (df_filtered['year_of_release'] <= year_range[1])]

    # Here, you can replace this with actual predictions from the model (mock predictions used)
    predictions = np.random.rand(df_filtered.shape[0])  # Mock predictions

    # Add predictions to the filtered DataFrame for display
    df_filtered['predictions'] = predictions

    # Sort by predictions and return the top N
    return df_filtered.nlargest(top_n, 'predictions')


def interactive_recommendation():
    while True:
        # Ask user for inputs one by one
        movie_name = input("Enter movie name (or leave blank): ")
        genre = input("Enter genre (or leave blank): ")
        casting = input("Enter casting (or leave blank): ")
        year_of_release = input("Enter year of release (or leave blank): ")
        
        # Convert year_of_release to int if provided
        if year_of_release:
            try:
                year_of_release = int(year_of_release)
            except ValueError:
                year_of_release = None

        # Ask for year range if desired
        year_range_input = input("Enter year range (e.g., 2000-2010) or leave blank: ")
        if year_range_input:
            try:
                year_range = list(map(int, year_range_input.split('-')))
            except ValueError:
                year_range = None
        else:
            year_range = None

        # Get top recommendations
        top_10 = recommend_movies(movie_name, genre, casting, year_of_release, year_range, top_n=10)
        if top_10.empty:
            print("No suitable recommendations found based on your search.")
        else:
            print("Top 10 Movie Recommendations:")
            print(top_10)

        # Ask if user wants more recommendations
        response = input("Would you like to see more recommendations? (yes/no): ").strip().lower()
        if response == 'yes':
            top_20 = recommend_movies(movie_name, genre, casting, year_of_release, year_range, top_n=20)
            print("Top 20 Movie Recommendations:")
            print(top_20)

            response = input("Would you like to see even more recommendations? (yes/no): ").strip().lower()
            if response == 'yes':
                top_50 = recommend_movies(movie_name, genre, casting, year_of_release, year_range, top_n=50)
                print("Top 50 Movie Recommendations:")
                print(top_50)
            else:
                print("ok....I will glad to help you again....Thank you!")
        elif response == 'no':
            print("ok....I will glad to help you again....Thank you!")
        else:
            print("ok....I will glad to help you again....Thank you!")
            break



In [15]:
# Call the function without parameters to start the interaction
interactive_recommendation()

Enter movie name (or leave blank):  
Enter genre (or leave blank):  
Enter casting (or leave blank):  
Enter year of release (or leave blank):  2010-2020
Enter year range (e.g., 2000-2010) or leave blank:  


Top 10 Movie Recommendations:
      sl no IMDB_movie_id        movie_name  year_of_release  \
1814   1887    tt19888200              maid           2022.0   
842     867     tt1170399        c kkompany           2008.0   
915     890     tt1223922              blue           2009.0   
1080   1126     tt2357489           zanjeer           2013.0   
1341   1399     tt6143422        kadvi hawa           2017.0   
1459   1496     tt2150177  bhaiaji superhit           2018.0   
1225   1258     tt5207116  x : past present           2015.0   
1519   1575     tt7255568    student year 2           2019.0   
692     720     tt0443208               rog           2005.0   
2086   1963    tt24964506          underbug           2023.0   

                             genre  \
1814                       romance   
842      comedy , drama , thriller   
915   action , adventure , mystery   
1080        action , crime , drama   
1341                         drama   
1459       action , comedy , drama   

Would you like to see more recommendations? (yes/no):  yes


Top 20 Movie Recommendations:
      sl no IMDB_movie_id                    movie_name  year_of_release  \
1725   1728    tt11213600            sardar ka grandson           2021.0   
36       35     tt0059893                          waqt           1965.0   
1231   1265     tt4023852               welcome karachi           2015.0   
791     825     tt0860454  mp3 : mera pehla pehla pyaar           2007.0   
112     119     tt0178186                        aandhi           1975.0   
2032   1907    tt18561736           great indian family           2023.0   
877     854     tt0476550            god tussi great ho           2008.0   
510     532     tt0289725                          jung           2000.0   
40       47     tt0137361                       anupama           1966.0   
1938   2013    tt28534557        cantt road : beginning           2023.0   
2142   2172     tt5785132                     aankhen 2              NaN   
1857   1776    tt15145764                        freddy   

Would you like to see even more recommendations? (yes/no):  


ok....I will glad to help you again....Thank you!


Enter movie name (or leave blank):  love
Enter genre (or leave blank):  horror
Enter casting (or leave blank):  
Enter year of release (or leave blank):  
Enter year range (e.g., 2000-2010) or leave blank:  2010-2020


No suitable recommendations found based on your search.


Would you like to see more recommendations? (yes/no):  no


ok....I will glad to help you again....Thank you!


Enter movie name (or leave blank):  
Enter genre (or leave blank):  
Enter casting (or leave blank):  
Enter year of release (or leave blank):  
Enter year range (e.g., 2000-2010) or leave blank:  


Top 10 Movie Recommendations:
      sl no IMDB_movie_id                 movie_name  year_of_release  \
1218   1240     tt3495030         dum laga ke haisha           2015.0   
925     959     tt1373156    karthik calling karthik           2010.0   
1779   1862    tt14767926               # homecoming           2022.0   
1281   1314     tt4900716               kapoor & son           2016.0   
1769   1715     tt9614452                   big bull           2021.0   
873     850     tt1292703    oye lucky ! lucky oye !           2008.0   
1491   1464     tt5935704                  padmaavat           2018.0   
738     705     tt0475645  vaah ! life ho toh aisi !           2005.0   
868     864     tt1288638                dasvidaniya           2008.0   
1441   1490     tt6580564                        102           2018.0   

                          genre  \
1218   comedy , drama , romance   
925   drama , mystery , romance   
1779            drama , musical   
1281    comedy , drama , f

Would you like to see more recommendations? (yes/no):  


ok....I will glad to help you again....Thank you!


In [16]:
# Save user input for future use
user_inputs = {
    'movie_name': 'inception',
    'genre': 'sci-fi',
    'casting': 'leonardo dicaprio',
    'year_of_release': 2010
}

with open('user_inputs.pickle', 'wb') as handle:
    pickle.dump(user_inputs, handle, protocol=pickle.HIGHEST_PROTOCOL)
