In [4]:
import re

import numpy as np
import pandas as pd
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [14]:
# Read the three CSV catalogues (movies, books, songs) in that order; the
# recommender functions below query these module-level DataFrames.
movies, books, songs = (
    pd.read_csv(csv_path)
    for csv_path in ('movies.csv', 'books.csv', 'songs.csv')
)

In [16]:
# Checkpoint identifier shared by the classifier and its tokenizer.
roberta = "cardiffnlp/twitter-roberta-base-sentiment"
# Class names, listed in the index order that analyze_sentiment pairs with
# the model's output scores.
labels = ["Negative", "Neutral", "Positive"]

# Load the classifier first, then the matching tokenizer, from the same id.
model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)

def analyze_sentiment(text, max_length=512):
    """Return the sentiment probabilities of ``text``.

    The text is split on single spaces; every token that starts with ``@``
    (and is longer than one character) is replaced by ``@user`` and every
    token that starts with ``http`` is replaced by ``http``. The result is
    tokenized, run through the module-level ``model`` and the first row of
    the model output is passed through ``softmax``.

    Parameters
    ----------
    text : str
        Raw user text.
    max_length : int, optional
        Upper bound on the number of tokens handed to the model. Longer
        inputs are truncated instead of failing inside the forward pass.
        NOTE(review): 512 is assumed to fit this checkpoint's position
        limit; confirm against ``model.config``.

    Returns
    -------
    dict
        Maps each entry of the module-level ``labels`` list to the score at
        the same index.
    """
    tweet_words = []

    for word in text.split(' '):
        if word.startswith('@') and len(word) > 1:
            word = '@user'
        elif word.startswith('http'):
            word = "http"
        tweet_words.append(word)

    tweet_proc = " ".join(tweet_words)
    # Truncate explicitly: without it an over-long text reaches the model
    # with more tokens than it has position embeddings for.
    encoded_tweet = tokenizer(
        tweet_proc,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    output = model(**encoded_tweet)

    # output[0] holds the scores for the batch; [0] picks the only row.
    scores = softmax(output[0][0].detach().numpy())

    return {labels[i]: scores[i] for i in range(len(labels))}


# Genre name -> keywords whose presence (as a substring of the cleaned,
# lower-cased text) selects that genre. Built once instead of on every call.
# 'Drama' and 'Adventure' used to appear twice in the dict literal, which
# silently discarded the first keyword list of each; the two lists are now
# merged so keywords such as 'drama' and 'adventure' are no longer lost.
_GENRE_KEYWORDS = {
    'Crime': ['murder', 'detective', 'robbery', 'criminal', 'crime', 'investigation', 'mystery', 'thief', 'homicide', 'heist', 'police', 'law', 'suspect', 'evidence', 'forensic', 'courtroom', 'gang', 'underworld', 'corruption', 'witness', 'interrogation', 'fugitive', 'justice', 'prison', 'conspiracy', 'hostage', 'ransom', 'bribery', 'blackmail', 'organized crime', 'alibi', 'surveillance', 'fear', 'tension', 'distrust'],

    'Comedy': ['funny', 'humor', 'laugh', 'comedy', 'joke', 'hilarious', 'stand-up', 'satire', 'parody', 'slapstick', 'witty', 'gag', 'prank', 'skit', 'absurd', 'farce', 'silly', 'laughable', 'banter', 'punchline', 'caricature', 'clown', 'spoof', 'sarcasm', 'one-liner', 'comedic timing', 'laugh track', 'joy', 'playfulness', 'lighthearted'],

    'Family': ['family', 'kids', 'children', 'parent', 'siblings', 'wholesome', 'bonding', 'generations', 'togetherness', 'values', 'babysitting', 'holiday', 'relatives', 'tradition', 'care', 'protection', 'support', 'warmth', 'nurturing'],

    'Fantasy': ['magic', 'fantasy', 'supernatural', 'dragons', 'wizards', 'mythical', 'spells', 'enchantment', 'sorcery', 'creatures', 'medieval', 'prophecy', 'portal', 'parallel worlds', 'sword', 'epic', 'gods', 'fairy tale', 'otherworldly', 'ancient magic', 'awe', 'wonder', 'imagination', 'hope', 'desire'],

    'Romance': ['love', 'romance', 'relationship', 'passion', 'affection', 'courtship', 'heartbreak', 'chemistry', 'soulmate', 'intimacy', 'flirting', 'proposal', 'jealousy', 'date', 'romantic', 'first love', 'longing', 'secret admirer', 'wedding', 'honeymoon', 'desire', 'empathy', 'trust', 'connection', 'vulnerability'],

    'Horror': ['scary', 'horror', 'fear', 'ghost', 'monster', 'terror', 'creepy', 'haunted', 'psychological', 'gore', 'jump scare', 'nightmare', 'serial killer', 'demonic', 'possessed', 'curse', 'creature', 'slasher', 'undead', 'zombie', 'darkness', 'blood', 'evil', 'stalker', 'scream', 'panic', 'dread', 'unease', 'shock'],

    'Action': ['action', 'fight', 'war', 'explosion', 'combat', 'chase', 'rescue', 'battle', 'mission', 'hero', 'assassin', 'showdown', 'weapon', 'gunfight', 'adrenaline', 'martial arts', 'special forces', 'escape', 'survival', 'spy', 'enemy', 'revolution', 'duel', 'vigilante', 'courage', 'fearlessness', 'bravery', 'determination', 'strength'],

    'Sci_Fi': ['sci-fi', 'aliens', 'space', 'future', 'robot', 'technology', 'time travel', 'cyberpunk', 'extraterrestrial', 'spaceship', 'galaxy', 'parallel universe', 'dystopia', 'clones', 'virtual reality', 'artificial intelligence', 'cyborg', 'android', 'terraforming', 'wormhole', 'curiosity', 'discovery', 'inspiration', 'isolation'],

    # Merged from the two former 'Drama' entries.
    'Drama': ['drama', 'emotional', 'life', 'struggle', 'relationship', 'family', 'conflict', 'personal growth', 'tragedy', 'heartbreak', 'sacrifice', 'redemption', 'betrayal', 'self-discovery', 'crisis', 'moral dilemma', 'grief', 'injustice', 'reconciliation', 'forgiveness', 'despair', 'hope', 'resilience', 'sympathy', 'compassion', 'relationships', 'inner turmoil', 'loneliness', 'emotional conflict', 'loss', 'healing', 'brokenness'],

    # Merged from the two former 'Adventure' entries.
    'Adventure': ['adventure', 'explore', 'journey', 'quest', 'discovery', 'expedition', 'danger', 'wilderness', 'treasure', 'survival', 'challenge', 'exploration', 'uncharted', 'island', 'daring', 'heroic', 'wild', 'legend', 'artifact', 'map', 'compass', 'excitement', 'curiosity', 'thrill', 'boldness', 'anticipation', 'loneliness', 'isolation', 'risk', 'fear', 'uncertainty', 'inner strength', 'bravery', 'unknown', 'endurance'],

    'Music': ['music', 'singing', 'band', 'concert', 'song', 'performance', 'lyrics', 'guitar', 'piano', 'melody', 'festival', 'orchestra', 'musical', 'album', 'recording', 'rhythm', 'harmony', 'composer', 'soundtrack', 'hit single', 'tune', 'note', 'emotion', 'joy', 'creativity', 'passion', 'inspiration'],

    'Fiction': ['storytelling', 'characters', 'narrative', 'conflict', 'identity', 'loneliness', 'despair', 'triumph', 'loss', 'hope', 'brokenness', 'resilience', 'emptiness', 'growth', 'emotional conflict'],

    'Detective and Mystery': ['detective', 'mystery', 'suspense', 'isolation', 'fear', 'paranoia', 'puzzle', 'anxiety', 'revelation', 'tension', 'secrets', 'justice', 'uncertainty', 'doubt', 'desperation'],

    'Christian Life': ['faith', 'struggle', 'forgiveness', 'redemption', 'inner peace', 'confusion', 'hope', 'guilt', 'empathy', 'spiritual conflict', 'forgiveness', 'compassion', 'sacrifice', 'healing'],

    'American Fiction': ['individualism', 'struggle', 'freedom', 'identity', 'alienation', 'loss', 'hope', 'conflict', 'despair', 'rebuilding', 'brokenness', 'overcoming adversity', 'hope', 'disillusionment', 'dream'],

    'Fantasy Fiction': ['magic', 'supernatural', 'myth', 'epic', 'quest', 'inner struggle', 'destiny', 'sacrifice', 'mystery', 'conflict', 'mythical beings', 'escape', 'prophecy', 'hope', 'overcoming darkness'],

    'Science Fiction': ['technology', 'future', 'isolation', 'alienation', 'progress', 'paranoia', 'dystopia', 'utopia', 'fear', 'identity', 'existential crisis', 'innovation', 'struggle', 'hope', 'artificial intelligence'],

    'Juvenile Fiction': ['innocence', 'friendship', 'adventure', 'growing up', 'emotional conflict', 'family', 'learning', 'hope', 'loneliness', 'belonging', 'self-discovery', 'fear', 'imagination', 'exploration', 'wholesome values'],

    'Historical Fiction': ['history', 'struggle', 'sacrifice', 'identity', 'survival', 'loss', 'emotional depth', 'alienation', 'hope', 'triumph', 'cultural conflict', 'change', 'brokenness', 'resilience', 'renewal'],

    'Country Life': ['nature', 'simplicity', 'community', 'loneliness', 'values', 'family bonds', 'conflict', 'peace', 'solitude', 'hardships', 'emotional growth', 'connection to the land', 'isolation', 'endurance', 'healing'],

    'Arthurian Romances': ['chivalry', 'honor', 'epic quests', 'betrayal', 'sacrifice', 'inner conflict', 'loneliness', 'heroism', 'destiny', 'legend', 'tragedy', 'nobility', 'forgiveness', 'hope'],

    'Dysfunctional Families': ['conflict', 'alienation', 'emotional struggle', 'loneliness', 'resentment', 'healing', 'grief', 'brokenness', 'communication issues', 'guilt', 'forgiveness', 'trauma', 'inner turmoil', 'reconciliation', 'hope'],

    'Christmas Stories': ['family', 'togetherness', 'joy', 'loss', 'hope', 'emotional warmth', 'loneliness', 'compassion', 'healing', 'celebration', 'generosity', 'grief', 'connection', 'miracles', 'tradition'],

    'Human Cloning': ['identity', 'existential crisis', 'alienation', 'technology', 'fear of the unknown', 'inner conflict', 'loneliness', 'struggle for meaning', 'moral dilemmas', 'isolation', 'self-awareness', 'loss of self', 'hope', 'humanity', 'consciousness'],

    'Literary Collections': ['reflections', 'human experience', 'emotional depth', 'existentialism', 'grief', 'loneliness', 'hope', 'growth', 'self-discovery', 'resilience', 'fear', 'loss', 'joy', 'brokenness', 'rebuilding'],

    'Sex': ['sex', 'fuck', 'adult', 'desire', 'intimacy', 'passion', 'lust', 'physical attraction', 'romantic connection', 'sensuality', 'seduction', 'vulnerability', 'consent', 'taboo', 'infidelity', 'heartbreak', 'sexual tension', 'emotional connection', 'sexual identity', 'eroticism', 'temptation', 'jealousy', 'power dynamics', 'shame', 'guilt', 'pleasure', 'fantasy', 'regret', 'obsession', 'trust', 'betrayal', 'exploration', 'boundaries'],

    'Bollywood': ['hindi', 'bollywood', 'indian cinema', 'hindi movies', 'bollywood films'],

    'Hindi Songs': ['hindi', 'bollywood', 'hindustani', 'desi', 'hindipop', 'hindimusic'],

    'Punjabi Songs': ['punjabi', 'bhangra', 'punjabimusic'],

    'Tamil Songs': ['tamil', 'kollywood', 'tamilmusic'],
}


def extract_movies(text):
    """Return the genre names whose keywords occur in ``text``.

    Despite its name, this function returns genres, not movie titles.
    Whitespace-separated words starting with ``@`` are dropped, the rest is
    re-joined with single spaces and lower-cased. A genre matches when any
    of its keywords is a substring of that cleaned text, so ``'song'`` also
    matches ``'songs'`` (and, by the same rule, ``'war'`` matches ``'warm'``).

    Parameters
    ----------
    text : str
        Raw user text.

    Returns
    -------
    list of str
        Matching genre names, in the order they are defined in the keyword
        table; empty when nothing matches.

    Side effects
    ------------
    Prints the cleaned text and the matched genres.
    """
    kept_words = (word for word in text.split() if not word.startswith('@'))
    cleaned_text = ' '.join(kept_words).lower()
    print("Cleaned Text:", cleaned_text)

    matched_genres = [
        genre
        for genre, keywords in _GENRE_KEYWORDS.items()
        if any(keyword in cleaned_text for keyword in keywords)
    ]

    print("Matched Genres:", matched_genres)
    return matched_genres

def recommend_genre_based_books(genre):
    """Return up to five book titles whose ``categories`` mention a genre.

    Parameters
    ----------
    genre : list of str or str
        Genre names to look for (case-insensitive). A single string is
        treated as one genre rather than being split into characters.

    Returns
    -------
    list of str
        Values of the ``title`` column for the first five matching rows of
        the module-level ``books`` frame. Empty when ``genre`` is empty, when
        nothing matches, or when ``categories`` is missing or entirely NaN.

    Side effects
    ------------
    Prints the requested genres, and a notice when ``categories`` is unusable.
    """
    print(f"Genres for book recommendation: {genre}")

    if isinstance(genre, str):
        genre = [genre]

    if 'categories' not in books.columns or not books['categories'].notna().any():
        print("No matching books found or 'categories' column missing.")
        return []

    # An empty pattern would match every row, so report "no match" instead.
    if not genre:
        return []

    # Escape each name so it is matched literally, not interpreted as a regex.
    pattern = '|'.join(re.escape(name) for name in genre)
    mask = books['categories'].str.contains(pattern, case=False, na=False)
    return books.loc[mask, 'title'].head(5).tolist()

def recommend_popular_books():
    """Return the ``title`` of the five books with the highest ``published_year``.

    Reads the module-level ``books`` frame; "popular" here simply means most
    recently published according to the sort key used.
    """
    newest_first = books.sort_values(by='published_year', ascending=False)
    top_titles = newest_first['title'].head(5)
    return top_titles.tolist()

def recommend_genre_based_movies(genres):
    """Return up to five movie titles whose ``tags`` mention a genre.

    Parameters
    ----------
    genres : list of str or str
        Genre names to look for (case-insensitive). A single string is
        treated as one genre rather than being split into characters.

    Returns
    -------
    list of str
        Values of the ``title`` column for the first five matching rows of
        the module-level ``movies`` frame; empty when ``genres`` is empty or
        nothing matches.
    """
    if isinstance(genres, str):
        genres = [genres]

    # An empty pattern would match every row, so report "no match" instead.
    if not genres:
        return []

    # Escape each name so it is matched literally, not interpreted as a regex.
    pattern = '|'.join(re.escape(name) for name in genres)
    # na=False: rows with missing tags count as non-matches; otherwise the
    # mask would contain NaN and boolean indexing would raise.
    mask = movies['tags'].str.contains(pattern, case=False, na=False)
    return movies.loc[mask, 'title'].head(5).tolist()

def recommend_popular_movies():
    """Return the ``title`` of the five movies with the highest ``movie_id``.

    Reads the module-level ``movies`` frame.
    NOTE(review): the sort key is ``movie_id``, not a popularity measure.
    """
    highest_id_first = movies.sort_values(by='movie_id', ascending=False)
    top_titles = highest_id_first['title'].head(5)
    return top_titles.tolist()

def recommend_genre_based_songs(genre):
    """Return up to five song names whose ``text`` mentions a genre.

    Parameters
    ----------
    genre : list of str or str
        Genre names to look for (case-insensitive). A single string is
        treated as one genre rather than being split into characters.

    Returns
    -------
    list of str
        Values of the ``song`` column for the first five matching rows of
        the module-level ``songs`` frame; empty when ``genre`` is empty or
        nothing matches.
    """
    if isinstance(genre, str):
        genre = [genre]

    # An empty pattern would match every row, so report "no match" instead.
    if not genre:
        return []

    # Escape each name so it is matched literally, not interpreted as a regex.
    pattern = '|'.join(re.escape(name) for name in genre)
    # na=False: rows with missing text count as non-matches; otherwise the
    # mask would contain NaN and boolean indexing would raise.
    mask = songs['text'].str.contains(pattern, case=False, na=False)
    return songs.loc[mask, 'song'].head(5).tolist()

def recommend_popular_songs():
    """Return the ``song`` value of the first five rows after sorting by ``artist`` descending.

    Reads the module-level ``songs`` frame.
    NOTE(review): the sort key is ``artist``, not a popularity measure.
    """
    artist_descending = songs.sort_values(by='artist', ascending=False)
    top_songs = artist_descending['song'].head(5)
    return top_songs.tolist()

def _print_recommendations(heading, titles, empty_message):
    """Print ``heading`` and one title per line, or ``empty_message`` if ``titles`` is empty."""
    if titles:
        print(heading)
        for title in titles:
            print(title)
    else:
        print(empty_message)


# Main loop: read a line, report its sentiment, then print movie, book and
# song recommendations until the user types "exit".
while True:
    user_input = input("Enter a text: ")

    # Strip before comparing so "exit " or " Exit" also end the session.
    if user_input.strip().lower() == 'exit':
        print('Exiting the program')
        break

    # Sentiment analysis
    sentiment_probs = analyze_sentiment(user_input)
    max_sentiment = max(sentiment_probs, key=sentiment_probs.get)
    print('\nSentiment Analysis:')
    for label, probability in sentiment_probs.items():
        print(f"{label}: {probability:.4f}")
    print("Max sentiment:", max_sentiment)

    # Extract genres from input text; the same list drives all three
    # recommenders below.
    extracted_movie_genres = extract_movies(user_input)
    print("Extracted Genres:", extracted_movie_genres)

    # Movie recommendations: genre-based when a genre matched, otherwise the
    # generic fallback list.
    if extracted_movie_genres:
        recommended_movies = recommend_genre_based_movies(extracted_movie_genres)
    else:
        recommended_movies = recommend_popular_movies()
    _print_recommendations(
        "\nRecommended Movies:", recommended_movies, "No movies found in the dataset"
    )

    # Book recommendations
    if extracted_movie_genres:
        recommended_books = recommend_genre_based_books(extracted_movie_genres)
    else:
        recommended_books = recommend_popular_books()
    _print_recommendations(
        "\nRecommended Books:", recommended_books, "No books found in the dataset."
    )

    # Song recommendations
    if extracted_movie_genres:
        recommended_songs = recommend_genre_based_songs(extracted_movie_genres)
    else:
        recommended_songs = recommend_popular_songs()
    _print_recommendations(
        "\nRecommended Songs:", recommended_songs, "No songs found in the dataset."
    )




Enter a text:  hey you



Sentiment Analysis:
Negative: 0.2045
Neutral: 0.5906
Positive: 0.2049
Max sentiment: Neutral
Cleaned Text: hey you
Matched Genres: []
Extracted Genres: []

Recommended Movies:
Running Forever
8 Days
Growing Up Smith
Midnight Cabaret
Puss in Boots

Recommended Books:
A Commentary Upon the Gospel According to S. Luke
Nöddebo Parsonage
Palace Walk
The Story of the Life of Lafayette
Collections of the Massachusetts Historical Soceity

Recommended Songs:
Pene
You're Losing Me
Spades
Nick Of Time
Unchangeable


Enter a text:  love the way you lie



Sentiment Analysis:
Negative: 0.5534
Neutral: 0.3401
Positive: 0.1065
Max sentiment: Negative
Cleaned Text: love the way you lie
Matched Genres: ['Romance']
Extracted Genres: ['Romance']

Recommended Movies:
Killers
Friends with Benefits
The Young Victoria
The Bodyguard
New York, New York
Genres for book recommendation: ['Romance']

Recommended Books:
The Once and Future King
Sir Gawain and the Green Knight

Recommended Songs:
Alone
Heaven
It's Your Life
National Anthem
Cross My Heart


Enter a text:  I love hindi movie



Sentiment Analysis:
Negative: 0.0026
Neutral: 0.0133
Positive: 0.9841
Max sentiment: Positive
Cleaned Text: i love hindi movie
Matched Genres: ['Romance', 'Bollywood', 'Hindi Songs']
Extracted Genres: ['Romance', 'Bollywood', 'Hindi Songs']

Recommended Movies:
Killers
The Love Guru
Baahubali: The Beginning
Friends with Benefits
The Young Victoria
Genres for book recommendation: ['Romance', 'Bollywood', 'Hindi Songs']

Recommended Books:
The Once and Future King
Sir Gawain and the Green Knight

Recommended Songs:
Alone
Heaven
It's Your Life
National Anthem
Cross My Heart


Enter a text:  bollywood songs



Sentiment Analysis:
Negative: 0.0953
Neutral: 0.7651
Positive: 0.1396
Max sentiment: Neutral
Cleaned Text: bollywood songs
Matched Genres: ['Music', 'Bollywood', 'Hindi Songs']
Extracted Genres: ['Music', 'Bollywood', 'Hindi Songs']

Recommended Movies:
Tangled
Frozen
Happy Feet Two
The Princess and the Frog
The Hunchback of Notre Dame
Genres for book recommendation: ['Music', 'Bollywood', 'Hindi Songs']

Recommended Books:
A Hard Day's Write, 3e
Midnight Riders
The Real Frank Zappa Book
A Day in the Life
Fourth of July mice

Recommended Songs:
Saturday Night Fish Fry
I Believe In You
Sitting At The Wheel
Here's To My Lady
Back In '72


Enter a text:  exit


Exiting the program


In [18]:
import pickle

# Persist the model, the tokenizer and the three datasets as pickle files.
# Each entry is (output file, object); they are written in the listed order.
pickle_targets = [
    ('sentiment_model.pkl', model),
    ('sentiment_tokenizer.pkl', tokenizer),
    ('books_dataset.pkl', books),
    ('movies_dataset.pkl', movies),
    ('songs_dataset.pkl', songs),
]

for pickle_path, payload in pickle_targets:
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)