In [37]:
import pandas as pd
import re
from collections import defaultdict

In [40]:
books = pd.read_csv('books.csv')
movies = pd.read_csv('movies.csv')

In [41]:
# Step 2: Limit to 4000 books and 4000 movies
books = books.head(4000)  # Select the first 4000 rows
movies = movies.head(4000)  # Select the first 4000 rows

In [42]:
def clean_genres(genres):
    # Check if genres is a string; if not, return an empty string
    if not isinstance(genres, str):
        return ''  # Handle non-string values
    # Remove semicolons and brackets, keep commas
    genres = re.sub(r'[;\\[\\]]', ' ', genres)  # Remove semicolons and brackets
    genres = re.sub(r'\\s+', ' ', genres)  # Replace multiple spaces with a single space
    return genres.strip()  # Remove leading and trailing spaces

# Apply the cleaning function to the 'genres' column
books['genres'] = books['genres'].apply(clean_genres)

# Display the cleaned DataFrame
print(books[['Book Id', 'Title', 'genres']])

      Book Id                                              Title  \
0           1  Harry Potter and the Half-Blood Prince (Harry ...   
1           2  Harry Potter and the Order of the Phoenix (Har...   
2           4  Harry Potter and the Chamber of Secrets (Harry...   
3           5  Harry Potter and the Prisoner of Azkaban (Harr...   
4           8  Harry Potter Boxed Set  Books 1-5 (Harry Potte...   
...       ...                                                ...   
3995    14188  The Minority Report (Collected Stories of Phil...   
3996    14194                                 A Man's Protection   
3997    14196                Jonathan Strange y el señor Norrell   
3998    14214                                             Hunger   
3999    14227                                         The Diezmo   

                                                 genres  
0     Fantasy;Young Adult;Fiction;Fantasy,Magic;Chil...  
1     Fantasy;Young Adult;Fiction;Fantasy,Magic;Chil...  
2     Fan

In [48]:
# Step 3: Extract keywords from genres
def extract_keywords(df, genre_column):
    keyword_mapping = defaultdict(set)
    
    for index, row in df.iterrows():
        genres = str(row[genre_column]).split(',')  # Assuming genres are separated by commas
        for genre in genres:
            keywords = genre.strip().lower().split()  # Split into keywords
            for keyword in keywords:
                keyword_mapping[keyword].add(genre.strip().lower())
    
    return keyword_mapping

In [49]:
# Extract keywords from both books and movies
book_keywords = extract_keywords(books, 'genres')
movie_keywords = extract_keywords(movies, 'genres')

In [60]:
# Recommendation function
def recommend_based_on_genre(selected_genre):
    selected_keywords = selected_genre.lower().strip().split(',')
    
    recommended_books = set()
    recommended_movies = set()
    
    for keyword in selected_keywords:
        keyword = keyword.strip()
        if keyword in book_keywords:
            genres = book_keywords[keyword]
            for genre in genres:
                # Filter the books by genre and add their titles
                titles = books[books['genres'].str.contains(genre, case=False, na=False)]['Title']
                recommended_books.update(titles.tolist())  # Convert to list and update the set
        if keyword in movie_keywords:
            genres = movie_keywords[keyword]
            for genre in genres:
                # Filter the movies by genre and add their titles
                titles = movies[movies['genres'].str.contains(genre, case=False, na=False)]['title']
                recommended_movies.update(titles.tolist())  # Convert to list and update the set
    
    return recommended_books, recommended_movies
# Example usage
selected_genre = "Fantasy, Magic"  # Change this to the genre you want to explore
recommended_books, recommended_movies = recommend_based_on_genre(selected_genre)



In [58]:
# Print recommendations
print(f"Recommended Books for genres '{selected_genre}':")
for book in recommended_books:
    print(book)

Recommended Books for genres 'Fantasy, Magic':
The Tale of Genji
The Legend of Luke (Redwall  #12)
A Shadow in Summer (Long Price Quartet  #1)
Myths of the Underworld Journey: Plato  Aristophanes  and the "Orphic" Gold Tablets
Jinx High (Diana Tregarde  #3)
Collected Stories
The Confusion (The Baroque Cycle  #2)
The Bacchae of Euripides: A Communion Rite
Euripides II: The Cyclops / Heracles / Iphigenia in Tauris / Helen
Bakkhai
The Ultimate Hitchhiker's Guide: Five Complete Novels and One Story (Hitchhiker's Guide to the Galaxy  #1-5)
The Littles and Their Amazing New Friend
The Talisman
The Aleph and Other Stories
Death Note  Vol. 3: Hard Run (Death Note  #3)
El Aleph
First Meetings in Ender's Universe (Ender's Saga  #0.5)
Tsubasa: RESERVoir CHRoNiCLE  Vol. 10
Chronik eines angekündigten Todes
Hikaru no Go  Vol. 1: Descent of the Go Master (Hikaru no Go  #1)
The Adventures of Jimmy Skunk
L'Échiquier du mal
Little Miss Birthday
The Annotated Gulliver's Travels
Witches Abroad (Discworld

In [59]:
print(f"\nRecommended Movies for genres '{selected_genre}':")
for movie in recommended_movies:
    print(movie)


Recommended Movies for genres 'Fantasy, Magic':
The Craft
Highlander: Endgame
Mirror Mirror
The Kid
E.T. the Extra-Terrestrial
Rise of the Guardians
The Chronicles of Narnia: Prince Caspian
Stardust
Underworld
Army of Darkness
Bulletproof Monk
Dawn of the Dead
City of Angels
Time Bandits
Peggy Sue Got Married
Maleficent
The Ant Bully
Hellboy
Immortals
Big Fish
The Last Legion
2046
Cat People
Hereafter
The Tempest
The Mummy: Tomb of the Dragon Emperor
The Croods
The Lovely Bones
Big Trouble in Little China
Phenomenon
The Snow Queen
The Brothers Grimm
Hancock
Perfume: The Story of a Murderer
Krampus
Mars Attacks!
Mirrormask
A Monster in Paris
The Princess Bride
Thor: The Dark World
The Barbarians
Teen Wolf Too
Small Soldiers
Oz: The Great and Powerful
Howl's Moving Castle
The Cat in the Hat
Lady in the Water
DragonHeart
Batman
The Promise
Doctor Dolittle
How to Train Your Dragon
The Wizard of Oz
Just Like Heaven
Batman Forever
The Monkey King 2
Tuck Everlasting
The Adventurer: The Curse