In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Example dataset of movies with genres
movies_data = pd.DataFrame({
    'movie_id': [1, 2, 3, 4, 5],
    'movie_title': ['The Matrix', 'The Avengers', 'Titanic', 'Inception', 'The Lion King'],
    'genres': ['Action|Sci-Fi', 'Action|Adventure|Sci-Fi', 'Drama|Romance', 'Action|Sci-Fi|Thriller', 'Animation|Adventure|Drama']
})

# Convert genres into a binary vector (One-Hot Encoding)
genres_all = set('|'.join(movies_data['genres']).split('|'))
genres_all = list(genres_all)
print(genres_all)

# Create a function to convert genres into a binary vector
def genre_vector(genres, all_genres):
    genre_vector = [1 if genre in genres.split('|') else 0 for genre in all_genres]
    return genre_vector

# Apply genre vector to the movies
movies_data['genre_vector'] = movies_data['genres'].apply(lambda x: genre_vector(x, genres_all))

# Let's say the user has liked the following movies: "The Matrix" and "Inception"
user_liked_movies = ['The Matrix']

# Get the genre vectors for the movies the user liked
user_liked_genres = movies_data[movies_data['movie_title'].isin(user_liked_movies)]['genre_vector'].tolist()

# Create the user profile as the average of the genre vectors of the liked movies
import numpy as np
user_profile = np.mean(user_liked_genres, axis=0)

# Calculate cosine similarity between the user profile and all movie genre vectors
movie_vectors = np.array(movies_data['genre_vector'].tolist())
print(movie_vectors)
cosine_similarities = cosine_similarity([user_profile], movie_vectors)

# Add the similarity scores to the movies dataframe
movies_data['similarity'] = cosine_similarities.flatten()

# Sort the movies based on similarity to the user profile (higher similarity = better match)
recommended_movies = movies_data.sort_values(by='similarity', ascending=False)

# Recommend movies to the user (excluding the ones they have already watched/liked)
recommended_movies = recommended_movies[~recommended_movies['movie_title'].isin(user_liked_movies)]

# Print the recommended movies
print("Recommended Movies for the User:")
for index, row in recommended_movies.iterrows():
    print(f"Movie: {row['movie_title']}, Similarity Score: {row['similarity']:.4f}")


['Animation', 'Sci-Fi', 'Drama', 'Romance', 'Adventure', 'Thriller', 'Action']
[[0 1 0 0 0 0 1]
 [0 1 0 0 1 0 1]
 [0 0 1 1 0 0 0]
 [0 1 0 0 0 1 1]
 [1 0 1 0 1 0 0]]
Recommended Movies for the User:
Movie: The Avengers, Similarity Score: 0.8165
Movie: Inception, Similarity Score: 0.8165
Movie: Titanic, Similarity Score: 0.0000
Movie: The Lion King, Similarity Score: 0.0000


In [76]:
# Grab Apartment Data
apartmentData = pd.read_csv('boston_apartment_listings.csv')
# print(apartmentData.head())

apartmentRent = apartmentData['Rent']
minRent = min(apartmentRent)
maxRent = max(apartmentRent)
apartmentRent = (apartmentData['Rent'] - minRent) / (maxRent - minRent)

# FIX LATER
apartmentOverallCrime = [0 if np.isnan(val) else val for val in apartmentData['Overall CrimeRate']]
apartmentViolentCrime = [0 if np.isnan(val) else val for val in apartmentData['Violent CrimeRate']]

apartmentFeatures = list(zip(apartmentRent, apartmentData['Bed'], apartmentData['Bath'], apartmentOverallCrime, apartmentViolentCrime))

apartmentVectors = np.array(apartmentFeatures)

input = np.array([[(2950 - minRent) / (maxRent - minRent), 2.0, 1.0, 1.86, 0.2866]])
cosineSimilarity = cosine_similarity(input, apartmentVectors)
apartmentData['similarity'] = cosineSimilarity.flatten()

recommend = apartmentData.sort_values(by='similarity', ascending=False)

print("Recommended Apartments for the User:")
i = 0
for index, row in recommend.iterrows():
    print(f"Address  : {row['Address']}, Similarity Score: {row['similarity']:.4f}")
    print(row['Rent'], (row['Rent'] - minRent) / (maxRent - minRent), row['Bed'], row['Bath'], row['Overall CrimeRate'], row['Violent CrimeRate'])
    i += 1

    if i == 3:
        break

Recommended Apartments for the User:
Address  : 165 Hemenway Unit 5, Similarity Score: 1.0000
2950 0.17045454545454544 2.0 1.0 1.86 0.2866
Address  : 315 Huntington Unit 2B, Similarity Score: 0.9999
3100 0.20454545454545456 2.0 1.0 1.86 0.2866
Address  : 238 Hemenway Unit b5, Similarity Score: 0.9998
3195 0.22613636363636364 2.0 1.0 1.86 0.2866
