In [1]:
# ==========================================
# CODTECH IT SOLUTIONS INTERNSHIP
# TASK 4: RECOMMENDATION SYSTEM
# ==========================================

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 1. Create a Synthetic Dataset
# Dictionary of users and their ratings for different movies
# Each row is one (user id, movie title, rating) observation, grouped by user.
_rating_rows = [
    (1, 'Matrix', 5), (1, 'Titanic', 2), (1, 'Avengers', 4), (1, 'Inception', 5),
    (2, 'Matrix', 5), (2, 'Avengers', 3), (2, 'Inception', 5),
    (3, 'Titanic', 4), (3, 'Avengers', 2), (3, 'Frozen', 1),
    (4, 'Matrix', 4), (4, 'Inception', 5), (4, 'Frozen', 2),
    (5, 'Titanic', 5), (5, 'Frozen', 4),
]
_column_names = ('User_ID', 'Movie', 'Rating')

# Transpose the rows into one list per column, keyed by column name.
data = {
    name: list(values)
    for name, values in zip(_column_names, zip(*_rating_rows))
}

df = pd.DataFrame(data)
print("--- User Ratings Dataset ---")
print(df)

# 2. Create User-Item Matrix
# Rows = Users, Columns = Movies, Values = Ratings
user_item_matrix = (
    df.pivot_table(values='Rating', index='User_ID', columns='Movie')
    # A user/movie pair with no rating is encoded as 0 (not seen / not rated)
    .fillna(0)
)

print("\n--- User-Item Matrix ---")
print(user_item_matrix)

# 3. Calculate Similarity (Item-Based Collaborative Filtering)
# We transpose the matrix to find similarity between Movies (items) instead of Users
# Transposing puts one movie per row, so the pairwise scores compare movies.
item_similarity = cosine_similarity(user_item_matrix.T)

# Label both axes with the movie titles so scores can be looked up by name
item_similarity_df = pd.DataFrame(item_similarity)
item_similarity_df.index = item_similarity_df.columns = user_item_matrix.columns

print("\n--- Item Similarity Matrix (Cosine Similarity) ---")
print(item_similarity_df)

# 4. Recommendation Function
def get_recommendations(movie_name, similarity_matrix, num_recommendations=2):
    """Return the movies most similar to ``movie_name``.

    Args:
        movie_name: Label of the movie to look up. It must be both a column
            and a row label of ``similarity_matrix``.
        similarity_matrix: DataFrame of pairwise similarity scores whose rows
            and columns are labelled by movie.
        num_recommendations: Maximum number of movies to return; must be
            non-negative.

    Returns:
        A Series indexed by movie label holding similarity scores, sorted
        from highest to lowest, with ``movie_name`` itself removed. At most
        ``num_recommendations`` entries are returned.

    Raises:
        ValueError: If ``num_recommendations`` is negative.
        KeyError: If ``movie_name`` is not present in ``similarity_matrix``.
    """
    # A negative count would make ``head`` silently return "all but the last
    # n" rows, which is never what a caller asking for top-N wants.
    if num_recommendations < 0:
        raise ValueError(
            f"num_recommendations must be non-negative, got {num_recommendations}"
        )

    # Fail with a readable message instead of a bare pandas lookup error.
    if movie_name not in similarity_matrix.columns:
        raise KeyError(f"Movie {movie_name!r} not found in similarity matrix")

    # Similarity of every movie to the requested one
    similar_scores = similarity_matrix[movie_name]

    # Exclude the movie itself, then rank the rest; the stable sort keeps
    # tied movies in a deterministic order.
    candidates = similar_scores.drop(movie_name)
    ranked = candidates.sort_values(ascending=False, kind="stable")

    return ranked.head(num_recommendations)

# 5. Generate Recommendations
# Movie whose nearest neighbours we want to display
test_movie = 'Inception'
recommendations = get_recommendations(
    test_movie,
    item_similarity_df,
)

print(f"\n--- Recommendations for '{test_movie}' ---")
# Walk the labels and scores side by side, best match first
for movie, score in zip(recommendations.index, recommendations):
    print(f"Movie: {movie}, Similarity Score: {score:.4f}")

# 6. Evaluation (Conceptual)
# In real scenarios, we split data and calculate RMSE.
# Here, we demonstrate logic: If a user likes 'Inception', they are recommended 'Matrix' because
# users who rated 'Inception' high also rated 'Matrix' high (Similarity Score close to 1).

--- User Ratings Dataset ---
    User_ID      Movie  Rating
0         1     Matrix       5
1         1    Titanic       2
2         1   Avengers       4
3         1  Inception       5
4         2     Matrix       5
5         2   Avengers       3
6         2  Inception       5
7         3    Titanic       4
8         3   Avengers       2
9         3     Frozen       1
10        4     Matrix       4
11        4  Inception       5
12        4     Frozen       2
13        5    Titanic       5
14        5     Frozen       4

--- User-Item Matrix ---
Movie    Avengers  Frozen  Inception  Matrix  Titanic
User_ID                                              
1             4.0     0.0        5.0     5.0      2.0
2             3.0     0.0        5.0     5.0      0.0
3             2.0     1.0        0.0     0.0      4.0
4             0.0     2.0        5.0     4.0      0.0
5             0.0     4.0        0.0     0.0      5.0

--- Item Similarity Matrix (Cosine Similarity) ---
Movie      Avengers