Step-by-Step Implementation

1. Data Preparation: We need user-item interaction data.
2. Similarity Calculation: Compute the similarity between users.
3. Prediction: Predict ratings for items based on similar users' ratings.

In [4]:
import pandas as pd
# Create the dataset
data_dict = {
    'user_id': ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'D', 'D', 'D', 'E', 'E', 'E'],
    'item_id': ['Item1', 'Item2', 'Item3', 'Item1', 'Item2', 'Item3', 'Item1', 'Item2', 'Item3', 'Item1', 'Item2', 'Item3', 'Item1', 'Item2', 'Item3'],
    'rating': [5, 3, 4, 3, 1, 2, 4, 3, 5, 4, 2, 4, 1, 5, 4]
}

df = pd.DataFrame(data_dict)

# Print the dataset
print(df)


   user_id item_id  rating
0        A   Item1       5
1        A   Item2       3
2        A   Item3       4
3        B   Item1       3
4        B   Item2       1
5        B   Item3       2
6        C   Item1       4
7        C   Item2       3
8        C   Item3       5
9        D   Item1       4
10       D   Item2       2
11       D   Item3       4
12       E   Item1       1
13       E   Item2       5
14       E   Item3       4


In [2]:
# Create the user-item matrix
user_item_matrix = df.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)

# Print the user-item matrix
print(user_item_matrix)


item_id  Item1  Item2  Item3
user_id                     
A          5.0    3.0    4.0
B          3.0    1.0    2.0
C          4.0    3.0    5.0
D          4.0    2.0    4.0
E          1.0    5.0    4.0


In [5]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Calculate cosine similarity between users
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

# Print the user similarity matrix
print(user_similarity_df)


user_id         A         B         C         D         E
user_id                                                  
A        1.000000  0.982708  0.980000  0.989949  0.785584
B        0.982708  1.000000  0.944911  0.979958  0.659829
C        0.980000  0.944911  1.000000  0.989949  0.851050
D        0.989949  0.979958  0.989949  1.000000  0.771517
E        0.785584  0.659829  0.851050  0.771517  1.000000


In [18]:
def predict_ratings(user_id, item_id):
    # Get the similarity scores for the target user
    similar_users = user_similarity_df[user_id].drop(user_id)
    
    # Get the ratings for the target item from similar users
    similar_users_ratings = user_item_matrix.loc[similar_users.index, item_id]
    
    # Compute the predicted rating as the mean of the ratings from the most similar users
    predicted_rating = similar_users_ratings[similar_users_ratings > 0].mean()
    
    return predicted_rating

# Predict a specific rating
user_id = 'A'
item_id = 'Item2'
predicted_rating = predict_ratings(user_id, item_id)
print(f"Predicted rating for user {user_id} on item {item_id}: {predicted_rating}")


Predicted rating for user A on item Item2: 2.75


Pearson correlation

In [None]:
# Pearson correlation, also known as Pearson's r, is a measure of the linear relationship between two continuous variables. It ranges from -1 to 1, where:

# 𝑟
# =
# 1
# r=1 indicates a perfect positive linear relationship.
# 𝑟
# =
# −
# 1
# r=−1 indicates a perfect negative linear relationship.
# 𝑟
# =
# 0
# r=0 indicates no linear relationship.



In [1]:
import numpy as np
import pandas as pd

# Example data
data = {'x': [1, 2, 3, 4, 5],
        'y': [2, 3, 4, 5, 6]}

df = pd.DataFrame(data)

# Calculate Pearson correlation using pandas
pearson_corr = df.corr(method='pearson')

print(pearson_corr)


     x    y
x  1.0  1.0
y  1.0  1.0
