In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix

# Step 1: Simulated user-item interaction matrix (for simplicity)
data = {
    'user_id': [1, 1, 1, 2, 2, 3, 3, 4, 5],
    'item_id': [101, 102, 103, 101, 104, 102, 103, 101, 104],
    'rating': [5, 3, 4, 4, 2, 3, 5, 1, 4]
}
ratings_df = pd.DataFrame(data)

# Step 2: Creating the user-item matrix
user_item_matrix = ratings_df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

# Convert the user-item matrix to a sparse matrix
user_item_sparse = csr_matrix(user_item_matrix.values)

# Step 3: Decomposing the matrix using SVD
U, sigma, Vt = svds(user_item_sparse, k=2)
sigma = np.diag(sigma)

# Step 4: Reconstructing the predicted ratings matrix
predicted_ratings = np.dot(np.dot(U, sigma), Vt)
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

# Step 5: Function to recommend items
def recommend_items(user_id, num_recommendations=2):
    user_row = predicted_ratings_df.loc[user_id].sort_values(ascending=False)
    recommendations = user_row[
        user_row.index.difference(
            user_item_matrix.loc[user_id].replace(0, np.nan).dropna().index
        )
    ]
    return recommendations.head(num_recommendations)

# Example usage
print("Predicted Ratings Matrix:")
print(predicted_ratings_df)

user_id = 1
print(f"\nRecommendations for User {user_id}:")
print(recommend_items(user_id))


Predicted Ratings Matrix:
item_id       101       102       103       104
user_id                                        
1        4.451654  3.006198  4.371658  0.893116
2        3.793758  0.187971  0.015036  2.321693
3        0.843722  2.908122  4.483477 -1.367898
4        0.742945  0.103252  0.106793  0.410989
5        1.643956 -0.450073 -0.824275  1.355474

Recommendations for User 1:
item_id
104    0.893116
Name: 1, dtype: float64


# Dataset Code

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix

# Step 1: Load the dataset
# Replace 'path_to_reviews.csv' with the actual path to the dataset
reviews_df = pd.read_csv('Womens Clothing E-Commerce Reviews.csv')

# Step 2: Prepare the user-item interaction matrix
# Pivot the data to create a user-item matrix
user_item_matrix = reviews_df.pivot(index='ID', columns='Clothing ID', values='Rating').fillna(0)

# Convert the user-item matrix to a sparse matrix
user_item_sparse = csr_matrix(user_item_matrix.values)

# Step 3: Perform SVD on the user-item matrix
U, sigma, Vt = svds(user_item_sparse, k=2)  # Set `k` based on the desired latent dimensions
sigma = np.diag(sigma)

# Step 4: Reconstruct the predicted ratings matrix
predicted_ratings = np.dot(np.dot(U, sigma), Vt)
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

# Step 5: Function to recommend items
def recommend_items(user_id, num_recommendations=5):
    """
    Recommend items to a user based on the predicted ratings.

    :param user_id: ID of the user for whom recommendations are needed
    :param num_recommendations: Number of recommendations to return
    :return: A DataFrame of recommended items and their predicted ratings
    """
    if user_id not in predicted_ratings_df.index:
        return f"User ID {user_id} not found in the dataset."

    # Get the predicted ratings for the user
    user_row = predicted_ratings_df.loc[user_id].sort_values(ascending=False)

    # Exclude items the user has already rated
    rated_items = user_item_matrix.loc[user_id].replace(0, np.nan).dropna().index
    recommendations = user_row[user_row.index.difference(rated_items)]

    return recommendations.head(num_recommendations)

# Example Usage
print("Predicted Ratings Matrix:")
print(predicted_ratings_df)

user_id = 767  # Replace with an actual user ID from your dataset
print(f"\nRecommendations for User {user_id}:")
print(recommend_items(user_id))


Predicted Ratings Matrix:
Clothing ID          0             1             2             3     \
ID                                                                    
0           -1.008824e-33 -5.380830e-50 -7.209971e-34 -2.032891e-33   
1            8.551410e-51  5.586963e-32  4.346371e-32  1.151743e-31   
2           -1.188838e-32  3.670061e-33 -5.641396e-33 -1.639063e-32   
3           -2.725820e-35  2.811238e-32  2.185051e-32  5.789828e-32   
4            7.857383e-34  1.863683e-33  2.011410e-33  5.425301e-33   
...                   ...           ...           ...           ...   
23481       -1.368795e-32  2.112409e-32  6.650812e-33  1.596424e-32   
23482        8.062081e-32 -1.600130e-16 -1.244819e-16 -3.298640e-16   
23483       -8.212769e-33  1.267446e-32  3.990487e-33  9.578545e-33   
23484       -3.228236e-35 -1.927421e-33 -1.522507e-33 -4.038398e-33   
23485       -1.368795e-32  2.112409e-32  6.650812e-33  1.596424e-32   

Clothing ID          4             5             6

The output you provided shows two key components:

### 1. **Predicted Ratings Matrix:**
   This matrix represents the predicted ratings for each user-item pair. The rows correspond to different users (indexed by `ID`), and the columns correspond to different clothing items (indexed by `Clothing ID`). Each cell in the matrix contains a predicted rating for that user-item pair, which is calculated by matrix factorization (SVD in this case).

   - **Values in the matrix:** These are the predicted ratings for a given user and clothing item. Since these are derived from latent factors and the underlying patterns in the data, the values may not always be "real" ratings (i.e., integers or floats within a standard rating range). These values are real numbers but may not always be within the typical rating range (e.g., 1-5). Negative values and very small values could be a result of the matrix factorization approximating preferences based on user-item interactions.

   - **Shape:** The matrix appears to have dimensions of `23486 users x 1206 items`, with the actual predicted ratings being filled for each user-item pair.

### 2. **Recommendations for User 767:**
   This section shows the top recommended clothing items for **User 767** based on the predicted ratings.

   - **Clothing ID:** These are the items that the system recommends for the user.
   
   - **Predicted Ratings:** The numbers next to each `Clothing ID` represent the predicted ratings for each clothing item for **User 767**. The items with the highest predicted ratings are recommended.

   - For example, for **User 767**, the recommended clothing items are:
     1. `Clothing ID 0` with a predicted rating of `2.300867e-33`
     2. `Clothing ID 1` with a predicted rating of `1.566009e-32`
     3. `Clothing ID 2` with a predicted rating of `1.382715e-32`
     4. `Clothing ID 3` with a predicted rating of `3.691950e-32`
     5. `Clothing ID 4` with a predicted rating of `9.421314e-34`

   - These predicted ratings are derived based on latent factors in the matrix and represent how much the system predicts the user will like each item, even if the user hasn't rated them yet.

---

### **Interpretation of the Data:**
1. **Predicted Ratings:** These are approximations based on the user-item interactions and latent factors discovered through SVD. They don't necessarily represent actual ratings but show the system's prediction of user preferences.
   
2. **Small Predicted Ratings:** The very small numbers (like `2.300867e-33`) might indicate that the algorithm is having difficulty estimating preferences for certain items. This can happen if the model is not trained well enough or if there is insufficient data.

3. **Recommendations:** Based on the predicted ratings, the system is suggesting the top items with the highest predicted scores, which are intended to be the items that **User 767** might prefer, even though they haven't rated them yet.

### What to Improve:
- You may want to scale the predicted ratings to a standard range (e.g., 1 to 5) if that's expected.
- The very small values (near zero or negative) can often indicate issues with matrix factorization, such as a poor choice of `k` (latent factors). You can try increasing `k` or use other methods for improving the accuracy of the SVD model.
- It's also possible that the model could benefit from further tuning or more user-item interaction data.

Let me know if you'd like help with improving the model or interpreting it further!