In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load and preprocess the dataset
data = pd.read_csv('cleaned_data.csv')
data = data[['Clothing ID', 'Age', 'Recommended IND', 'Review Text', 'Rating']]
data.dropna(subset=['Review Text'], inplace=True)
data.reset_index(drop=True, inplace=True)
data['Review Text'] = data['Review Text'].str.lower()

# Step 2: Create the item-user matrix
item_user_matrix = data.pivot_table(index='Clothing ID', columns='Age', values='Rating', fill_value=0)
item_user_matrix = item_user_matrix.to_numpy()

AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [None]:
# Step 3: Calculate cosine similarity
item_similarity = cosine_similarity(item_user_matrix)
item_similarity_df = pd.DataFrame(item_similarity, index=item_user_matrix.index, columns=item_user_matrix.index

In [None]:
# Step 4: Feature engineering - Process text data for 'Review Text'
vectorizer = CountVectorizer(stop_words='english')
review_matrix = vectorizer.fit_transform(data['Review Text'])
review_similarity = cosine_similarity(review_matrix)
review_similarity_df = pd.DataFrame(review_similarity, index=data.index, columns=data.index)

In [None]:
# Step 5: Combine the similarity matrices using weighted average (you can adjust the weights as needed)
alpha = 0.7
combined_similarity = alpha * item_similarity_df + (1 - alpha) * review_similarity_df

# Step 6: Define the recommendation function
def get_recommendations(clothing_id, top_n=5):
    # Get the combined similarity scores for the given clothing_id
    sim_scores = combined_similarity[clothing_id]

    # Get the top_n most similar clothing items
    top_items = sim_scores.nlargest(top_n + 1).drop(clothing_id)
    
    # Get the clothing details for the top items
    recommended_items = data[data['Clothing ID'].isin(top_items.index)]

    return recommended_items

In [None]:


# Step 7: Get recommendations for a specific item and save to a CSV file
def save_recommendations_to_csv(clothing_id, top_n=5, file_path='recommendations.csv'):
    # Get the combined similarity scores for the given clothing_id
    sim_scores = combined_similarity[clothing_id]

    # Get the top_n most similar clothing items
    top_items = sim_scores.nlargest(top_n + 1).drop(clothing_id)
    
    # Get the clothing details for the top items
    recommended_items = data[data['Clothing ID'].isin(top_items.index)]
    
    # Save the recommendations to a new CSV file
    recommended_items.to_csv(file_path, index=False)

# Get recommendations for a specific clothing item (e.g., Clothing ID 123) and save to CSV
clothing_id = 123  # Replace with the desired clothing ID
save_recommendations_to_csv(clothing_id, top_n=5, file_path='recommendations.csv')