In [None]:
import numpy as np
import pandas as pd

class SmartRecs:
    """Recommend products per user from past orders, with a popularity fallback.

    Each (user, product) pair is scored as the sum of ``order_number / max
    order_number of that user`` over the pair's transactions, divided by the
    median ``add_to_cart_order`` of the pair. Users with fewer ranked products
    than the training cut-off are padded with the globally most ordered
    products they have not bought; users absent from the training data get the
    globally most ordered products directly.

    Typical flow: construct -> ``train_model()`` -> ``get_top_k_recommendations``;
    after ``update_data`` call ``train_model()`` again to refresh the rankings.
    """

    # Columns the model reads; anything else in the CSV files is dropped
    # by preprocess_data.
    _TRANSACTION_COLUMNS = ['user_id', 'product_id', 'order_number', 'add_to_cart_order']
    _PRODUCT_COLUMNS = ['product_id', 'product_name']

    def __init__(self, transactions_file, products_file):
        """Load the raw data; no preprocessing or training happens here.

        Args:
            transactions_file: Path or buffer accepted by ``pd.read_csv`` with
                at least the columns user_id, product_id, order_number and
                add_to_cart_order.
            products_file: Path or buffer accepted by ``pd.read_csv`` with at
                least the columns product_id and product_name.
        """
        self.transactions = pd.read_csv(transactions_file)  # Load transactions data
        self.products = pd.read_csv(products_file)  # Load products data
        # Both are populated by train_model(); None means "not trained yet".
        self.top_k = None
        self.top_products = None

    def preprocess_data(self):
        """Trim both frames to the needed columns and derive ``weight_order``.

        Safe to call repeatedly: derived columns from a previous run are
        discarded by the column selection before being recomputed.
        """
        # Explicit copies so the column assignments below never target a slice.
        self.transactions = self.transactions[self._TRANSACTION_COLUMNS].copy()
        self.products = self.products[self._PRODUCT_COLUMNS].copy()

        # Highest order number each user has reached.
        self.transactions['max_order'] = (
            self.transactions.groupby('user_id')['order_number'].transform('max')
        )
        # Order weight: position of the order relative to the user's highest
        # order number, so larger order numbers weigh more.
        self.transactions['weight_order'] = (
            self.transactions['order_number'] / self.transactions['max_order']
        )
        # Running total of the weight per (user, product) pair.
        self.transactions['weight_order'] = (
            self.transactions.groupby(['user_id', 'product_id'])['weight_order'].cumsum()
        )

    def update_data(self, new_transactions_file=None, new_products_file=None):
        """Append new transactions and/or products to the loaded data.

        Existing recommendations are not refreshed; call ``train_model()``
        afterwards.

        Args:
            new_transactions_file: Optional CSV path/buffer of extra
                transactions, appended as-is.
            new_products_file: Optional CSV path/buffer of extra products.
                When a product_id already exists, the newly loaded row
                replaces the old one.
        """
        if new_transactions_file is not None:
            new_transactions = pd.read_csv(new_transactions_file)
            # ignore_index avoids duplicate index labels in the combined frame.
            self.transactions = pd.concat(
                [self.transactions, new_transactions], ignore_index=True
            )

        if new_products_file is not None:
            new_products = pd.read_csv(new_products_file)
            # One row per product_id: duplicates would make the left merge in
            # get_top_k_recommendations emit the same product several times.
            self.products = (
                pd.concat([self.products, new_products], ignore_index=True)
                .drop_duplicates(subset='product_id', keep='last')
                .reset_index(drop=True)
            )

    def train_model(self, n_top=10):
        """Build the per-user ranking (``top_k``) and the popularity list.

        Args:
            n_top: Number of globally popular products to keep, and the
                minimum list length each known user is padded towards.
                Padding can fall short when fewer unseen popular products
                exist.
        """
        self.preprocess_data()

        # Most popular products by number of transaction rows. The count lives
        # in the 'user_id' column (a by-product of counting that column); the
        # column name is kept for compatibility with existing readers.
        self.top_products = (
            self.transactions.groupby('product_id')
            .agg({'user_id': 'count'})
            .reset_index()
            .sort_values('user_id', ascending=False, kind='mergesort')
            .head(n_top)
        )

        # Per (user, product): accumulated order weight and typical cart position.
        ranked = (
            self.transactions.groupby(['user_id', 'product_id'])
            .agg({'weight_order': 'max', 'add_to_cart_order': 'median'})
            .reset_index()
        )
        # Products added to the cart earlier (smaller position) score higher.
        ranked['new_weight'] = ranked['weight_order'] / ranked['add_to_cart_order']
        # Stable sort so equal scores resolve the same way on every run.
        ranked = ranked.sort_values(by='new_weight', ascending=False, kind='mergesort')

        # Pad users that have fewer than n_top products with popular products
        # not already in their list. Rows are collected first and appended in
        # a single concat instead of growing the frame once per user.
        popular_ids = self.top_products['product_id'].to_numpy()
        pad_users = []
        pad_products = []
        for user_id, owned in ranked.groupby('user_id')['product_id']:
            shortfall = n_top - len(owned)
            if shortfall <= 0:
                continue
            unseen = popular_ids[~np.isin(popular_ids, owned.to_numpy())][:shortfall]
            pad_users.extend([user_id] * len(unseen))
            pad_products.extend(unseen.tolist())

        if pad_products:
            padding = pd.DataFrame({'user_id': pad_users, 'product_id': pad_products})
            # Padding rows carry no score (NaN) and sit after every scored
            # row, so a user's own products always come first.
            ranked = pd.concat([ranked, padding], ignore_index=True)

        self.top_k = ranked

    def get_top_k_recommendations(self, user_id, k=10):
        """Return up to ``k`` recommended product names for one user.

        Args:
            user_id: User to recommend for. Users not present in the trained
                ranking receive the globally most popular products.
            k: Maximum number of names to return.

        Returns:
            list: Product names in ranking order; entries are NaN for
            product ids missing from the products table.

        Raises:
            RuntimeError: If ``train_model()`` has not been called yet.
        """
        if self.top_k is None or self.top_products is None:
            raise RuntimeError('train_model() must be called before requesting recommendations')

        user_rows = self.top_k[self.top_k['user_id'] == user_id]
        # Unknown user -> fall back to the popularity list.
        candidates = (self.top_products if user_rows.empty else user_rows).head(k)
        # Left merge keeps the ranking order while attaching product names.
        named = candidates.merge(self.products, on='product_id', how='left')
        return named['product_name'].tolist()

    def get_top_k_recommendations_for_users(self, user_ids, k=10):
        """Return one recommendation list per entry of ``user_ids``, in order."""
        return [self.get_top_k_recommendations(user_id, k) for user_id in user_ids]

# Example usage: load the transaction and product CSV files into a recommender
recommender = SmartRecs(
    transactions_file='transactions.csv',
    products_file='products.csv',
)

# Fit the recommender on the loaded data
recommender.train_model()





In [5]:
# Request recommendations for one user and print them
top_k = 5
user_id = 123
user_recommendations = recommender.get_top_k_recommendations(user_id=user_id, k=top_k)
print(f"Top {top_k} recommendations for user {user_id}: {user_recommendations}")

Top 5 recommendations for user 123: ['Organic Unsweetened Almond Milk', 'Hass Avocados', 'Soy Sauce', 'Organic Spring Mix', 'Organic Tortilla Chips']


In [6]:
# Request recommendations for several users at once; the result holds one
# list per requested user, in the same order as user_ids
user_ids = [123, 3000000, 789]
user_recommendations = recommender.get_top_k_recommendations_for_users(user_ids, k=top_k)
for user_id, recs in zip(user_ids, user_recommendations):
    print(f"Top {top_k} recommendations for user {user_id}: {recs}")

Top 5 recommendations for user 123: ['Organic Unsweetened Almond Milk', 'Hass Avocados', 'Soy Sauce', 'Organic Spring Mix', 'Organic Tortilla Chips']
Top 5 recommendations for user 3000000: ['Banana', 'Bag of Organic Bananas', 'Organic Strawberries', 'Organic Baby Spinach', 'Organic Hass Avocado']
Top 5 recommendations for user 789: ['Cheez-It Baked Snack Crackers', 'French Vanilla Creamer', 'Organic Half & Half', 'Chunky Peanut Butter', 'Balsamic Vinegar']


In [None]:
# Append the new transaction and product CSV files to the loaded data
recommender.update_data('new_transactions.csv', 'new_products.csv')

# Train again so the recommendations are rebuilt from the combined data
recommender.train_model()