<a href="https://colab.research.google.com/github/aimldlnlp/C242-PS395/blob/main/%5BC242_PS395%5D_classification%2Brecommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -*- coding: utf-8 -*-
"""
Food Recommendation System
------------------------------------
Provides recommendations across daily, weekly, and monthly timeframes
based on sugar intake and consumption patterns.
"""

import os
import logging
import warnings
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import tensorflow as tf

# Silence every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Root logging setup: INFO threshold, "timestamp - level - message" layout.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Logger shared by the functions defined in the cells below.
logger = logging.getLogger(__name__)

In [5]:
# Colab-only helper used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Drive at /content/drive; the data paths used later in this notebook
# live under that directory. force_remount=True presumably re-mounts even when
# a mount already exists -- confirm against the Colab documentation.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [6]:
# Location of the optional YAML settings file on the mounted Drive.
# NOTE(review): none of the cells visible here pass this value to
# run_recommendation_system, so its built-in default limits are what apply.
# The "My Drive" spelling also differs from the "MyDrive" used by the data
# paths -- confirm both resolve in the target runtime.
config_path = '/content/drive/My Drive/config.yaml'

In [7]:
# Read the two raw inputs from the project folder on the mounted Drive:
# the per-user dish log and the food / sugar lookup table.
DATA_DIR = '/content/drive/MyDrive/C242-PS395'
daily_logs = pd.read_csv(f'{DATA_DIR}/data_users.csv')
food_data = pd.read_csv(f'{DATA_DIR}/food_data.csv')

In [8]:
# Preview the first rows of the dish log (rendered as the cell's output).
daily_logs.head()

Unnamed: 0,User ID,Date,Dish Name,Sugar
0,1,2014-09-15,"my - McDonalds Espresso Pronto® Flat White, 2 ...",29
1,1,2014-09-15,"Uncle Tobys Australia - Vita Brits, 3 Biscuits...",0
2,1,2014-09-15,"Pauls - Smarter White Milk, 600 ml",34
3,1,2014-09-16,"Gatorade - Lemon-Lime (Australia), 600 ml",36
4,1,2014-09-16,"my - McDonalds Espresso Pronto® Flat White, 2 ...",29


In [9]:
# Preview the first rows of the food table (rendered as the cell's output).
food_data.head()

Unnamed: 0,food,Sugars
0,cream cheese,0.5
1,neufchatel cheese,2.7
2,requeijao cremoso light catupiry,3.4
3,ricotta cheese,0.091
4,cream cheese low fat,0.9


In [10]:
# Global configuration
def load_config(config_path):
    """
    Load recommendation settings from a YAML file, falling back to defaults.

    Values found in the file override the built-in defaults key by key, so a
    file that is empty or that only sets some of the keys still yields a
    complete configuration.

    :param config_path: Path to configuration file
    :return: Configuration dictionary containing at least the keys
        'daily_sugar_limit', 'weekly_sugar_limit', 'monthly_sugar_limit'
        and 'recommendation_top_n'
    """
    defaults = {
        'daily_sugar_limit': 50,
        'weekly_sugar_limit': 350,
        'monthly_sugar_limit': 1500,
        'recommendation_top_n': 5
    }

    try:
        with open(config_path, 'r') as file:
            loaded = yaml.safe_load(file)
    except FileNotFoundError:
        logger.warning(f"Config file not found at {config_path}. Using default settings.")
        return defaults

    # safe_load gives None for an empty document and may give a list or a
    # scalar for other YAML; only a mapping can override the defaults.
    if not isinstance(loaded, dict):
        logger.warning(
            f"Config file at {config_path} does not contain a mapping. Using default settings."
        )
        return defaults

    # Later keys win, so file values replace defaults and missing keys survive.
    return {**defaults, **loaded}


In [11]:
# Helper functions for recommendation system
def classify_sugar_intake(sugar_intake, limit):
    """
    Map a sugar intake figure onto a coarse level relative to a limit.

    Below 80% of the limit is 'Low', from there up to and including the limit
    is 'Normal', and everything else is 'High'.

    :param sugar_intake: Total sugar intake
    :param limit: Sugar intake limit
    :return: One of 'Low', 'Normal' or 'High'
    """
    low_cutoff = 0.8 * limit

    if sugar_intake < low_cutoff:
        return 'Low'
    if sugar_intake <= limit:
        return 'Normal'
    return 'High'

In [12]:
def aggregate_data(daily_logs, groupby_cols, limit, agg_period, config):
    """
    Aggregate dish logs over one timeframe and label each group's sugar level.

    Each group gets its dish names joined into one comma-separated string and
    its 'Sugar' values summed; the sum is then classified against ``limit``.
    Output columns are prefixed with ``agg_period`` and 'User ID' is renamed
    to 'user_id'; other grouping columns keep their names.

    :param daily_logs: Daily logs DataFrame ('Dish Name' and 'Sugar' columns are read)
    :param groupby_cols: Columns (or groupers) to group by
    :param limit: Sugar intake limit
    :param agg_period: Aggregation period used as column prefix ('day', 'week', 'month')
    :param config: Configuration dictionary (not read here; kept so existing callers still work)
    :return: Aggregated DataFrame
    """
    def join_dishes(dishes):
        # Drop missing names and coerce the rest to str so a NaN or numeric
        # 'Dish Name' cannot make str.join raise TypeError.
        return ', '.join(dishes.dropna().astype(str))

    intake_col = f'{agg_period}_sugar_intake'

    # Chained rename instead of inplace=True: no hidden mutation on re-run.
    agg_data = (
        daily_logs
        .groupby(groupby_cols)
        .agg({'Dish Name': join_dishes, 'Sugar': 'sum'})
        .reset_index()
        .rename(columns={
            'User ID': 'user_id',
            'Sugar': intake_col,
            'Dish Name': f'{agg_period}_input_dish'
        })
    )

    # Classify sugar levels
    agg_data[f'{agg_period}_sugar_level'] = agg_data[intake_col].apply(
        lambda intake: classify_sugar_intake(intake, limit)
    )

    return agg_data

In [13]:
def preprocess_data(daily_logs, config):
    """
    Build per-user daily, weekly and monthly sugar aggregates.

    The input frame is not modified: dates are parsed on a copy. Each result
    has the columns 'user_id', 'date', '<period>_input_dish',
    '<period>_sugar_intake' and '<period>_sugar_level', where <period> is
    'daily', 'weekly' or 'monthly'.

    :param daily_logs: Daily logs DataFrame with 'User ID', 'Date', 'Dish Name' and 'Sugar'
    :param config: Configuration dictionary providing 'daily_sugar_limit',
        'weekly_sugar_limit' and 'monthly_sugar_limit'
    :return: Tuple (daily_agg, weekly_agg, monthly_agg) of DataFrames
    """
    # Parse dates on a copy so the caller's DataFrame keeps its original column.
    logs = daily_logs.copy()
    logs['Date'] = pd.to_datetime(logs['Date'])

    # (column prefix, date grouping). The monthly grouper is given a MonthEnd
    # offset object rather than the 'M' string alias, which recent pandas
    # releases deprecate; the bins are the same month-end bins.
    timeframes = [
        ('daily', 'Date'),
        ('weekly', pd.Grouper(key='Date', freq='W')),
        ('monthly', pd.Grouper(key='Date', freq=pd.offsets.MonthEnd())),
    ]

    results = []
    for period, date_key in timeframes:
        aggregated = aggregate_data(
            logs,
            ['User ID', date_key],
            config[f'{period}_sugar_limit'],
            period,
            config
        )
        # aggregate_data leaves the date grouping column named 'Date'.
        results.append(aggregated.rename(columns={'Date': 'date'}))

    daily_agg, weekly_agg, monthly_agg = results
    return daily_agg, weekly_agg, monthly_agg


In [14]:
def recommend_food(user_id, aggregated_data, food_data, timeframe, config):
    """
    Recommend foods whose names resemble what a user logged, filtered by sugar level.

    The user's '<timeframe>_input_dish' text is compared (TF-IDF vectors,
    cosine similarity) against every name in ``food_data['food']``. The
    user's '<timeframe>_sugar_level' then restricts the candidates:
    'High' keeps foods with Sugars < 5, 'Low' keeps foods with Sugars <= 15,
    and any other level applies no filter.

    :param user_id: User ID
    :param aggregated_data: Aggregated data (daily/weekly/monthly) with a 'user_id' column
    :param food_data: Food database with 'food' and 'Sugars' columns
    :param timeframe: Recommendation timeframe, used as the column prefix
    :param config: Configuration dictionary ('recommendation_top_n' is read, default 5)
    :return: DataFrame with 'food', 'Sugars' and 'similarity_score' for the top
        matches, or None when the user has no rows or an error occurs
    """
    try:
        dish_col = f'{timeframe}_input_dish'
        level_col = f'{timeframe}_sugar_level'

        # Filter user data
        user_info = aggregated_data[aggregated_data['user_id'] == user_id]
        if user_info.empty:
            logger.warning(f"No data found for user {user_id} in {timeframe} timeframe")
            return None

        # NOTE(review): only the first matching row is used. If the frame is
        # ordered by date this is the earliest period, not the latest --
        # confirm that is the intended basis for the recommendation.
        user_food_list = user_info[dish_col].iloc[0]
        current_sugar_level = user_info[level_col].iloc[0]

        # Food names are coerced to str (missing ones become '') because
        # TfidfVectorizer raises on NaN documents, which the handler below
        # would otherwise turn into a silent None.
        food_corpus = food_data['food'].fillna('').astype(str).tolist()

        # The user's dish text is appended as the last document so both sides
        # share one vocabulary.
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform(food_corpus + [str(user_food_list)])

        user_vector = tfidf_matrix[-1]    # last row: the user's dish text
        food_vectors = tfidf_matrix[:-1]  # all rows before it: the food names
        cosine_sim = cosine_similarity(user_vector, food_vectors).flatten()

        # Rank recommendations
        recommendations = food_data.copy()
        recommendations['similarity_score'] = cosine_sim

        # Filter recommendations based on current sugar level
        if current_sugar_level == 'High':
            # Recommend low sugar foods
            recommendations = recommendations[recommendations['Sugars'] < 5]
        elif current_sugar_level == 'Low':
            # More flexible recommendations
            recommendations = recommendations[recommendations['Sugars'] <= 15]

        # Sort and return top recommendations
        top_n = config.get('recommendation_top_n', 5)
        recommended_foods = recommendations.nlargest(top_n, 'similarity_score')[
            ['food', 'Sugars', 'similarity_score']
        ]

        logger.info(f"{timeframe.capitalize()} Recommendations for User {user_id} (Current Sugar Level: {current_sugar_level}):")
        logger.info(recommended_foods)

        return recommended_foods

    except Exception as e:
        # Deliberate best-effort: one failing timeframe must not abort the others.
        logger.error(f"Recommendation error for {timeframe} timeframe: {e}")
        return None

In [15]:
def _summarize_sugar_level(intakes, limit):
    """Return (level, average) for one intake Series, or ('Unknown', 0) when it has no usable data."""
    try:
        average = intakes.mean()
        # An empty (or all-missing) Series averages to NaN. Every comparison
        # against NaN is False, so classify_sugar_intake would fall through to
        # 'High'; report the level as unknown instead.
        if pd.isna(average):
            return 'Unknown', 0
        return classify_sugar_intake(average, limit), average
    except (ValueError, TypeError):
        return 'Unknown', 0


def generate_comprehensive_recommendations(user_id, daily_logs, food_data, config):
    """
    Generate recommendations across all timeframes with user sugar level analysis

    For each of 'daily', 'weekly' and 'monthly' the user's average intake is
    classified against the matching '<timeframe>_sugar_limit' from ``config``;
    a user with no rows gets level 'Unknown' and an average of 0.

    :param user_id: User ID
    :param daily_logs: Daily logs DataFrame
    :param food_data: Food database
    :param config: Configuration dictionary
    :return: Dictionary with keys 'daily', 'weekly', 'monthly' (each the result
        of recommend_food) and 'user_sugar_levels' (per-timeframe dicts with
        'level', 'average_intake' and 'limit')
    """
    # Preprocess data
    daily_agg, weekly_agg, monthly_agg = preprocess_data(daily_logs, config)
    aggregates = {'daily': daily_agg, 'weekly': weekly_agg, 'monthly': monthly_agg}

    # Summarise the user's average intake per timeframe
    user_sugar_levels = {}
    for timeframe, agg in aggregates.items():
        limit = config[f'{timeframe}_sugar_limit']
        intakes = agg.loc[agg['user_id'] == user_id, f'{timeframe}_sugar_intake']
        level, average = _summarize_sugar_level(intakes, limit)
        user_sugar_levels[timeframe] = {
            'level': level,
            'average_intake': round(average, 2),
            'limit': limit
        }

    # Generate recommendations; key order (daily, weekly, monthly, then
    # 'user_sugar_levels') matches what callers iterate over.
    recommendations = {
        timeframe: recommend_food(user_id, agg, food_data, timeframe, config)
        for timeframe, agg in aggregates.items()
    }
    recommendations['user_sugar_levels'] = user_sugar_levels

    return recommendations

In [16]:
def run_recommendation_system(daily_logs_path, food_data_path, user_id=None, config_path=None):
    """
    Load the inputs, validate them and produce recommendations for one user.

    Every failure is logged and reported as a ``None`` return value rather
    than an exception.

    :param daily_logs_path: Path to daily logs CSV
    :param food_data_path: Path to food data CSV
    :param user_id: Specific user ID for recommendations (optional; defaults to
        the first user appearing in the daily logs)
    :param config_path: Path to configuration file (optional; built-in limits
        are used when omitted and fill in any keys the file leaves out)
    :return: Recommendations dictionary, or None on any error
    """
    try:
        # Load configuration
        default_config = {
            'daily_sugar_limit': 50,
            'weekly_sugar_limit': 350,
            'monthly_sugar_limit': 1500,
            'recommendation_top_n': 5
        }
        if config_path is None:
            config = default_config
        else:
            loaded = load_config(config_path)
            # An empty file loads as None and a partial file lacks keys; either
            # would fail later on config['..._sugar_limit'], so complete it here.
            config = {**default_config, **loaded} if isinstance(loaded, dict) else default_config

        # Load data with error handling
        try:
            daily_logs = pd.read_csv(daily_logs_path)
            food_data = pd.read_csv(food_data_path)
        except FileNotFoundError as e:
            logger.error(f"Data file not found: {e}")
            return None
        except pd.errors.EmptyDataError:
            logger.error("One of the data files is empty")
            return None

        # Validate data with explicit raises: assert statements are removed
        # when Python runs with -O, which would skip the checks entirely.
        required_daily_cols = ['User ID', 'Date', 'Dish Name', 'Sugar']
        required_food_cols = ['food', 'Sugars']

        for col in required_daily_cols:
            if col not in daily_logs.columns:
                raise ValueError(f"Missing column {col} in daily logs")

        for col in required_food_cols:
            if col not in food_data.columns:
                raise ValueError(f"Missing column {col} in food data")

        # If no specific user is provided, use the first user in the dataset
        if user_id is None:
            if daily_logs.empty:
                raise ValueError("Daily logs contain no rows; cannot pick a default user")
            user_id = daily_logs['User ID'].unique()[0]

        # Generate comprehensive recommendations
        return generate_comprehensive_recommendations(
            user_id, daily_logs, food_data, config
        )

    except Exception as e:
        logger.error(f"Recommendation system execution error: {e}")
        return None

In [18]:
def main(daily_logs_path='/content/drive/MyDrive/C242-PS395/data_users.csv',
         food_data_path='/content/drive/MyDrive/C242-PS395/food_data.csv',
         user_id=5,
         config_path=None):
    """
    Run the recommendation system for one user and print the results.

    Calling ``main()`` with no arguments behaves as before; the keyword
    parameters allow another dataset, user or configuration file to be used
    without editing the function.

    :param daily_logs_path: Path to the daily logs CSV
    :param food_data_path: Path to the food data CSV
    :param user_id: User to report on (None lets the system pick the first user)
    :param config_path: Optional path to a YAML configuration file
    """
    # Run recommendations
    recommendations = run_recommendation_system(
        daily_logs_path=daily_logs_path,
        food_data_path=food_data_path,
        user_id=user_id,
        config_path=config_path
    )

    # Nothing to print when the run failed; the error was already logged.
    if recommendations:
        # Display user sugar levels
        print("\nUser Sugar Level Analysis:")
        for timeframe, sugar_info in recommendations['user_sugar_levels'].items():
            print(f"\n{timeframe.capitalize()} Sugar Level:")
            print(f"  Level: {sugar_info['level']}")
            print(f"  Average Intake: {sugar_info['average_intake']} g")
            print(f"  Limit: {sugar_info['limit']} g")

        # Print food recommendations
        print("\nFood Recommendations:")
        for timeframe, rec in recommendations.items():
            # 'user_sugar_levels' shares the dict with the per-timeframe results.
            if timeframe != 'user_sugar_levels':
                print(f"\n{timeframe.capitalize()} Recommendations:")
                print(rec)

if __name__ == "__main__":
    main()


User Sugar Level Analysis:

Daily Sugar Level:
  Level: High
  Average Intake: 124.5 g
  Limit: 50 g

Weekly Sugar Level:
  Level: High
  Average Intake: 622.5 g
  Limit: 350 g

Monthly Sugar Level:
  Level: High
  Average Intake: 1867.5 g
  Limit: 1500 g

Food Recommendations:

Daily Recommendations:
                               food  Sugars  similarity_score
790       vanilla bluberry bar kind   0.096          0.202540
798                 almond rice bar   0.000          0.198100
793      chocolate chip granola bar   0.000          0.188807
1612  vegetable oil spread fat free   0.000          0.186561
5             cream cheese fat free   1.000          0.178083

Weekly Recommendations:
                               food  Sugars  similarity_score
1848          potato chips fat free   8.400          0.224807
790       vanilla bluberry bar kind   0.096          0.202540
798                 almond rice bar   0.000          0.198100
793      chocolate chip granola bar   0.000        