In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
# Read the food / disease / dish table from CSV.
# The path below is environment-specific; point it at your own copy of the file.
df = pd.read_csv(
    filepath_or_buffer='/content/extended_food_disease_dishes_dataset.csv',
)
# Preview the first five rows to confirm the columns loaded as expected.
df.head(n=5)

Unnamed: 0,Food Name,Protein,Fats,Carbs,Calories,Disease,Dutch Dish,German Dish
0,Kale,23,18,23,346,Liver Disease,Bitterballen,Bratwurst
1,Veggie Wrap,5,10,28,222,Heart Disease,Boerenkool,Sauerbraten
2,Oats,23,8,22,252,Osteoporosis,Zuurkool,Schupfnudeln
3,Carrots,13,10,25,242,Asthma,Hutspot,Spätzle
4,Garlic Shrimp,23,12,12,248,Heart Disease,Poffertjes,Knödel


In [3]:
# One LabelEncoder per categorical column. They are kept as separate, named
# objects because later cells use them to encode inputs, decode predictions,
# and persist them to disk.
(
    label_encoder_food,
    label_encoder_disease,
    label_encoder_dutch,
    label_encoder_german,
) = (LabelEncoder() for _ in range(4))

# Fit each encoder on its text column and store the integer codes in a new
# "... Encoded" column alongside the original text.
for encoded_column, (text_column, encoder) in {
    'Food Name Encoded': ('Food Name', label_encoder_food),
    'Disease Encoded': ('Disease', label_encoder_disease),
    'Dutch Dish Encoded': ('Dutch Dish', label_encoder_dutch),
    'German Dish Encoded': ('German Dish', label_encoder_german),
}.items():
    df[encoded_column] = encoder.fit_transform(df[text_column])

# Features: the four integer-coded categorical columns (in this exact order).
X = df.loc[:, ['Food Name Encoded', 'Disease Encoded', 'Dutch Dish Encoded', 'German Dish Encoded']]
# Targets: the four numeric nutrition columns the model learns to predict.
y = df.loc[:, ['Protein', 'Fats', 'Carbs', 'Calories']]

In [4]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Fit a 100-tree random forest on the scaled training features.
# y_train holds four columns, so the single model predicts all four targets.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X=X_train_scaled, y=y_train)

In [6]:
# Score the held-out rows and report the result as a single RMSE figure
# computed over all target columns together.
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
print(f"Root Mean Squared Error: {rmse:.2f}")

Root Mean Squared Error: 31.18


In [7]:
def _rank_category(encoder, feature_position, disease_code, label_column, top_n):
    """Score every class of one encoder for a disease and keep the lowest-calorie rows.

    Reads the notebook-level `scaler` and `model`.

    Args:
        encoder: Fitted label encoder whose `classes_` are the candidates.
        feature_position: Column of the feature matrix that holds this
            encoder's codes (0 = food, 2 = Dutch dish, 3 = German dish).
        disease_code: Encoded disease, written to feature column 1.
        label_column: Name of the output column holding the candidate names.
        top_n: Number of rows to keep.

    Returns:
        DataFrame with `label_column`, 'Protein', 'Fats', 'Carbs', 'Calories',
        ordered by ascending predicted calories and rounded to one decimal.
    """
    class_count = len(encoder.classes_)

    # Label codes are the positions 0..n-1 within `classes_`, so the code
    # column is just arange(n); no per-class transform() call is needed.
    features = np.zeros((class_count, 4), dtype=int)
    features[:, 1] = disease_code
    features[:, feature_position] = np.arange(class_count)

    # If the scaler was fitted on a DataFrame it remembers the column names;
    # hand the same names back so scikit-learn does not warn about a mismatch.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        features = pd.DataFrame(features, columns=feature_names)

    predictions = model.predict(scaler.transform(features))

    table = pd.DataFrame(predictions, columns=['Protein', 'Fats', 'Carbs', 'Calories'])
    table[label_column] = encoder.classes_
    table = table[[label_column, 'Protein', 'Fats', 'Carbs', 'Calories']]

    # Rank on the unrounded predictions (stable, so exact ties keep class
    # order) and round only for display; rounding first would rank on
    # one-decimal values and could misorder near-ties.
    return table.sort_values(by='Calories', kind='stable').head(top_n).round(1)


def recommend_foods(disease_input, top_n=5):
    """Recommend the lowest-predicted-calorie foods and dishes for a disease.

    Uses the notebook-level fitted objects `label_encoder_disease`,
    `label_encoder_food`, `label_encoder_dutch`, `label_encoder_german`,
    `scaler` and `model`.

    Args:
        disease_input: Disease name exactly as known to `label_encoder_disease`.
        top_n: Maximum number of rows returned per category (default 5).

    Returns:
        Tuple of three DataFrames (foods, Dutch dishes, German dishes). Each
        has the name column ('Food Name' / 'Dutch Dish' / 'German Dish')
        followed by 'Protein', 'Fats', 'Carbs', 'Calories', sorted by
        ascending predicted calories, values rounded to one decimal.

    Raises:
        ValueError: If `disease_input` is not a label known to the encoder.
    """
    try:
        disease_code = label_encoder_disease.transform([disease_input])[0]
    except ValueError as exc:
        known = ", ".join(str(name) for name in label_encoder_disease.classes_)
        raise ValueError(
            f"Unknown disease {disease_input!r}; expected one of: {known}"
        ) from exc

    # NOTE(review): within each category the other two item columns are left
    # at 0, which is also the code of the first class of those encoders, so
    # every candidate is scored as if paired with those first-class items.
    # Confirm this is the intended baseline.
    user_recommendations = _rank_category(
        label_encoder_food, 0, disease_code, 'Food Name', top_n
    )
    dutch_recommendations = _rank_category(
        label_encoder_dutch, 2, disease_code, 'Dutch Dish', top_n
    )
    german_recommendations = _rank_category(
        label_encoder_german, 3, disease_code, 'German Dish', top_n
    )

    return user_recommendations, dutch_recommendations, german_recommendations

In [8]:
# Demonstrate the recommender for one condition, then print each of the three
# result tables under its own heading.
user_rec, dutch_rec, german_rec = recommend_foods("Heart Disease")

for heading, table in (
    ("Top 5 Recommended Foods for Heart Disease (User's Own):", user_rec),
    ("\nTop 5 Recommended Dutch Dishes for Heart Disease:", dutch_rec),
    ("\nTop 5 Recommended German Dishes for Heart Disease:", german_rec),
):
    print(heading)
    print(table)

Top 5 Recommended Foods for Heart Disease (User's Own):
           Food Name  Protein  Fats  Carbs  Calories
78  Zucchini Noodles     10.8   9.2   13.9     181.1
77          Zucchini     11.1   9.2   14.0     182.7
76    Yogurt Parfait     11.1   9.2   14.0     182.7
74       Veggie Wrap     10.8   9.6   13.6     183.8
75           Walnuts     11.0   9.5   13.6     184.1

Top 5 Recommended Dutch Dishes for Heart Disease:
          Dutch Dish  Protein  Fats  Carbs  Calories
14  Saucijzenbroodje     10.5   8.8   16.6     187.8
16          Stamppot     13.3   8.7   15.0     191.2
15          Spekkoek     12.0   8.6   16.4     191.3
6            Hutspot      9.3   7.9   20.9     191.9
2         Boerenkool     10.4  10.4   15.0     195.3

Top 5 Recommended German Dishes for Heart Disease:
      German Dish  Protein  Fats  Carbs  Calories
7          Knödel     14.1  10.2   11.0     192.0
9       Leberkäse     14.0   9.8   12.3     193.2
8      Käsekuchen     14.0  10.2   11.6     193.4
6  Ka



In [9]:
import joblib

# Serialize the fitted `model` to disk so it can be reloaded later without retraining.
joblib.dump(value=model, filename='food_recommendation_model.pkl')
print("Model saved successfully!")

Model saved successfully!


In [10]:
# Persist the fitted preprocessing objects (scaler, disease encoder, food-name
# encoder) next to the model, confirming each file as it is written.
for artifact, filename, message in (
    (scaler, 'scaler.pkl', "Scaler saved as 'scaler.pkl'"),
    (
        label_encoder_disease,
        'label_encoder_disease.pkl',
        "Label encoder for disease saved as 'label_encoder_disease.pkl'",
    ),
    (
        label_encoder_food,
        'label_encoder_food.pkl',
        "Label encoder for food name saved as 'label_encoder_food.pkl'",
    ),
):
    joblib.dump(artifact, filename)
    print(message)


Scaler saved as 'scaler.pkl'
Label encoder for disease saved as 'label_encoder_disease.pkl'
Label encoder for food name saved as 'label_encoder_food.pkl'


In [11]:
# Persist the Dutch- and German-dish label encoders as well, so every encoder
# used by recommend_foods is available on disk.
joblib.dump(label_encoder_dutch, 'label_encoder_dutch.pkl')
# This call is the cell's final expression, so its return value is what the
# notebook echoes as the cell output.
joblib.dump(label_encoder_german, 'label_encoder_german.pkl')

['label_encoder_german.pkl']