In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

In [2]:
df = pd.read_csv("./CSV/Dataset.csv")
df = df.drop(["Unnamed: 0", "BMI_tags", "Label"], axis=1)
df = pd.get_dummies(df, columns=['gender'], drop_first=False)
X = df.drop(['calories_to_maintain_weight'], axis=1)
Y = df['calories_to_maintain_weight']
print(X.head())
print(Y.head())
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
modelLR = LinearRegression()
modelLR.fit(X_train, Y_train)

   age  weight(kg)  height(m)        BMI      BMR  activity_level  gender_F  \
0    2   16.097862   0.932025  18.531612  958.584             1.2      True   
1    4   14.619374   0.916687  17.397496  932.383             1.7      True   
2    4   17.899918   0.997288  17.997414  977.578             1.9      True   
3    3   13.532880   1.022786  12.936609  944.689             1.9      True   
4    4   17.039484   1.053977  15.338909  799.229             1.9     False   

   gender_M  
0     False  
1     False  
2     False  
3     False  
4      True  
0    1150.3008
1    1585.0511
2    1857.3982
3    1794.9091
4    1518.5351
Name: calories_to_maintain_weight, dtype: float64


In [3]:
coefficients = modelLR.coef_
print("Coefficients:", coefficients)
intercept = modelLR.intercept_
print("Intercept:", intercept)

Coefficients: [   1.48117891  -15.09140007  837.12897783   32.65318097    1.55180835
 1416.93164942   19.53302319  -19.53302319]
Intercept: -3435.785652756707


In [4]:
Y_pred = modelLR.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
print(f'Mean Squared Error: {mse}')
accuracy = modelLR.score(X_test, Y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')

Mean Squared Error: 1966.0291331480491
Accuracy: 98.74%


In [5]:
def calculate_bmi(weight_kg, height_m):
    return weight_kg / (height_m ** 2)

def calculate_bmr(age, weight_kg, height_m, gender_F, gender_M):
    if gender_F == 1:  # Female
        return 655 + (9.6 * weight_kg) + (1.8 * height_m * 100) - (4.7 * age)
    elif gender_M == 1:  # Male
        return 66 + (13.7 * weight_kg) + (5 * height_m * 100) - (6.8 * age)
    else:
        raise ValueError("Invalid gender values")
def calculate_daily_calories_direct(user_inputs, initial_weight, desired_weight, time_interval, modelLR):
    # Extract user inputs
    age = user_inputs['age']
    weight_kg = user_inputs['weight(kg)']
    height_m = user_inputs['height(m)']
    gender_F = user_inputs['gender_F']
    gender_M = user_inputs['gender_M']
    activity_level = user_inputs['activity_level']

    # Calculate BMI and BMR using the provided formulas
    BMI = calculate_bmi(weight_kg, height_m)
    BMR = calculate_bmr(age, weight_kg, height_m, gender_F, gender_M)
 # Calculate total weight change
    weight_change = desired_weight - initial_weight
    calories_per_kg = 7700

    daily_weight_change =(weight_change) / (time_interval)
    # Estimated calories per kg change (adjust as needed)


    # Calculate caloric difference based on weight change goal
    caloric_difference = daily_weight_change * calories_per_kg

    # Create input array for model prediction
    input_array = np.array([[age, weight_kg, height_m, BMI, BMR, activity_level, gender_F, gender_M]])

    print(input_array)

    # Use the trained model to predict daily maintenance calories
    model_predicted_calories = modelLR.predict(input_array)
    print(model_predicted_calories)
    # Sum the model predicted calories and caloric difference
    daily_cal = model_predicted_calories + caloric_difference

    return daily_cal

In [6]:
#DNR
# Example usage
user_inputs = {
    'age': 25,
    'weight(kg)': 70,
    'height(m)': 1.75,
    'gender_F': 0,  # Female
    'gender_M': 1,  # Male
    'activity_level': 1.5  # Moderate activity level (replace with user input)
}
initial_weight = 100  # replace with user input
desired_weight = 80  # replace with user input
time_interval_days = 2000  # replace with user input

# Assume 'model' is your trained machine learning model
daily_calorie_difference_direct = calculate_daily_calories_direct(user_inputs, initial_weight, desired_weight, time_interval_days, modelLR)

# Display the result
print(f"Estimated Daily Caloric Intake for Weight Change: {daily_calorie_difference_direct[0]:.2f} calories")

[[2.50000000e+01 7.00000000e+01 1.75000000e+00 2.28571429e+01
  1.73000000e+03 1.50000000e+00 0.00000000e+00 1.00000000e+00]]
[2546.6728392]
Estimated Daily Caloric Intake for Weight Change: 2469.67 calories




In [7]:
food_df = pd.read_csv('./CSV/RAW_recipes.csv')

# Extract unique food names
unique_food_names = food_df['name'].unique()

# Print the total number of unique food names
total_unique_food_names = len(unique_food_names)
print("Total Unique Food Names:", total_unique_food_names)

# Print the top 100 unique food names
print("\nTop 100 Unique Food Names:")
for food_name in unique_food_names[:100]:
    print(food_name)

Total Unique Food Names: 230186

Top 100 Unique Food Names:
arriba   baked winter squash mexican style
a bit different  breakfast pizza
all in the kitchen  chili
alouette  potatoes
amish  tomato ketchup  for canning
apple a day  milk shake
aww  marinated olives
backyard style  barbecued ribs
bananas 4 ice cream  pie
beat this  banana bread
berry  good sandwich spread
better than sex  strawberries
better then bush s  baked beans
boat house  collard greens
calm your nerves  tonic
chicken lickin  good  pork chops
chile rellenos
chinese  candy
chinese  chop suey
cream  of cauliflower soup  vegan
cream  of spinach soup
cream  of spinach soup  vegan
crispy crunchy  chicken
deep fried dessert thingys
easiest ever  hollandaise sauce
emotional balance  spice mixture
fool the meat eaters  chili
forgotten  minestrone
fried  potatoes
george s at the cove  black bean soup
get the sensation  brownies
global gourmet  taco casserole
grilled  ranch bread
grilled  venison burgers
healthy for them  yogur

In [8]:
# DIRECT K MEANS CLUSTERING APPROACH.
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

food_df = pd.read_csv('./CSV/RAW_recipes.csv')
def preprocess_data(df):
    df[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']] = df['nutrition'].str.split(",", expand=True)
    df['calories'] = df['calories'].apply(lambda x: x.replace('[',''))
    df['carbohydrates (PDV)'] = df['carbohydrates (PDV)'].apply(lambda x: x.replace(']',''))
    df[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']] = df[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']].astype('float')
    
    df.drop(['id', 'contributor_id', 'submitted', 'tags', 'nutrition'], axis=1, inplace=True)

    df['food types'] = np.nan
    df['food types'] = df['food types'].astype('str')
    
    for i in df.index:
        if 'eggs' not in df.loc[i, 'ingredients']:
            if 'ice-cream' in df.loc[i, 'ingredients'] or 'chocolate' in df.loc[i, 'ingredients'] or 'cookies' in df.loc[i, 'ingredients']:
                df.loc[i, 'food types'] = 'Veg dessert'
        elif 'eggs' in df.loc[i, 'ingredients']:
            if 'ice-cream' in df.loc[i, 'ingredients'] or 'chocolate' in df.loc[i, 'ingredients'] or 'cookies' in df.loc[i, 'ingredients']:
                df.loc[i, 'food types'] = 'Non-Veg dessert'
    
    for i in df.index:
        if df.loc[i, 'food types'] != 'Veg dessert' and df.loc[i, 'food types'] != 'Non-Veg dessert' and 20 < df.loc[i, 'calories'] < 300:
            df.loc[i, 'food types'] = 'Healthy'
    
    for i in df.index:
        if df.loc[i, 'food types'] != 'Veg dessert' and df.loc[i, 'food types'] != 'Non-Veg dessert' and df.loc[i, 'food types'] != 'Healthy':
            if 'chicken' in df.loc[i, 'ingredients'] or 'eggs' in df.loc[i, 'ingredients'] or 'ham' in df.loc[i, 'ingredients'] or 'pepperoni' in df.loc[i, 'ingredients']:
                df.loc[i, 'food types'] = 'Non-veg'
    
    for i in df.index:
        if df.loc[i, 'food types'] != 'Veg dessert' and df.loc[i, 'food types'] != 'Non-Veg dessert' and df.loc[i, 'food types'] != 'Healthy' and df.loc[i, 'food types'] != 'Non-veg':
            df.loc[i, 'food types'] = 'Veg'
    
    df['food types'].value_counts()
    df['food types'].isnull().sum()
    
    return df

# Apply the function to your dataframe
food_df = preprocess_data(food_df)
food_df.head()

# One-hot encode food types
types = pd.get_dummies(food_df['food types'])
food_df = pd.concat([food_df, types], axis=1)

# Create a pivot table for nutrient details
cal = food_df.pivot_table(columns='name', values=['total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)'])

# Apply K-Means clustering
columns_for_clustering = ['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']
clustering_data = food_df[columns_for_clustering]
scaler = StandardScaler()
clustering_data_standardized = scaler.fit_transform(clustering_data)

kmeans = KMeans(n_clusters=2000, n_init=10, random_state=42)
food_df['cluster'] = kmeans.fit_predict(clustering_data_standardized)
print(food_df.head(5))

[WinError 2] The system cannot find the file specified
  File "x:\CURRENT PROJECTS\Diet Recommander\DietRecommander\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\phyoz\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\phyoz\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\phyoz\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


                                         name  minutes  n_steps  \
0  arriba   baked winter squash mexican style       55       11   
1            a bit different  breakfast pizza       30        9   
2                   all in the kitchen  chili      130        6   
3                          alouette  potatoes       45       11   
4          amish  tomato ketchup  for canning      190        5   

                                               steps  \
0  ['make a choice and proceed with recipe', 'dep...   
1  ['preheat oven to 425 degrees f', 'press dough...   
2  ['brown ground beef in large pot', 'add choppe...   
3  ['place potatoes in a large pot of lightly sal...   
4  ['mix all ingredients& boil for 2 1 / 2 hours ...   

                                         description  \
0  autumn is my favorite time of year to cook! th...   
1  this recipe calls for the crust to be prebaked...   
2  this modified version of 'mom's' chili was a h...   
3  this is a super easy, great tasti

In [9]:
# Assuming 'cluster' is the column representing the clusters in your DataFrame
cluster_counts = food_df['cluster'].value_counts()

# Display the number of elements in each cluster
print("Number of elements in each cluster:")
print(cluster_counts.head(50))

Number of elements in each cluster:
cluster
930     4757
154     3013
1529    2643
800     2553
1958    2396
1859    2202
80      2096
1652    2088
1855    1972
1531    1761
1409    1702
646     1685
376     1606
1473    1598
578     1553
739     1471
1687    1435
811     1422
275     1392
1125    1378
606     1322
1312    1302
529     1247
146     1244
375     1171
834     1166
441     1136
1259    1117
464     1110
1561    1088
1725    1072
1653    1046
1745    1009
1777     997
93       987
1916     967
554      964
905      959
1517     947
14       929
528      928
1901     917
360      915
1931     911
807      871
889      850
49       847
729      843
514      840
1609     826
Name: count, dtype: int64


In [11]:
import joblib 
filename = 'finalModel.sav'
joblib.dump(modelLR, filename)

['finalModel.sav']