In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dropout, InputLayer

# Load the dataset
df = pd.read_csv('C:/Users/VARDHMAN/Downloads/MSCI-446/updated_cleaned_recipe_dataset.csv')

# Data preprocessing
# Text processing for 'ingredients'
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['Ingredient'])  
sequences = tokenizer.texts_to_sequences(df['Ingredient'])
padded_ingredients = pad_sequences(sequences, maxlen=50)

# One-hot encoding for 'dietary_info'
ohe = OneHotEncoder()
dietary_info_encoded = ohe.fit_transform(df[['Dietary Preference']]).toarray()

# Combine processed features
X = np.concatenate([padded_ingredients, dietary_info_encoded], axis=1)

# Assuming 'calories' is a target variable
y = df['Calories'].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model building
model = Sequential([
    InputLayer(shape=(X_train.shape[1],)),
    Embedding(input_dim=10000 + 1, output_dim=64),  # Adjust input_dim as needed, remove input_length
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear')  
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate the model
model.evaluate(X_test, y_test)



Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 249ms/step - loss: 105322.1719 - mean_absolute_error: 321.3148 - val_loss: 96137.2344 - val_mean_absolute_error: 305.1777
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 105465.9609 - mean_absolute_error: 321.7940 - val_loss: 95172.4219 - val_mean_absolute_error: 303.5988
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 105205.3438 - mean_absolute_error: 321.5848 - val_loss: 93118.6953 - val_mean_absolute_error: 300.2091
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 99696.4609 - mean_absolute_error: 312.1028 - val_loss: 89226.9922 - val_mean_absolute_error: 293.6792
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 98224.3047 - mean_absolute_error: 310.3652 - val_loss: 82730.2422 - val_mean_absolute_error: 282.4415
Epoch 6/20
[1m2/2[0m [32m━━━

[11414.1689453125, 95.32728576660156]