In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.layers import Concatenate, Dense, Dropout, Embedding, Flatten, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the dataset
df = pd.read_csv('/mnt/data/updated_cleaned_recipe_dataset.csv')

# Hyperparameters shared by the tokenizer, the padding step and the model, so
# that the three cannot drift out of sync.
VOCAB_SIZE = 10000   # the Tokenizer keeps only the most frequent words
MAX_SEQ_LEN = 50     # every ingredient list is padded/truncated to this length
EMBEDDING_DIM = 64   # size of each learned word vector

# Data preprocessing
# Text processing for 'ingredients'
# Missing entries are replaced by empty strings: the Tokenizer expects text and
# would fail on a float NaN.
ingredient_texts = df['ingredients'].fillna('').astype(str)
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(ingredient_texts)
sequences = tokenizer.texts_to_sequences(ingredient_texts)
padded_ingredients = pad_sequences(sequences, maxlen=MAX_SEQ_LEN)

# One-hot encoding for 'dietary_info'
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword; try the current spelling first and fall back for older releases.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)
dietary_info_encoded = ohe.fit_transform(df[['dietary_info']])

# Combine processed features
# Columns [0, MAX_SEQ_LEN) hold the token ids; the remaining columns hold the
# one-hot dietary flags.
X = np.concatenate([padded_ingredients, dietary_info_encoded], axis=1)

# Assuming 'calories' is a target variable
y = df['calories'].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def _split_features(features):
    """Split a combined feature matrix into the model's two inputs.

    Args:
        features: 2-D array laid out like ``X`` (token ids first, one-hot
            dietary flags after).

    Returns:
        ``[token_ids, dietary_flags]`` in the order the model expects.
    """
    token_ids = features[:, :MAX_SEQ_LEN].astype('int32')
    dietary_flags = features[:, MAX_SEQ_LEN:].astype('float32')
    return [token_ids, dietary_flags]


# Model building
# Only the token ids go through the Embedding layer. The one-hot dietary flags
# are not vocabulary indices, so they bypass it and are joined with the
# flattened embeddings before the dense layers.
ingredients_input = Input(shape=(MAX_SEQ_LEN,), dtype='int32')
dietary_input = Input(shape=(dietary_info_encoded.shape[1],))

embedded = Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM)(ingredients_input)  # can adjust these parameters
hidden = Concatenate()([Flatten()(embedded), dietary_input])
hidden = Dense(128, activation='relu')(hidden)
hidden = Dropout(0.5)(hidden)
hidden = Dense(64, activation='relu')(hidden)
hidden = Dropout(0.5)(hidden)
calories_output = Dense(1, activation='linear')(hidden)  # single regression output

model = Model(inputs=[ingredients_input, dietary_input], outputs=calories_output)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model
model.fit(_split_features(X_train), y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate the model
model.evaluate(_split_features(X_test), y_test)

