In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
import os

# Location of the recipes CSV. The default is the path this notebook was
# originally written against; set the RECIPES_CSV environment variable to run
# the notebook on another machine without editing this cell.
RECIPES_CSV = os.environ.get(
    "RECIPES_CSV",
    "C:/Users/DELL/Downloads/recipes.csv/recipes.csv",
)

# Raw recipes table; every later cell derives its frames from `data`.
data = pd.read_csv(RECIPES_CSV)

In [None]:
# Preview the first rows of the loaded recipes table.
data.head()

In [None]:
# Preview the last rows of the loaded recipes table.
data.tail()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the table's columns.
data.describe()

In [None]:
# Column names, dtypes and non-null counts; shows which columns have missing values.
data.info()

In [None]:
import matplotlib.pyplot as plt

# Calories with missing values removed; both panels below read this series.
calories = data["Calories"].dropna()

# Bin edges: 100-wide steps from 0 to 1000, then wider bins up to 5000.
# Values beyond the last edge are not counted by the histogram.
calorie_bin_edges = list(range(0, 1001, 100)) + [2000, 3000, 5000]

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
hist_ax, line_ax = axes

# Left panel: histogram of calorie values
hist_ax.hist(calories, bins=calorie_bin_edges, edgecolor="black")
hist_ax.set_title("Calories Distribution")
hist_ax.set_xlabel("Calories")
hist_ax.set_ylabel("Frequency")
hist_ax.grid(axis="y", alpha=0.3)

# Right panel: calorie values in row order (x is the position in `calories`)
line_ax.plot(calories.values)
line_ax.set_title("Calories Trend")
line_ax.set_xlabel("Food Index")
line_ax.set_ylabel("Calories")

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Calorie bands. The last edge is open-ended so that recipes above 5000
# calories are counted as "Ultra" instead of becoming NaN and silently
# disappearing from both charts.
bins = [0, 200, 400, 600, 800, 1000, float("inf")]
labels = ["Low", "Medium", "High", "Very High", "Extreme", "Ultra"]

# pd.cut uses right-closed intervals by default, i.e. (0, 200], which excludes
# a value of exactly 0; include_lowest=True keeps 0-calorie recipes in "Low".
data["Calorie_Level"] = pd.cut(
    data["Calories"],
    bins=bins,
    labels=labels,
    include_lowest=True,
)

# value_counts() orders by frequency; sort_index() restores the category order
# (Low -> Ultra) so the bar chart reads as an ordinal scale.
counts = data["Calorie_Level"].value_counts().sort_index()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Bar chart: number of recipes per calorie band
counts.plot(kind="bar", ax=axes[0], edgecolor="black")
axes[0].set_title("Food Count by Calorie Level")
axes[0].set_xlabel("Calorie Level")
axes[0].set_ylabel("Count")

# Pie chart: share of recipes per calorie band
axes[1].pie(
    counts,
    labels=counts.index,
    autopct="%1.1f%%",
    startangle=140
)
axes[1].set_title("Calorie Distribution")

plt.tight_layout()
plt.show()


In [None]:
import re
from collections import Counter

# Matches one double-quoted element inside an R-style vector literal.
_QUOTED_ITEM = re.compile(r'"([^"]*)"')


def _split_ingredients(raw):
    """Split one RecipeIngredientParts cell into clean, lower-cased names.

    NOTE(review): assumes the column may be serialized as an R character
    vector, e.g. ``c("salt", "sugar")`` -- confirm against the CSV. For such
    cells a plain comma split would leave ``c(`` / quote / ``)`` residue on the
    tokens and spread one ingredient over several Counter keys, so the quoted
    elements are extracted instead. Any other cell is split on commas exactly
    as before.

    Parameters
    ----------
    raw : str
        Raw cell text.

    Returns
    -------
    list of str
        Non-empty ingredient names with surrounding whitespace removed.
    """
    text = raw.strip().lower()
    if text.startswith("c(") and text.endswith(")"):
        parts = _QUOTED_ITEM.findall(text)
    else:
        parts = text.split(",")
    return [part.strip() for part in parts if part.strip()]


# One list of ingredient names per recipe (rows without ingredients skipped).
ingredients = (
    data["RecipeIngredientParts"]
    .dropna()
    .astype(str)
    .map(_split_ingredients)
)

# Flatten to a single list so every occurrence is counted once per recipe line.
ingredient_list = [item for sublist in ingredients for item in sublist]

top_ingredients = Counter(ingredient_list).most_common(10)
top_ingredients


In [None]:
import pandas as pd
import plotly.express as px

# Tabulate the (ingredient, count) pairs so plotly can map columns to axes.
ingredient_columns = ["Ingredient", "Count"]
ing_df = pd.DataFrame(top_ingredients, columns=ingredient_columns)

# Bar per ingredient, with the count printed on each bar.
fig = px.bar(
    ing_df,
    title="Top 10 Most Used Ingredients",
    x="Ingredient",
    y="Count",
    text="Count",
)
fig.show()



In [None]:
# First ten rows after ordering by rating, highest first.
rated_desc = data.sort_values(by="AggregatedRating", ascending=False)
top_rated = rated_desc.iloc[:10]

# Bars show the rating; bar colour encodes the recipe's calories.
fig = px.bar(
    top_rated,
    title="Top 10 High-Rated Recipes",
    x="Name",
    y="AggregatedRating",
    color="Calories",
)
fig.show()


In [None]:
# Mean rating per RecipeCategory, best-rated categories first.
mean_rating_by_category = data.groupby("RecipeCategory")["AggregatedRating"].mean()
cuisine_rating = mean_rating_by_category.sort_values(ascending=False)

fig = px.bar(cuisine_rating, title="Average Rating by Recipe Category")
fig.show()


In [None]:
# One point per recipe: calories on x, rating on y, coloured by category;
# the recipe name is shown on hover.
scatter_encoding = dict(
    x="Calories",
    y="AggregatedRating",
    color="RecipeCategory",
)
fig = px.scatter(
    data,
    title="Calories vs Rating",
    hover_data=["Name"],
    **scatter_encoding,
)
fig.show()


In [None]:
# Nutrition columns used for filtering, correlation and as the feature vector
# for the similarity models. The order matters: the filtering cells pair each
# entry with the limit at the same position in `max_list` via zip().
nutrition_cols = [
    "Calories",
    "FatContent",
    "SaturatedFatContent",
    "CholesterolContent",
    "SodiumContent",
    "CarbohydrateContent",
    "SugarContent",
    "ProteinContent",
    "FiberContent"
]


In [None]:
# Exclusive upper limits, one per entry of `nutrition_cols` in the same order
# (Calories, Fat, SaturatedFat, Cholesterol, Sodium, Carbohydrate, Sugar,
# Protein, Fiber). Units are whatever the CSV uses -- TODO confirm.
max_list = [500, 20, 10, 300, 800, 60, 25, 30, 10]


In [None]:
# Keep only recipes that are strictly below every nutrition limit.
# A missing value compares as False, so rows with NaN in any of these
# columns are dropped as well.
keep = pd.Series(True, index=data.index)
for column, maximum in zip(nutrition_cols, max_list):
    keep &= data[column] < maximum

extracted_data = data.loc[keep].copy()

In [None]:
# Row counts before and after the nutrition filter.
print("Original recipes:", data.shape[0])
print("Healthy recipes:", extracted_data.shape[0])


In [None]:
# Rebuild the filtered frame from the full table, narrowing it one nutrition
# column at a time: a row survives only if the value is present and strictly
# below that column's limit.
extracted_data = data.copy()

for column, maximum in zip(nutrition_cols, max_list):
    values = extracted_data[column]
    within_limit = values.notna() & values.lt(maximum)
    extracted_data = extracted_data.loc[within_limit]


In [None]:
# Ten best-rated recipes among those that passed the nutrition filter.
by_rating = extracted_data.sort_values("AggregatedRating", ascending=False)
recommended = by_rating.iloc[:10]

# Show only the columns needed to read the recommendation.
recommended[["Name", "AggregatedRating", "Calories"]]


In [None]:
# Dtypes and non-null counts of the filtered frame (checks what survived the filter).
extracted_data.info()

In [None]:
# Pairwise correlation between the nutrition columns of the filtered recipes.
corr_matrix = extracted_data[nutrition_cols].corr()
corr_matrix


In [None]:
# Recompute the correlation over every numeric column of the filtered frame.
# This reassigns `corr_matrix`, replacing the nutrition-only matrix from the
# previous cell; the heatmap cell plots whichever assignment ran last.
corr_matrix = extracted_data.select_dtypes(include="number").corr()
corr_matrix


In [None]:
import seaborn as sns
# Annotated heatmap of `corr_matrix`, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Nutrition Correlation Heatmap")
plt.show()


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the nutrition columns so that no single column dominates the
# similarity computation just because of its scale.
scaler = StandardScaler()

nutrition_features = extracted_data[nutrition_cols]
prep_data = scaler.fit_transform(nutrition_features)

In [None]:
# Refit the scaler and rebuild `prep_data`, replacing missing values with 0 first.
# NOTE(review): the filtering cell already drops rows with missing values in
# these columns, so fillna(0) looks like a no-op here -- confirm.
prep_data = scaler.fit_transform(
    extracted_data[nutrition_cols].fillna(0)
)


In [None]:
# (rows, columns) of the scaled matrix; columns should equal len(nutrition_cols).
prep_data.shape


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_by_index(index, top_n=5):
    """Recommend the recipes whose scaled nutrition profile is closest to one recipe.

    Similarity is the cosine similarity between rows of the module-level
    ``prep_data`` matrix; results are looked up in ``extracted_data`` by
    position.

    Parameters
    ----------
    index : int
        Positional row of the query recipe in ``prep_data`` /
        ``extracted_data`` (not a DataFrame index label). Negative values
        count from the end.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        ``Name``, ``AggregatedRating`` and ``Calories`` of the most similar
        recipes, ordered by rating (highest first). The query recipe itself is
        never included.
    """
    n_rows = prep_data.shape[0]
    # Normalize negative positions so the self-exclusion below compares like with like.
    position = index + n_rows if index < 0 else index

    target = prep_data[position].reshape(1, -1)
    similarities = cosine_similarity(target, prep_data)[0]

    # Rank by descending similarity, then drop the query row explicitly.
    # Slicing off "the first result" is not safe: a recipe with an identical
    # (or tied) nutrition vector can rank ahead of the query, in which case the
    # query itself would be returned as a recommendation.
    ranked = np.argsort(-similarities, kind="stable")
    similar_indices = ranked[ranked != position][:top_n]

    recs = extracted_data.iloc[similar_indices]
    recs = recs.sort_values("AggregatedRating", ascending=False)

    return recs[["Name", "AggregatedRating", "Calories"]]


In [None]:
# Example: five recipes most similar to the recipe at position 10 of the filtered frame.
recommend_by_index(10, top_n=5)


In [None]:
# Refit the scaler and rebuild `prep_data` (same expression as the earlier
# fillna(0) cell), then show the first five scaled rows.
prep_data = scaler.fit_transform(
    extracted_data[nutrition_cols].fillna(0)
)
prep_data[:5]


In [None]:
# Wrap the scaled matrix in a DataFrame that reuses the nutrition column names
# and the index labels of `extracted_data`, so scaled rows can be matched back
# to their recipes.
prep_df = pd.DataFrame(
    prep_data,
    columns=nutrition_cols,
    index=extracted_data.index
)

prep_df.head()


In [None]:
from sklearn.neighbors import NearestNeighbors

# Brute-force nearest-neighbour model using cosine distance over the scaled
# nutrition matrix. n_neighbors=10 is only the default for queries;
# recommend_recipes passes its own n_neighbors on every call.
nn_model = NearestNeighbors(
    n_neighbors=10,
    metric="cosine",
    algorithm="brute"
)

nn_model.fit(prep_data)


In [None]:
import numpy as np

def recommend_recipes(index, top_n=5):
    """Recommend nutritionally similar recipes using the fitted ``nn_model``.

    Parameters
    ----------
    index : int
        Positional row of the query recipe in ``prep_data`` /
        ``extracted_data`` (not a DataFrame index label). Negative values
        count from the end.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        ``Name``, ``AggregatedRating`` and ``Calories`` of the nearest
        recipes, ordered by rating (highest first). The query recipe itself is
        never included.
    """
    n_rows = prep_data.shape[0]
    position = index + n_rows if index < 0 else index

    # Ask for one extra neighbour to leave room for the query itself, but never
    # more than there are rows: kneighbors raises if n_neighbors exceeds the
    # number of fitted samples. Assumes nn_model was fitted on prep_data.
    k = min(top_n + 1, n_rows)
    _, indices = nn_model.kneighbors(
        prep_data[position].reshape(1, -1),
        n_neighbors=k
    )

    # Remove the input recipe by position rather than by dropping the first
    # hit: with duplicate nutrition vectors the query is not guaranteed to be
    # returned first, and dropping element 0 could keep the query in the output.
    indices = indices.flatten()
    indices = indices[indices != position][:top_n]

    return extracted_data.iloc[indices][
        ["Name", "AggregatedRating", "Calories"]
    ].sort_values("AggregatedRating", ascending=False)


In [None]:
# Example: five nearest recipes to the recipe at position 0 of the filtered frame.
recommend_recipes(0, top_n=5)

In [None]:
# Filtered recipes whose ingredient text contains the literal substring "egg".
# na=False treats a missing ingredient list as "no match"; without it the mask
# contains NaN for those rows and boolean indexing raises a ValueError.
extracted_data[
    extracted_data["RecipeIngredientParts"].str.contains("egg", regex=False, na=False)
]

In [None]:
def extract_data(dataframe, nutrition_cols, max_values, ingredient_filter=None):
    """Filter recipes by nutrition limits and, optionally, required ingredients.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Recipes table. Must contain every column in ``nutrition_cols`` and,
        when ``ingredient_filter`` is given, ``RecipeIngredientParts``.
        The input frame is not modified.
    nutrition_cols : sequence of str
        Columns to constrain.
    max_values : sequence of numbers
        Exclusive upper limits, paired with ``nutrition_cols`` by position.
    ingredient_filter : str or iterable of str, optional
        Substring(s) that must ALL occur (case-insensitively, as literal text)
        in ``RecipeIngredientParts``. A single string is treated as one
        ingredient.

    Returns
    -------
    pandas.DataFrame
        Rows that have a value strictly below every limit and that mention
        every requested ingredient.
    """
    df = dataframe.copy()

    # Nutrition filtering: a row needs a present value strictly below the limit.
    for col, max_val in zip(nutrition_cols, max_values):
        df = df[df[col].notna() & (df[col] < max_val)]

    # Ingredient filtering
    if ingredient_filter:
        # A bare string would otherwise be iterated character by character.
        if isinstance(ingredient_filter, str):
            ingredient_filter = [ingredient_filter]

        for ing in ingredient_filter:
            # na=False: recipes without an ingredient list never match. Without
            # it the mask holds NaN for those rows and indexing raises ValueError.
            has_ingredient = df["RecipeIngredientParts"].str.contains(
                ing, case=False, regex=False, na=False
            )
            df = df[has_ingredient]

    return df


In [None]:
from sklearn.preprocessing import StandardScaler

def scale_features(df, nutrition_cols):
    """Standardize the nutrition columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``nutrition_cols``.
    nutrition_cols : list of str
        Columns to scale.

    Returns
    -------
    tuple
        ``(X, scaler)``: the scaled feature matrix and the ``StandardScaler``
        that was fitted on these same rows.
    """
    features = df[nutrition_cols]
    fitted_scaler = StandardScaler().fit(features)
    return fitted_scaler.transform(features), fitted_scaler


In [None]:
from sklearn.neighbors import NearestNeighbors

def train_knn(X, n_neighbors=10):
    """Fit a brute-force, cosine-distance nearest-neighbour model on ``X``.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per recipe.
    n_neighbors : int, default 10
        Default neighbour count stored on the model.

    Returns
    -------
    NearestNeighbors
        The fitted model.
    """
    knn_params = {
        "n_neighbors": n_neighbors,
        "metric": "cosine",
        "algorithm": "brute",
    }
    # fit() returns the estimator itself, so the fitted model is returned directly.
    return NearestNeighbors(**knn_params).fit(X)


In [None]:
def recommend(index, model, X, df, top_n=5):
    """Return the recipes nearest to row ``index`` according to ``model``.

    Parameters
    ----------
    index : int
        Positional row of the query recipe in ``X`` / ``df`` (not a DataFrame
        index label). Negative values count from the end.
    model : object
        Fitted neighbour model exposing
        ``kneighbors(query, n_neighbors=...) -> (distances, indices)``.
    X : numpy.ndarray
        Feature matrix whose rows line up positionally with ``df``.
    df : pandas.DataFrame
        Recipes table with ``Name``, ``AggregatedRating``, ``Calories`` and
        ``RecipeCategory`` columns.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The nearest recipes (query excluded), ordered by rating, highest first.
    """
    n_rows = X.shape[0]
    position = index + n_rows if index < 0 else index

    # One extra neighbour leaves room for the query itself; clamp to the row
    # count because kneighbors rejects n_neighbors larger than the fitted data.
    k = min(top_n + 1, n_rows)
    _, indices = model.kneighbors(
        X[position].reshape(1, -1),
        n_neighbors=k
    )

    # Remove the query by position instead of dropping the first hit: when
    # another recipe has an identical feature vector the query is not
    # guaranteed to come back first, and it would leak into the results.
    indices = indices.flatten()
    indices = indices[indices != position][:top_n]

    return (
        df.iloc[indices]
        .sort_values("AggregatedRating", ascending=False)
        [["Name", "AggregatedRating", "Calories", "RecipeCategory"]]
    )


In [None]:
def recommend_system(
    dataframe,
    nutrition_cols,
    max_values,
    recipe_index,
    ingredient_filter=None,
    top_n=5
):
    """Run the whole pipeline: filter, scale, fit neighbours, recommend.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Full recipes table.
    nutrition_cols : list of str
        Nutrition columns used both for filtering and as features.
    max_values : list of numbers
        Upper limits paired with ``nutrition_cols`` by position.
    recipe_index : int
        Position of the query recipe within the FILTERED frame (it is used to
        index the scaled matrix built from the filtered rows), not within
        ``dataframe``.
    ingredient_filter : iterable of str, optional
        Ingredients every candidate must contain.
    top_n : int, default 5
        Number of recommendations requested.

    Returns
    -------
    pandas.DataFrame
        Output of ``recommend`` for the filtered candidates.
    """
    candidates = extract_data(
        dataframe,
        nutrition_cols,
        max_values,
        ingredient_filter=ingredient_filter,
    )

    # The fitted scaler is not needed afterwards; only the scaled matrix is.
    features, _ = scale_features(candidates, nutrition_cols)
    knn = train_knn(features, n_neighbors=top_n + 1)

    return recommend(recipe_index, knn, features, candidates, top_n=top_n)


In [None]:
# Filtered recipes only: calories on x, rating on y, marker size proportional
# to protein content, colour by category; recipe name shown on hover.
bubble_encoding = dict(
    x="Calories",
    y="AggregatedRating",
    size="ProteinContent",
    color="RecipeCategory",
)
fig = px.scatter(
    extracted_data,
    title="Calories vs Rating vs Protein",
    hover_data=["Name"],
    **bubble_encoding,
)
fig.show()
