# Plotly visualisation

## 1. Bar chart
Bar chart of the number of recipes per category or subcategory

In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Load the recipe table from JSON stored in the pandas "table" orientation
df = pd.read_json(path_or_buf="data/structured_data.json", orient="table")
# Preview the first rows
df.head()

In [None]:
# Data for the bar chart: number of rows with a non-null recipe name in each
# category, relabelled for plotting and ordered from the largest count down
recipes_count_per_category = (
    df.groupby("category")["recipe_name"]
    .count()
    .reset_index()
    .rename(columns={"category": "Категория", "recipe_name": "Число рецептов"})
    .sort_values(by="Число рецептов", ascending=False)
)
recipes_count_per_category

In [None]:
import os

# One bar per category, coloured by category
fig = px.bar(
    recipes_count_per_category,
    x="Категория",
    y="Число рецептов",
    title="Число рецептов по категориям",
    color="Категория",
)
# Disable x-axis tick labels and the axis title
# (presumably because the colour legend already names every category)
fig.update_xaxes(showticklabels=False, title=None)
fig.update_layout(
    font_family="CupheadMemphis",
    title_font_family="CupheadMemphis",
)
fig.show()

# Save figure. The target folder is created first so that the export does not
# fail with FileNotFoundError when the folder is missing.
os.makedirs("visualisation/images", exist_ok=True)
fig.write_image("visualisation/images/recipes_count_per_category.svg")

In [None]:
# Data for the per-category charts: number of rows with a non-null recipe name
# for every (category, subcategory) pair, relabelled for plotting.
# Rows are ordered by category name, then by descending count inside a category.
recipes_count_per_subcategory = (
    df.groupby(["category", "subcategory"])["recipe_name"]
    .count()
    .reset_index()
    .rename(
        columns={
            "category": "Категория",
            "subcategory": "Подкатегория",
            "recipe_name": "Число рецептов",
        }
    )
    .sort_values(by=["Категория", "Число рецептов"], ascending=[True, False])
)
recipes_count_per_subcategory

In [None]:
import os

# Create the output folder once, before the loop starts writing into it,
# so that the export does not fail when the folder is missing.
os.makedirs("visualisation/images", exist_ok=True)

# One chart (and one SVG file) per category; bars are that category's subcategories
for category in recipes_count_per_subcategory["Категория"].unique():
    category_rows = recipes_count_per_subcategory[
        recipes_count_per_subcategory["Категория"] == category
    ]
    fig = px.bar(
        category_rows,
        x="Подкатегория",
        y="Число рецептов",
        title=f"Число рецептов по подкатегориям в категории {category}",
        color="Подкатегория",
    )
    # Disable x-axis tick labels and the axis title
    fig.update_xaxes(showticklabels=False, title=None)
    fig.update_layout(
        font_family="CupheadMemphis",
        title_font_family="CupheadMemphis",
    )
    fig.show()

    # Save figure; the category name is part of the file name
    fig.write_image(f"visualisation/images/recipes_count_per_subcategory_{category}.svg")

## 2. Word cloud
Word cloud of the most commonly used ingredients

In [None]:
import pandas as pd

from collections import defaultdict

In [None]:
# Load the recipe table from JSON stored in the pandas "table" orientation
df = pd.read_json(path_or_buf="data/structured_data.json", orient="table")
# Preview the first rows
df.head()

In [None]:
import os

# Word cloud input: count how many parsed ingredient entries share each key
ingredients_list_counts = defaultdict(int)

for ingredients in df["parsed_ingredients"]:
    for ingredient in ingredients:
        # Skip empty/missing entries; element 0 of an entry is the counting key
        # (presumably the ingredient name - confirm against the parser)
        if ingredient:
            ingredients_list_counts[ingredient[0]] += 1

# Sort by count, most frequent first (sorted() is stable, so ties keep first-seen order)
ingredients_list_counts = dict(sorted(ingredients_list_counts.items(), key=lambda x: x[1], reverse=True))

# Save to file as "<ingredient> <count>" lines, leaving out the None key.
# The target folder is created first so that open() does not fail when it is missing.
os.makedirs("visualisation/data", exist_ok=True)
with open("visualisation/data/ingredients.txt", "w", encoding="utf-8") as f:
    f.writelines(
        f"{ingredient} {count}\n"
        for ingredient, count in ingredients_list_counts.items()
        if ingredient is not None
    )

In [None]:
# Render the word-cloud SVG file inside the notebook
from IPython.display import SVG, display

wordcloud_svg = SVG(filename="visualisation/images/wordcloud.svg")
display(wordcloud_svg)

## 3. Interactive Network Visualization

In [None]:
import json
import os
import pandas as pd

In [None]:
# Load the recipe table from JSON stored in the pandas "table" orientation
df = pd.read_json(path_or_buf="data/structured_data.json", orient="table")
# Preview the first rows
df.head()

In [None]:
# One dict per recipe row: its category, subcategory, name and the list of
# ingredient keys (element 0 of every parsed ingredient entry).
# An entry is kept only when the entry itself is non-empty AND its first element
# is truthy. Checking the entry first avoids an IndexError/TypeError on empty or
# missing entries, matching the `if ingredient:` guard used for the word cloud.
recipes_ingredients = [
    {
        "category": category,
        "subcategory": subcategory,
        "recipe_name": recipe_name,
        "ingredients": [
            ingredient[0] for ingredient in ingredients if ingredient and ingredient[0]
        ],
    }
    for category, subcategory, recipe_name, ingredients in zip(
        df["category"], df["subcategory"], df["recipe_name"], df["parsed_ingredients"]
    )
]

recipes_ingredients

### We need to gather data in the following format:
One JSON file is written per subcategory. Recipe ids are prefixed with `r`, ingredient ids with `i`, and both kinds of node store their label under `name`.
```json
{
  "nodes": [
    {
      "id": "r0",
      "is_recipe": true,
      "name": "recipe name"
    },
    {
      "id": "i0",
      "is_recipe": false,
      "name": "ingredient name"
    },
    ...
  ],
  "links": [
    {
      "source": "r0",
      "target": "i0"
    },
    ...
  ]
}
```


In [None]:
# Running counters: ids are unique across ALL exported files, not per file
recipe_last_id = 0
ingredient_last_id = 0

# Despite the name this is a dict: ingredient -> numeric id assigned on first sight
ingredients_set = {}

for category in df["category"].unique():
    subcategories = df[df["category"] == category]["subcategory"].unique()

    for subcategory in subcategories:
        # Recipes of this (category, subcategory) pair, keyed by recipe name
        recipes_ingredients_per_subcategory = {
            recipe["recipe_name"]: recipe["ingredients"]
            for recipe in recipes_ingredients
            if recipe["category"] == category and recipe["subcategory"] == subcategory
        }

        # Graph pieces collected for the current subcategory file
        recipe_nodes = []
        ingredient_nodes = []
        recipe_links = []
        # Ingredients that already have a node in the current file
        ingredients_current_set = set()

        for recipe_name, recipe_ingredients in recipes_ingredients_per_subcategory.items():
            recipe_id = f"r{recipe_last_id}"

            for ingredient in recipe_ingredients:
                if ingredient is None:
                    continue

                # Assign a unique id to each ingredient the first time it is seen
                if ingredient not in ingredients_set:
                    ingredients_set[ingredient] = ingredient_last_id
                    ingredient_last_id += 1
                ingredient_id = f"i{ingredients_set[ingredient]}"

                # Add the ingredient node once per file
                if ingredient not in ingredients_current_set:
                    ingredients_current_set.add(ingredient)
                    ingredient_nodes.append(
                        {"id": ingredient_id, "is_recipe": False, "name": ingredient}
                    )

                # Link the recipe to every ingredient it lists
                recipe_links.append({"source": recipe_id, "target": ingredient_id})

            # Add the recipe node and move on to the next recipe id
            recipe_nodes.append({"id": recipe_id, "is_recipe": True, "name": recipe_name})
            recipe_last_id += 1

        # Create output directory
        os.makedirs(f"visualisation/data/network/{category}", exist_ok=True)

        # Save to file: recipe nodes first, then ingredient nodes
        graph = {"nodes": recipe_nodes + ingredient_nodes, "links": recipe_links}
        with open(f"visualisation/data/network/{category}/{subcategory}.json", "w", encoding="utf-8") as f:
            json.dump(graph, f, ensure_ascii=False)

In [None]:
# Create index file with paths to all subcategories
with open("visualisation/data/network/index.json", "w", encoding="utf-8") as f:
    # category -> list of the subcategory values found in that category's rows
    subcategories_by_category = {}
    for category in df["category"].unique():
        in_category = df["category"] == category
        subcategories_by_category[category] = list(df[in_category]["subcategory"].unique())

    json.dump(subcategories_by_category, f, ensure_ascii=False)