In [1]:
import torch

In [2]:
import transformers
from transformers import BertForSequenceClassification, BertTokenizer

In [3]:
# Deserialize the saved classifier onto the CPU so no GPU is required.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
cpu_device = torch.device('cpu')
model = torch.load('model/BERT_model', map_location=cpu_device)

# Switch to evaluation mode for inference; as the last expression of the cell
# this also displays the module structure.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [4]:
# Load the stock 'bert-base-uncased' tokenizer.
# NOTE(review): presumably the same vocabulary the saved model was fine-tuned
# with (its word embedding table has 30522 rows) -- confirm against training.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [5]:
# Build the project's Recipes container and import its data from the
# 'listings' and 'evaluation' folders. Later cells read cuukin.listings and
# cuukin.evaluation_data -- presumably populated by import_data (confirm in
# recipes.py).
from recipes import Recipes
cuukin = Recipes()
cuukin.import_data(listings_folder='listings', evaluation_folder='evaluation')

In [6]:
def find_techniques(description, treshold = 0.5):
    """Predict which cooking techniques a method description involves.

    The description is tokenized, passed through the global ``model`` and each
    output logit is turned into an independent probability with a sigmoid.
    Every technique whose probability exceeds ``treshold`` is returned.

    Args:
        description: Text of a single recipe method step.
        treshold: Exclusive probability cut-off. The misspelt parameter name
            is kept on purpose so existing keyword callers keep working.

    Returns:
        list: Names taken from ``cuukin.listings['techniques']['name']`` whose
        predicted probability is above ``treshold``, in listing order.
    """
    # NOTE(review): assumes the model emits one logit per technique, in the
    # same order as this listing -- zip() below silently truncates otherwise.
    techniques_list = list(cuukin.listings['techniques']['name'])

    # The model only has 512 position embeddings, so longer descriptions must
    # be truncated or the forward pass fails with an index error.
    encoded = tokenizer(description, return_tensors='pt', truncation=True, max_length=512)

    # Pure inference: no dummy labels (they only forced a meaningless loss
    # computation) and no autograd graph.
    with torch.no_grad():
        logits = model(**encoded).logits

    # reshape(-1) rather than squeeze() keeps a 1-D tensor even when the model
    # has a single output, so the result is always iterable.
    probabilities = torch.sigmoid(logits.reshape(-1))
    predicted_bools = (probabilities > treshold).tolist()

    return [technique for technique, is_predicted in zip(techniques_list, predicted_bools) if is_predicted]

In [7]:
# Smoke test: run the classifier on the first ten method descriptions only.
sample_descriptions = cuukin.evaluation_data['recipe_methods']['description'].iloc[:10]
df = sample_descriptions.map(find_techniques)

In [8]:
df

id
0    [Measuring, Microwaving, Air Frying, Double Bo...
1    [Measuring, Microwaving, Air Frying, Double Bo...
2    [Measuring, Microwaving, Air Frying, Double Bo...
3    [Measuring, Microwaving, Air Frying, Double Bo...
4    [Measuring, Microwaving, Air Frying, Double Bo...
5    [Measuring, Microwaving, Air Frying, Double Bo...
6    [Measuring, Microwaving, Air Frying, Double Bo...
7    [Measuring, Microwaving, Air Frying, Double Bo...
8    [Measuring, Microwaving, Air Frying, Double Bo...
9    [Measuring, Microwaving, Air Frying, Double Bo...
Name: description, dtype: object

In [9]:
import pandas as pd
# Start the output table as an empty frame with one (method_id, technique)
# pair per row; the bare expression below displays it to confirm the columns.
cuukin.output_data['recipe_techniques'] = pd.DataFrame(columns=['method_id', 'technique'])
cuukin.output_data['recipe_techniques']

Unnamed: 0,method_id,technique


In [10]:
# Flatten the per-method technique lists into one (method_id, technique) row
# per prediction. All rows are collected first and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. Iterating df.items() uses the real index labels as method_id
# instead of assuming the index is exactly 0..n-1.
technique_records = [
    {'method_id': method_id, 'technique': technique}
    for method_id, techniques in df.items()
    for technique in techniques
]

# Assigning a freshly built frame (rather than appending to the existing one)
# keeps this cell idempotent: re-running it no longer duplicates rows.
cuukin.output_data['recipe_techniques'] = pd.DataFrame(
    technique_records, columns=['method_id', 'technique']
)

In [11]:
cuukin.output_data['recipe_techniques']

Unnamed: 0,method_id,technique
0,0,Measuring
1,0,Microwaving
2,0,Air Frying
3,0,Double Boiler
4,0,Bain Marie
...,...,...
194,9,Kneading
195,9,Mixing
196,9,Freezing
197,9,Seasoning


In [12]:
cuukin.evaluation_data["recipe_methods"].head()

Unnamed: 0_level_0,recipe_id,description
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,Heat the oil in a frying pan and gently fry th...
1,0,"Add the chickpeas, harissa and tomatoes and co..."
2,0,Add a squeeze of lemon juice and season with s...
3,0,Spoon the yoghurt onto two plates or shallow b...
4,1,Bring a small saucepan of water to the boil an...


In [18]:
# Tag every predicted technique with the recipe its method step belongs to.

def assign_recipe_id(method_id):
    """Return the recipe_id recorded for ``method_id`` in recipe_methods."""
    recipe_methods = cuukin.evaluation_data["recipe_methods"]
    return recipe_methods.at[method_id, 'recipe_id']


technique_method_ids = cuukin.output_data['recipe_techniques']['method_id']
cuukin.output_data['recipe_techniques']['recipe_id'] = technique_method_ids.map(assign_recipe_id)

In [19]:
cuukin.output_data['recipe_techniques']

Unnamed: 0,method_id,technique,recipe_id
0,0,Measuring,0
1,0,Microwaving,0
2,0,Air Frying,0
3,0,Double Boiler,0
4,0,Bain Marie,0
...,...,...,...
194,9,Kneading,2
195,9,Mixing,2
196,9,Freezing,2
197,9,Seasoning,2
