In [19]:
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os
import pickle

In [5]:
class Vocabulary(object):
    """Simple vocabulary wrapper mapping words to integer indices and back.

    Words can be added in two ways (see ``add_word``): auto-numbered, where
    each new word gets the next free index, or with an explicit index, where
    several words may share the same index.
    """
    def __init__(self):
        # word -> index
        self.word2idx = {}
        # index -> word for auto-numbered entries, index -> list of words for
        # entries added with an explicit idx
        self.idx2word = {}
        # next index handed out by add_word when no idx is supplied
        self.idx = 0

    def add_word(self, word, idx=None):
        """Register ``word`` and return an index.

        Args:
            word: The token to add. Words already present are left untouched.
            idx: Optional explicit index. When given, ``word`` is appended to
                the list of words stored under that index.

        Returns:
            Without ``idx``: the next free auto index (``self.idx`` after any
            insertion), i.e. NOT the index assigned to ``word``.
            With ``idx``: the supplied ``idx``, whether or not the word was
            already known.
        """
        if idx is None:
            if word not in self.word2idx:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1
            return self.idx

        if word not in self.word2idx:
            self.word2idx[word] = idx
            self.idx2word.setdefault(idx, []).append(word)
        # Return idx on every call: previously a repeated word fell through
        # and returned None, which broke callers that chain
        # ``idx = vocab.add_word(word, idx)``.
        return idx

    def __call__(self, word):
        """Return the index of ``word``, or the index of '<pad>' if unknown.

        Raises:
            KeyError: if ``word`` is unknown and '<pad>' was never added.
        """
        if word not in self.word2idx:
            return self.word2idx['<pad>']
        return self.word2idx[word]

    def __len__(self):
        """Number of distinct indices (not the number of words)."""
        return len(self.idx2word)

In [6]:
# Root directory that contains the "data" sub-folder with the pickled files.
# Set the RECIPE1M_DIR environment variable to run the notebook on another
# machine; the original local path remains the default.
folder = os.environ.get("RECIPE1M_DIR",
                        "D:\\Documents\\food_recipe_gen\\recipe_1m_analysis")
files = ["allingrs_count.pkl","allwords_count.pkl","recipe1m_train.pkl",
         "recipe1m_vocab_ingrs.pkl","recipe1m_vocab_toks.pkl"]

# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# NOTE(review): these pickles presumably hold Vocabulary instances, in which
# case the Vocabulary class must be defined before this cell runs -- confirm.

# Ingredient vocabulary (files[3] == "recipe1m_vocab_ingrs.pkl").
with open(os.path.join(folder, "data", files[3]), 'rb') as f:
    vocab_ingrs = pickle.load(f)

# Token vocabulary (files[4] == "recipe1m_vocab_toks.pkl").
with open(os.path.join(folder, "data", files[4]), 'rb') as f:
    vocab_toks = pickle.load(f)

In [20]:
class RecipesDataset(Dataset):
    """Dataset of recipe records read from a single pickle file.

    Each item is returned as a dict with the keys 'title', 'ingredients' and
    'instructions'; 'instructions' is taken from the record's "tokenized"
    entry.
    """

    def __init__(self, file):
        """Unpickle the whole collection into memory.

        Args:
            file (string): Path to the pickle file. Its content must support
                ``len()`` and indexing.
        """
        with open(file, 'rb') as handle:
            self.data = pickle.load(handle)

    def __len__(self):
        """Return the number of records in the loaded collection."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the record at ``idx`` as a three-key dict.

        A tensor index is converted with ``tolist()`` before the lookup.
        """
        key = idx.tolist() if torch.is_tensor(idx) else idx
        record = self.data[key]

        return {
            'title': record["title"],
            'ingredients': record["ingredients"],
            'instructions': record["tokenized"],
        }

In [22]:
# Training split: files[2] == "recipe1m_train.pkl".
recipe_dataset = RecipesDataset(os.path.join(folder, "data", files[2]))

# On Windows, DataLoader workers are spawned as fresh processes and cannot
# import a Dataset class defined in a notebook cell (it lives in __main__),
# so iterating with num_workers > 0 fails there. Load in-process on Windows
# and keep 4 workers everywhere else.
loader_workers = 0 if os.name == "nt" else 4

# NOTE(review): samples contain variable-length token lists, which the default
# collate function may not be able to batch -- a custom collate_fn is probably
# needed before iterating this loader; confirm.
dataset_loader = DataLoader(recipe_dataset,
                            batch_size=32, shuffle=True,
                            num_workers=loader_workers)

In [23]:
# Display the first sample to check the dict layout returned by __getitem__.
recipe_dataset[0]

{'title': ['dilly', 'macaroni', 'salad', 'recipe'],
 'ingredients': ['elbow_macaroni',
  'american_cheese',
  'celery',
  'green_peppers',
  'pimentos',
  'mayonnaise',
  'vinegar',
  'salt',
  'dry_dill_weed'],
 'instructions': [['cook',
   'macaroni',
   'according',
   'to',
   'package',
   'directions',
   ';',
   'drain',
   'well',
   '.'],
  ['cold', '.'],
  ['combine',
   'macaroni',
   ',',
   'cheese',
   'cubes',
   ',',
   'celery',
   ',',
   'green',
   'pepper',
   'and',
   'pimento',
   '.'],
  ['blend',
   'together',
   'mayonnaise',
   'or',
   'possibly',
   'salad',
   'dressing',
   ',',
   'vinegar',
   ',',
   'salt',
   'and',
   'dill',
   'weed',
   ';',
   'add',
   'in',
   'to',
   'macaroni',
   'mix',
   '.'],
  ['toss', 'lightly', '.'],
  ['cover', 'and', 'refrigeratewell', '.'],
  ['serve',
   'salad',
   'in',
   'lettuce',
   'lined',
   'bowl',
   'if',
   'you',
   'like',
   '.'],
  ['makes', '6', 'servings', '.']]}