In [11]:
import fasttext

In [2]:
# Exploratory: print the help text of the fasttext.FastText module to see which
# entry points it exposes (load_model, tokenize, train_supervised, ...).
# NOTE(review): introspection like this is usually removed from a finished notebook.
help(fasttext.FastText)

Help on module fasttext.FastText in fasttext:

NAME
    fasttext.FastText

DESCRIPTION
    # Copyright (c) 2017-present, Facebook, Inc.
    # All rights reserved.
    #
    # This source code is licensed under the MIT license found in the
    # LICENSE file in the root directory of this source tree.

FUNCTIONS
    cbow(*kargs, **kwargs)
    
    eprint(*args, **kwargs)
    
    load_model(path)
        Load a model given a filepath and return a model object.
    
    read_args(arg_list, arg_dict, arg_names, default_values)
    
    skipgram(*kargs, **kwargs)
    
    supervised(*kargs, **kwargs)
    
    tokenize(text)
        Given a string of text, tokenize it and return a list of tokens
    
    train_supervised(*kargs, **kwargs)
        Train a supervised model and return a model object.
        
        input must be a filepath. The input text does not need to be tokenized
        as per the tokenize function, but it must be preprocessed and encoded
        as UTF-8. You might wan

In [5]:
# Peek at the first lines of the raw dataset. Each line starts with one or more
# `__label__<tag>` prefixes followed by the question text.
!head /work/data/fasttext/cooking.stackexchange.txt

__label__sauce __label__cheese How much does potato starch affect a cheese sauce recipe?
__label__food-safety __label__acidity Dangerous pathogens capable of growing in acidic environments
__label__cast-iron __label__stove How do I cover up the white spots on my cast iron stove?
__label__restaurant Michelin Three Star Restaurant; but if the chef is not there
__label__knife-skills __label__dicing Without knife skills, how can I quickly and accurately dice vegetables?
__label__storage-method __label__equipment __label__bread What's the purpose of a bread box?
__label__baking __label__food-safety __label__substitutions __label__peanuts how to seperate peanut oil from roasted peanuts at home?
__label__chocolate American equivalent for British chocolate terms
__label__baking __label__oven __label__convection Fan bake vs bake
__label__sauce __label__storage-lifetime __label__acidity __label__mayonnaise Regulation and balancing of readymade packed mayonnaise and other sauces


In [8]:
# Count lines, words and bytes of the raw dataset (one example per line).
!wc /work/data/fasttext/cooking.stackexchange.txt

  15404  169582 1401900 /work/data/fasttext/cooking.stackexchange.txt


In [9]:
# List the data directory to confirm the train/validation split files exist
# next to the raw dataset.
!ls /work/data/fasttext/

cooking.stackexchange.txt  cooking.train  cooking.valid


In [13]:
# Baseline: train a supervised classifier on the training split, passing only
# the input path (no hyperparameters are overridden).
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train")

In [14]:
# Persist the baseline model to disk. The path is relative, so the file lands
# in the notebook's current working directory.
model.save_model("model_cooking.bin")

In [15]:
# Ask the baseline model for its single most likely label for a sample question.
# NOTE(review): "Whick" looks like a typo for "Which" (a later cell uses the
# correct spelling); the recorded output below was produced with the typo in
# place, so re-run the cell if the text is corrected.
model.predict("Whick baking dish is best to bake a banana bread ?")

(('__label__baking',), array([0.08752731]))

In [16]:
# Second sanity check with a different question; still returns only the top label.
model.predict("why not put knives in the dishwasher ?")

(('__label__food-safety',), array([0.04280162]))

In [17]:
# Evaluate the baseline on the validation split. The recorded result is a
# 3-tuple; per the fastText docs this is (number of examples, precision, recall)
# for the top-1 prediction.
model.test("/work/data/fasttext/cooking.valid")

(3000, 0.14633333333333334, 0.0632838402767767)

In [20]:
# Request the five most likely labels (k=5) together with their scores.
# Note the query starts with a capital "W" here, unlike the earlier lowercase
# version of the same question; the recorded top label differs between the two.
model.predict("Why not put knives in the dishwasher ?", k=5)

(('__label__baking',
  '__label__food-safety',
  '__label__bread',
  '__label__equipment',
  '__label__substitutions'),
 array([0.05216507, 0.0521493 , 0.03053297, 0.02940539, 0.02573603]))

In [31]:
ls /work/data/fasttext

cooking.preprocessed.txt   cooking.train
cooking.stackexchange.txt  cooking.valid


In [33]:
# Retrain with the same call as the first training cell.
# NOTE(review): cooking.preprocessed.txt appeared in the data directory before
# this cell, so cooking.train was presumably regenerated from preprocessed text.
# The preprocessing/split step is not shown in this notebook; confirm.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train")

In [34]:
# Re-evaluate the retrained model on the validation split
# (result tuple: examples, precision, recall per the fastText docs).
model.test("/work/data/fasttext/cooking.valid")

(3000, 0.15733333333333333, 0.06804093988755947)

In [35]:
# Experiment: train for 25 epochs; every other setting is left unspecified.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train", epoch=25)

In [36]:
# Validation metrics for the epoch=25 model.
model.test("/work/data/fasttext/cooking.valid")

(3000, 0.516, 0.22315121810580943)

In [37]:
# Experiment: set the learning rate to 1.0. `epoch` is not passed in this cell,
# so this isolates the effect of the learning rate from the previous experiment.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train", lr=1.0)

In [38]:
# Validation metrics for the lr=1.0 model.
model.test("/work/data/fasttext/cooking.valid")

(3000, 0.569, 0.24607178895776272)

In [40]:
# Experiment: combine both changes, learning rate 1.0 and 25 epochs.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train", lr=1.0, epoch=25)

In [42]:
# Validation metrics for the lr=1.0, epoch=25 model.
model.test("/work/data/fasttext/cooking.valid")

(3000, 0.58, 0.25082888856854546)

In [43]:
# Experiment: additionally use word n-grams up to length 2 (wordNgrams=2), so
# the model sees word pairs and not only single words.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train", lr=1.0, epoch=25, wordNgrams=2)

In [44]:
# Validation metrics for the model trained with word bigrams.
model.test("/work/data/fasttext/cooking.valid")

(3000, 0.6016666666666667, 0.26019893325645094)

In [47]:
# Experiment: same settings plus bucket=200000, dim=50 and loss='hs'
# (hierarchical softmax, per the fastText docs).
# NOTE(review): in the cells shown, this model is neither evaluated nor used
# before `model` is reassigned by the next training cell.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train", lr=1.0, epoch=25, wordNgrams=2, bucket=200000, dim=50, loss='hs')

In [61]:
# Multi-label variant: switch the loss to 'ova' (one-vs-all, per the fastText
# docs) and lower the learning rate to 0.5; the remaining settings match the
# previous training cell.
model = fasttext.train_supervised(input="/work/data/fasttext/cooking.train", lr=0.5, epoch=25, wordNgrams=2, bucket=200000, dim=50, loss='ova')

In [62]:
# Multi-label prediction: k=-1 asks for as many labels as possible and
# threshold=0.5 keeps only those scoring at least 0.5 (per the fastText docs).
model.predict("Which baking dish is best to bake a banana bread ?", k=-1, threshold=0.5)

(('__label__baking',
  '__label__bread',
  '__label__equipment',
  '__label__bananas'),
 array([1.00001001, 0.99802226, 0.97703266, 0.94335759]))

In [71]:
# Evaluate the one-vs-all model on the validation split, counting every label
# that clears a 0.8 score threshold (k=-1) instead of only the top prediction.
# Note this threshold is stricter than the 0.5 used in the predict cell above.
model.test("/work/data/fasttext/cooking.valid", k=-1, threshold=0.8)

(3000, 0.7163120567375887, 0.20383451059535823)