In [3]:
from fastai import *
from fastai.text import * 

In [4]:
import pandas as pd

In [5]:
import numpy as np

In [6]:
# Load the quotes file, splitting each row on the literal `<|>` delimiter into
# a `text` and an `author` column. A multi-character (regex) separator is only
# supported by pandas' Python parsing engine; naming the engine explicitly
# avoids the ParserWarning raised when pandas has to fall back from the C engine.
df = pd.read_csv(
    'data/quotes.csv',
    sep=r'\<\|\>',
    header=None,
    names=['text', 'author'],
    engine='python',
)

  """Entry point for launching an IPython kernel.


In [7]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,text,author
0,A serious and good philosophical work could be...,Wittgenstein
1,The limits of my language means the limits of ...,Wittgenstein
2,"I don't know why we are here, but I'm pretty s...",Wittgenstein
3,"Whereof one cannot speak, thereof one must be ...",Wittgenstein
4,Hell isn't other people. Hell is yourself.,Wittgenstein


In [8]:
# Lower-case every quote before it is handed to the language model.
lowered_text = df['text'].str.lower()
df['text'] = lowered_text

In [9]:
# Shuffle the rows and hold out a small validation split. The permutation is
# drawn from a seeded generator so that Restart & Run All reproduces the same
# train/validation rows (and therefore the same pickled frames further down).
valid_pct = 0.05  # fraction of rows held out for validation
split_seed = 42   # fixed seed for the shuffle
df = df.iloc[np.random.RandomState(split_seed).permutation(len(df))]
# +1 so that at least one row is held out whenever df is non-empty.
cut = int(valid_pct * len(df)) + 1
# Positional slicing: the first `cut` shuffled rows validate, the rest train.
train_df, valid_df = df.iloc[cut:], df.iloc[:cut]

In [10]:
# Row counts of the training and validation frames.
train_df.shape[0], valid_df.shape[0]

(4873, 257)

In [11]:
# Collect any rows whose quote text is missing.
missing_text = df['text'].isna()
nan_rows = df.loc[missing_text]

In [13]:
# Display the rows with missing text (an empty frame means there are none).
nan_rows

Unnamed: 0,text,author


In [13]:
# Build the language-model data bunch from the train/validation frames,
# taking the text from the 'text' column and the label from 'author'.
data_lm = TextLMDataBunch.from_df(
    'data',
    train_df,
    valid_df,
    text_cols='text',
    label_cols='author',
)

In [9]:
# data_clas = TextClasDataBunch.from_df('data', train_df, valid_df, text_cols='text', label_cols='author', vocab=data_lm.train_ds.vocab, bs=32)

In [14]:
# Create the language-model learner from the URLs.WT103 pretrained model,
# then train it for a single one-cycle epoch.
learn = language_model_learner(
    data_lm,
    pretrained_model=URLs.WT103,
    drop_mult=0.5,
)
learn.fit_one_cycle(1, 1e-2)

epoch,train_loss,valid_loss,accuracy
1,4.767370,4.369649,0.220312


In [16]:
# Unfreeze the learner, then run one more one-cycle epoch at a lower
# learning rate (1e-3 instead of the 1e-2 used for the first cycle).
learn.unfreeze()
learn.fit_one_cycle(1, 1e-3)

epoch,train_loss,valid_loss,accuracy
1,4.187985,3.925991,0.298106


In [17]:
# Hyper-parameters for the longer training run below.
wd, lr = 1e-7, 1e-3
lrs = lr  # a single learning rate is used as-is

In [18]:
# Train for ten more epochs with the learning rate and weight decay set above.
learn.fit(10, lr=lrs, wd=wd)

epoch,train_loss,valid_loss,accuracy
1,4.038457,3.849872,0.302273
2,3.946853,3.801399,0.305587
3,3.858981,3.764411,0.310322
4,3.754583,3.732944,0.314489
5,3.665370,3.707683,0.321875
6,3.568224,3.689430,0.322917
7,3.464149,3.677289,0.326705
8,3.362748,3.663580,0.329167
9,3.269804,3.671115,0.332102
10,3.171323,3.646297,0.334943


In [19]:
# Generate a 50-word sample from the trained model, seeded with the "xxbos" token.
learn.predict("xxbos", n_words=50, temperature=0.75)

'xxbos xxfld 1 the human body is a species of human being . xxbos xxfld 1 a great deal of time , every were already free in order to be paid . it was his evening life , of what he was , in a room where monsters and monsters often'

In [26]:
# Sample short generations from the language model and collect the quotes
# they contain, stopping as soon as exactly `number_of_ideas` are gathered.
number_of_ideas = 100
max_attempts = 1000  # upper bound on predict calls so the loop always terminates
# Marker used as the prompt; the model's output repeats it before each new
# quote, so it doubles as the separator between generated quotes.
field_marker = "xxbos xxfld 1"
all_ideas = []

for _ in range(max_attempts):
    generated = learn.predict(field_marker, n_words=20, temperature=0.8)
    # Skip the first piece (whatever precedes the first marker) and the last
    # piece (ended by the n_words limit, so presumably an unfinished quote).
    for fragment in generated.split(field_marker)[1:-1]:
        fragment = fragment.strip()
        # Checking the size here keeps one generation from overshooting the target.
        if fragment and len(all_ideas) < number_of_ideas:
            all_ideas.append(fragment)

    if len(all_ideas) >= number_of_ideas:
        break

# Kept for backward compatibility with code that reads the counter.
ideas_counter = len(all_ideas)

In [27]:
# Display every collected idea.
all_ideas

['por las vida de los mundo de los mundo en el chi',
 'the truth is that we can not be sure of what we do not know .',
 'according to the standard , man is the real animal .',
 "it is my business that i think that 's what i do n't know .",
 'after a adventure , it was not until that point that old ideas were drawn up .',
 'the lives of a finite player player must be avoided .',
 'a human being is also an animal .',
 'i had to turn',
 "i want people to be happy but i do n't want to be ourselves .",
 'there is a greater freedom for the philosophers and for the philosophers .',
 'for a moment is not merely a thought , but a tragedy .',
 'at this stage it is the true art .',
 'i am the bridge , which is the bridge and the one that carries the bridge .',
 'it is the wisdom that the world has not yet seen established at all .',
 'the future is great because we love everything .',
 'what is the belief in the right to man ?',
 'nature is a perfect field of vision .',
 't',
 'to learn to draw i

In [20]:
# Save the learner's encoder under the name 'ft_enc'.
learn.save_encoder('ft_enc')

In [21]:
# Persist the training split so it can be reloaded later.
pd.to_pickle(train_df, 'data/train_df.pkl')

In [22]:
# Persist the validation split so it can be reloaded later.
pd.to_pickle(valid_df, 'data/valid_df.pkl')