In [7]:
from data_processor import DataProcessor
import ngram_authorship_classifier as nac
import os
import json

In [8]:
# Seed phrases that each author's model is asked to continue.
prompts = [
    "The lady",
    "he thought",
    "I cannot",
    "she was",
    "the world",
]

# Empty mapping; none of the visible cells read or fill it.
author_prompt = {}

### Load the data

In [9]:
# Directory (relative to the working directory) that the author corpus file
# names are joined onto when the data is loaded.
data_dir = "ngram_authorship_train"

In [10]:
# Corpus file names and display names are parallel lists: entry k of one
# belongs to entry k of the other.
author_files = ["austen_utf8.txt", "dickens_utf8.txt", "tolstoy_utf8.txt", "wilde_utf8.txt"]
author_names = ["Austen", "Dickens", "Tolstoy", "Wilde"]

# Fail loudly instead of silently mis-pairing or dropping an author when the
# two lists get out of sync.
if len(author_files) != len(author_names):
    raise ValueError(
        "author_files and author_names must have the same length, got "
        + str(len(author_files)) + " and " + str(len(author_names))
    )

data_proc = DataProcessor()

# Maps author display name -> training set returned by process_file.
authors_train_data = dict()
# NOTE(review): never filled in this cell -- the second value returned by
# process_file is discarded below. Presumably that is the held-out split;
# confirm and store it here if it is needed later.
authors_test_data = dict()
for author_file, author_name in zip(author_files, author_names):
    print("Processing data for author: " + author_name)
    # Only the first element of the returned pair is kept.
    trainset, _ = data_proc.process_file(os.path.join(data_dir, author_file))
    authors_train_data[author_name] = trainset


  from .autonotebook import tqdm as notebook_tqdm


Processing data for author: Austen
Processing data for author: Dickens
Processing data for author: Tolstoy
Processing data for author: Wilde


### Instantiate the models

In [11]:
# One classifier per n-gram order (1 = unigram, 2 = bigram, 3 = trigram),
# all created with the same 'lp' smoothing setting.
model1, model2, model3 = (
    nac.NgramAuthorshipClassifier(n=order, smoothing='lp')
    for order in (1, 2, 3)
)

### Train the models

In [12]:
# Fit the n=1 classifier on the per-author training sets loaded earlier.
print("Training Unigram Model...")
model1.train(authors_train_data)

Training Unigram Model...
Training LMs... (this may take a while)


In [13]:
# Fit the n=2 classifier on the same per-author training sets.
print("Training Bigram Model...")
model2.train(authors_train_data)

Training Bigram Model...
Training LMs... (this may take a while)


In [14]:
# Fit the n=3 classifier on the same per-author training sets.
print("Training Trigram Model...")
model3.train(authors_train_data)

Training Trigram Model...
Training LMs... (this may take a while)


### Generate samples function

In [15]:
def generate_sample_text(authors, prompts, model):
    """Ask ``model`` to generate text for the given authors and prompts.

    Thin convenience wrapper: forwards both arguments by keyword to
    ``model.generate_authors_text`` and hands its result back unchanged.

    Args:
        authors: Author names, passed through as ``authors=``.
        prompts: Prompt strings, passed through as ``prompts=``.
        model: Any object exposing
            ``generate_authors_text(authors=..., prompts=...)``.

    Returns:
        Whatever ``model.generate_authors_text`` returns.
    """
    generated = model.generate_authors_text(authors=authors, prompts=prompts)
    return generated

### Generating samples

In [16]:
# Generate continuations of every prompt for every author with the unigram model.
print("Generating sample text for Unigram Model...")
model1_text = generate_sample_text(author_names,prompts,model1)

Generating sample text for Unigram Model...


In [17]:
# Generate continuations of every prompt for every author with the bigram model.
print("Generating sample text for Bigram Model...")
model2_text = generate_sample_text(author_names,prompts,model2)

Generating sample text for Bigram Model...


In [18]:
# Generate continuations of every prompt for every author with the trigram model.
print("Generating sample text for Trigram Model...")
model3_text = generate_sample_text(author_names,prompts,model3)

Generating sample text for Trigram Model...


### Save to files

In [19]:
def save_generated_text(text_dict, filename):
    """Write ``text_dict`` to ``filename`` as pretty-printed UTF-8 JSON.

    The output uses a 4-space indent and keeps non-ASCII characters as-is
    (``ensure_ascii=False``) rather than escaping them.

    Args:
        text_dict: JSON-serialisable object to store.
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError: If ``text_dict`` contains a value ``json`` cannot encode.
            An existing file at ``filename`` is left untouched in that case.
        OSError: If the file cannot be opened or written.
    """
    # Serialise before opening the target: mode "w" truncates on open, so
    # encoding inside the ``with`` block would wipe an existing file (or leave
    # a half-written one) whenever serialisation fails.
    serialized = json.dumps(text_dict, indent=4, ensure_ascii=False)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(serialized)


In [20]:
# Persist the unigram samples as JSON in the current working directory.
print("Saving generated text for Unigram Model...")
save_generated_text(model1_text, "model1_text.json")

Saving generated text for Unigram Model...


In [23]:
# Dump the complete unigram result to the cell output for inspection.
print(model1_text)

{'Austen': ['The lady did stable about the all of complete know sometimes not glad , imprudence " the you hedges his My of', 'he thought that fronted , conjecturing a all unhappy can of day ladyship others pleasure mind plainly of , standing ; is', 'I cannot ’s of encumbered , was or in Taylor if home should might out harp that of Knightley augmented result she', 'she was in town be its now in , that no undoubtedly ideas is married been ribbon to , " go all', 'the world . , it to it triumph a aired every ; to , merits I dependence . played admiration breakfast engaged'], 'Dickens': ['The lady my was are I long lingering no , I delightful . working The was . down “ to felt found', 'he thought an ’ and that . told Oh me that , man make such out and anxious ” advocates in ,', 'I cannot , we start close pattern liberty see with and his Jerry . comical wants application at ’ the desire ,', 'she was her to - could , of It as the , Dreaming appearance the have next was Well from I sweetest', 

In [21]:
# Persist the bigram samples as JSON in the current working directory.
print("Saving generated text for Bigram Model...")
save_generated_text(model2_text, "model2_text.json")

Saving generated text for Bigram Model...


In [24]:
# Dump the complete bigram result to the cell output for inspection.
print(model2_text)

{'Austen': ['The lady ! the value ; and with them to provocation . Frank Churchill , from the conviction that your friends ,', 'he thought her friend by his behaviour to what any hesitation in fortune ; as mine was not know better . ”', 'I cannot -- about her expectation and eaten very little projected ball , </s> will only given and very kind attention particularly', 'she was sorry for ; and body can suppose Mr. Knightley came on the profits of scrutinies , " but I heard', 'the world to find he had every thing could surpass . There is cast down at my terms they were to contend'], 'Dickens': ['The lady . ” If his own mind . I do you , ” </s> sound of entertaining me how natural feeling', 'he thought of keeping guard , risen against all ? Up to anatomise his face to defy competition there , if I', 'I cannot appertaining to - wind which you any displeasure ; and I say I thought she held forth a bawling and', 'she was silent and freshness pouring out of the stone face upon the crust being

In [22]:
# Persist the trigram samples as JSON in the current working directory.
print("Saving generated text for Trigram Model...")
save_generated_text(model3_text, "model3_text.json")

Saving generated text for Trigram Model...


In [25]:
# Dump the complete trigram result to the cell output for inspection.
print(model3_text)

{'Austen': ['The lady then — only we do indeed . She was pleased ; and I are both exceedingly well married , whether', 'he thought of Mrs. Ferrars;—that her interest , and walked in with a “ Yes I should never hear it ! </s>', 'I cannot -- I have heard her mentioned ; except that such extreme and perpetual cautiousness of conduct , and more than', 'she was much affection in the solitude of her still well enough , handsome , and they were engaged about the time', 'the world . You see how it might be secured for the arrangement . </s> </s> </s> </s> </s> </s> </s> </s>'], 'Dickens': ['The lady and gentleman beside her till she was from the knitted register of Madame Defarge ’s olfactory sense was by that', 'he thought the simple confidence reposed in us by the window , looking over his evidence , and looking pensively at me', 'I cannot chose ; and entertained us with pride , as well as ever . </s> </s> </s> </s> </s> </s> </s>', 'she was wiping her eyes ; and that made their last journey