<a href="https://colab.research.google.com/github/ericburdett/cs673-personal-tutor/blob/master/Personal_Tutor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Personal Tutor

This notebook contains code for the Personal Tutor System built for CS673: Computational Creativity.


## Imports and Setup

Restart the Runtime after running the top code block.

In [0]:
!pip install transformers
!python -m spacy download en_core_web_md

In [1]:
import torch
import torch.nn.functional as F
import pdb
import string
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import spacy
import numpy as np
# # OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
# import logging
# logging.basicConfig(level=logging.INFO)

## GPT2 - Transformers Example


In [0]:
class LanguageModel():
  def __init__(self, mask=None, k=50):
    self.model = GPT2LMHeadModel.from_pretrained('distilgpt2').cuda()
    self.tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
    self.k = k
    self.mask = mask

  def top_k_logits(self, logits):
    if self.k == 0:
        return logits
    values, _ = torch.topk(logits, self.k)
    min_values = values[-1]
    return torch.where(logits < min_values, torch.ones_like(logits, dtype=logits.dtype) * -1e10, logits)

  def set_mask(self, mask):
    self.mask = mask

  def get_sentence(self, prompt, length):
    generated = self.tokenizer.encode(prompt)
    context = torch.tensor([generated]).cuda()

    past = None

    for i in range(length):
      output, past = self.model(context, past=past)
      
      logits = output[..., -1, :].squeeze()

      topk_logits = self.top_k_logits(logits)
      topk_log_probs = F.softmax(topk_logits, dim=-1)
      token = torch.multinomial(topk_log_probs, num_samples=1)

      generated += [token.item()]
      context = token.unsqueeze(0)
    
    sequence = self.tokenizer.decode(generated)

    end_index = len(prompt.split('.'))

    return ".".join(sequence.split('.')[0:end_index]) + '.'

In [0]:
class Evaluator():
  def __init__(self):
    pass

  def get_keywords(self, sentence_doc):
    # Find Nouns and Adjectives
    keywords = []
    for token in sentence_doc:
      pos = token.pos
      if pos in [92, 96]: # NOUN, PNOUN, ADJ , 84
         keywords.append(token)
    
    return keywords

  def get_random_pairs(self, arr, size):
    pairs = []

    try:
      for i in range(size):
        pair = np.random.choice(arr, size=2, replace=False)
        pairs.append(pair)
    except:
      return None

    return pairs

  def word_length_score(self, sentence_doc):
    num_words = len(sentence_doc)

    target_high = 10
    target_low = 6

    if num_words >= target_low and num_words <= target_high:
      return .8
    elif num_words + 1 >= target_low:
      return .6
    elif num_words + 2 >= target_low:
      return .4
    elif num_words + 3 >= target_low:
      return .2
    elif num_words - 2 <= target_high:
      return .6
    elif num_words - 4 <= target_high:
      return .4
    elif num_words - 6 <= target_high:
      return .2
    elif num_words - 10 <= target_high:
      return .1
    else:
      return 0

  def topic_score(self, sentence_doc, topic_doc):
    keywords = self.get_keywords(sentence_doc)
    if len(keywords) < 2:
      return 0

    similarities = []
    for keyword in keywords:
      similarity = topic_doc.similarity(keyword)
      similarities.append(similarity)

    return np.mean(similarities)
  
  def related_score(self, sentence_doc):
    keywords = self.get_keywords(sentence_doc)

    # Sample Random Pairs
    pairs = self.get_random_pairs(keywords, 10) # 10 seems like a good number for now...
    if pairs == None or len(pairs) == 0:
      return 0

    # Check Similarity
    similarities = []
    for pair in pairs:

      if pair[0].vector_norm and pair[1].vector_norm:
        similarity = pair[0].similarity(pair[1])
      else:
        similarity = 0

      if similarity >= 1: # Do not give a high similarity score if we are comparing a word with itself
        similarity = 0

      similarities.append(similarity)
      # print('Comparing {} with {}, score: {:.4f}'.format(pair[0], pair[1], similarity))
    
    # print(similarities)

    return np.mean(similarities)

In [0]:
def remove_prompt(sentence, prompt):
  new_sentence = sentence.split(prompt)
  if len(new_sentence) < 2:
    return ''
  else:
    return sentence.split(prompt)[1]

In [0]:
LM = LanguageModel(k=10)
EVAL = Evaluator()
NLP = spacy.load('en_core_web_md')

In [16]:
# Main App Loop
NUM_SENTENCES = 50
MAX_SENTENCE_LENGTH = 35
TOPIC = 'Religion'

topic_doc = NLP(TOPIC)
prompts = ['Book of Mormon passage:']
prompts_truncate = [False]

while True:

  best_score = 0
  best_sentence = ''

  for i in range(NUM_SENTENCES):
    rand_index = np.random.randint(len(prompts))
    sentence_prompt = prompts[rand_index]
    should_truncate = prompts_truncate[rand_index]
    
    sentence = LM.get_sentence(sentence_prompt, MAX_SENTENCE_LENGTH)
    if should_truncate:
      sentence = remove_prompt(sentence, sentence_prompt)
    
    sentence = sentence.strip(' \n,\'`~"[]{}\\-+=<>')
    sentence = sentence.replace('\n', '')
    sentence_doc = NLP(sentence)
    topic_score = EVAL.topic_score(sentence_doc, topic_doc)
    related_score = EVAL.related_score(sentence_doc)
    length_score = EVAL.word_length_score(sentence_doc)
    total_score = np.mean([topic_score, related_score, length_score])

    print('[{}]: {} - Topic: {}, Related: {}, Length: {}, Overall: {}'.format(i, sentence, topic_score, related_score, length_score, total_score))

    if total_score > best_score:
      best_score = total_score
      best_sentence = sentence
  
  print('Best Sentence: ', best_sentence)
  break

[0]: Book of Mormon passage:. - Topic: 0.3483990734453514, Related: 0.20138564705848694, Length: 0.8, Overall: 0.44992824016794614
[1]: Book of Mormon passage: The Book of Mormon. - Topic: 0.3606493699455468, Related: 0.14088051468133928, Length: 0.8, Overall: 0.4338432948756287


  "__main__", mod_spec)


[2]: Book of Mormon passage:"We are not to have a quarrel. - Topic: 0.29224357784006766, Related: 0.049948543310165405, Length: 0.6, Overall: 0.314064040383411
[3]: Book of Mormon passage: The Book of Mormon, 1857-1858, translated to the U. - Topic: 0.33200098051286014, Related: 0.13630549982190132, Length: 0.6, Overall: 0.3561021601115872
[4]: Book of Mormon passage:"In the midst of the wilderness there was an immense wilderness. - Topic: 0.31353999086897966, Related: 0.19969374388456346, Length: 0.6, Overall: 0.37107791158451436
[5]: Book of Mormon passage: 'The Book of Mormon is a work of a kind to the world, the most important in the history of man, and the most important in the history of the church. - Topic: 0.408707018489521, Related: 0.3285277783870697, Length: 0.6, Overall: 0.44574493229219686
[6]: Book of Mormon passage: A History of the Mormon Temple. - Topic: 0.3976970154825595, Related: 0.3080790042877197, Length: 0.6, Overall: 0.43525867325675965


  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)


[7]: Book of Mormon passage: †‬‬, †‬‬, †‬‬‬, †‬‬‬, †�. - Topic: 0.12346964783006194, Related: 0.003578118607401848, Length: 0.6, Overall: 0.24234925547915456
[8]: Book of Mormon passage: “If the Lord hath said it, ‘The Lord hath given it, and it shall be with him,’” “And then, if the. - Topic: 0.31381782873223024, Related: 0.2787749767303467, Length: 0.6, Overall: 0.3975309351541923
[9]: Book of Mormon passage: "He said, 'You are not the first to receive, but you have given me. - Topic: 0.3483990734453514, Related: 0.19106699526309967, Length: 0.6, Overall: 0.379822022902817
[10]: Book of Mormon passage:I was a child of Joseph's father (or brother or sister, as he was referred to by his own parents in the church) and he was an apostle from the. - Topic: 0.34057505184614506, Related: 0.28283846378326416, Length: 0.6, Overall: 0.4078045052098031
[11]: Book of Mormon passage:. - Topic: 0.3483990734453514, Related: 0.21094758808612823, Length: 0.8, Overall: 0.45311555384382657
[12]: Book o

  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)


[14]: Book of Mormon passage: “The Book of Mormon: ‪The Book of Mormon:‪The Book of Mormon:‪The Book of Mormon:‪The Book of Mormon:. - Topic: 0.25408772237207244, Related: 0.021383680403232574, Length: 0.6, Overall: 0.2918238009251017


  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)


[15]: Book of Mormon passage:1 The Book of Mormon contains the following passages:1 The Book of Mormon contains the following passages:1 The Book of Mormon contains the following passages:. - Topic: 0.2756014897440147, Related: 0.037659063935279846, Length: 0.6, Overall: 0.3044201845597649


  "__main__", mod_spec)


[16]: Book of Mormon passage: The Doctrine and Covenants of the Twelve ApostlesBy J. - Topic: 0.3071314588518034, Related: 0.15527848452329635, Length: 0.6, Overall: 0.3541366477916999


  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)


[17]: Book of Mormon passage:The Book of MormonThe Book of MormonThe Book of MormonThe Book of MormonThe. - Topic: 0.215736915071714, Related: 0.04792668670415878, Length: 0.6, Overall: 0.28788786725862425


  "__main__", mod_spec)


[18]: Book of Mormon passage:“I““I will not go down this road without a sign saying: ‪You must go down this way, and not to. - Topic: 0.2625000331636894, Related: 0.23001972436904908, Length: 0.6, Overall: 0.36417325251091287
[19]: Book of Mormon passage: the revelation of the prophet Joseph Smith, the revelation of the prophet Joseph Smith and the revelation of the prophet Joseph Smith, the revelation of the prophet Joseph Smith and the revelation of. - Topic: 0.35475011529816314, Related: 0.30209684669971465, Length: 0.6, Overall: 0.4189489873326259
[20]: Book of Mormon passage: The Doctrine of the Apostles (1 Peter 2:18–19) [The Church of Jesus Christ of Latter-day Saints]. - Topic: 0.41454889182177285, Related: 0.34924933314323425, Length: 0.6, Overall: 0.45459940832166906
[21]: Book of Mormon passage: A History of the Prophet and Prophets, and How to Get There. - Topic: 0.41017768779032143, Related: 0.32549795508384705, Length: 0.6, Overall: 0.44522521429138945
[22]: Book of Mormo

  "__main__", mod_spec)


[30]: Book of Mormon passage:<|endoftext|>The first of a series of posts on the Internet about the Mormon church's new leadership and the world-wide church, published this week, has been updated to include an. - Topic: 0.3118668982324007, Related: 0.2802289664745331, Length: 0.6, Overall: 0.3973652882356446
[31]: Book of Mormon passage: "I would say to the brethren, in that case, the Church would be a good and honorable society in which the children and their children are taught by the Lord, to. - Topic: 0.3726084885255525, Related: 0.24856115579605104, Length: 0.6, Overall: 0.4070565481072011


  "__main__", mod_spec)


[32]: Book of Mormon passage:(I am not the only prophet, but I am a Mormon. - Topic: 0.32901403698817466, Related: 0.2267653152346611, Length: 0.6, Overall: 0.3852597840742786


  "__main__", mod_spec)


[33]: Book of Mormon passage:<|endoftext|>"This is a great book of biblical wisdom, written by David. - Topic: 0.305303079407886, Related: 0.20947635918855667, Length: 0.6, Overall: 0.37159314619881423
[34]: Book of Mormon passage:. - Topic: 0.3483990734453514, Related: 0.18150503933429718, Length: 0.8, Overall: 0.4433013709265496
[35]: Book of Mormon passage: The Book of Mormon (1929–2000) (Book of Mormon (1929–2000) (Book of Mormon (1929–2000) (Book of Mormon (. - Topic: 0.37067233980934305, Related: 0.1120777279138565, Length: 0.6, Overall: 0.3609166892410665


  "__main__", mod_spec)


[36]: Book of Mormon passage:1. - Topic: 0.25268320979722664, Related: 0.0855347216129303, Length: 0.6, Overall: 0.31273931047005227
[37]: Book of Mormon passage:"I am a prophet; I am the messenger. - Topic: 0.3603557043725316, Related: 0.2436480075120926, Length: 0.6, Overall: 0.4013345706282081
[38]: Book of Mormon passage: The Book of Mormon, by George Washington. - Topic: 0.32504860530560586, Related: 0.16010295152664183, Length: 0.6, Overall: 0.3617171856107492
[39]: Book of Mormon passage: "The Church was not a Christian nation, but a Church of the Holy Land, and it was not the United States of God. - Topic: 0.4378325081995447, Related: 0.2714744806289673, Length: 0.6, Overall: 0.43643566294283725
[40]: Book of Mormon passage:I have read some of the passages, I am not an expert on the Mormon passage. - Topic: 0.3205728261472454, Related: 0.20466532930731773, Length: 0.6, Overall: 0.375079385151521


  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)
  "__main__", mod_spec)


[41]: Book of Mormon passage: ‏The prophet‌ ‏The apostle‌ ‏The prophet‌ ‏The prophet‌ ‏The prophet‌ ‏The prophet�. - Topic: 0.1075818112754834, Related: 0.0, Length: 0.6, Overall: 0.23586060375849446
[42]: Book of Mormon passage:The Prophet had the power to make a covenant with the prophet and his wife in the name of Jesus Christ. - Topic: 0.37996892602896964, Related: 0.30387747287750244, Length: 0.6, Overall: 0.42794879963549065
[43]: Book of Mormon passage: The Book of Mormon, 1845. - Topic: 0.3606493699455468, Related: 0.1261592388153076, Length: 0.6, Overall: 0.36226953625361813


  "__main__", mod_spec)


[44]: Book of Mormon passage:<|endoftext|>The New York Times, April 6 (Reuters) - A former CIA officer who pleaded guilty to espionage in 2013 pleaded guilty to five counts of espionage, the Washington Post reported. - Topic: 0.20245543673340302, Related: 0.14932522177696228, Length: 0.6, Overall: 0.3172602195034551
[45]: Book of Mormon passage:I am an apostle of this kingdom of God. - Topic: 0.41706169537158533, Related: 0.30125564336776733, Length: 0.6, Overall: 0.4394391129131175


  "__main__", mod_spec)


[46]: Book of Mormon passage:The following passages are from the passage from the Book of Mormon:(1) The Lord of the Rings, "The Lord of the Rings, ". - Topic: 0.25985893665929694, Related: 0.10746880471706391, Length: 0.6, Overall: 0.32244258045878693


  "__main__", mod_spec)


[47]: Book of Mormon passage: “The prophet is an angel,‡ and he is not a man. - Topic: 0.31462059495659095, Related: 0.2121742457151413, Length: 0.6, Overall: 0.3755982802239107
[48]: Book of Mormon passage:The Mormon Church, by the late 18th century and later the Church of Jesus Christ of Latter-Day Saints, has issued an official statement saying:. - Topic: 0.4167494414195575, Related: 0.2504858776926994, Length: 0.6, Overall: 0.422411773037419
[49]: Book of Mormon passage: The first chapter of Joseph Smith’s History of the Church in the New World. - Topic: 0.3439412711445508, Related: 0.25779300928115845, Length: 0.6, Overall: 0.4005780934752365
Best Sentence:  Book of Mormon passage: "We will make it clear in a future that our religion and the people of this country will be taught to believe it, and will be taught to be taught to be true.


In [0]:
LM = LanguageModel(k=25)

In [0]:
LM.get_sentence('', 30)

'The best of all things for us to do for the world!\n\nThis post comes at no cost to us, but if you are a subscriber or a.'

In [0]:
nlp = spacy.load('en_core_web_md')

In [0]:
evaluator = Evaluator()
topic = nlp('Sports')[0]
doc = nlp('Baseball is America\'s pastime')

topic_score = evaluator.related_topic_score(doc, topic)
related_score = evaluator.related_score(doc)

print('Topic Score: ', topic_score)
print('Sentence Relatedness Score: ', related_score)

Topic Score:  0.4435064
Sentence Relatedness Score:  0.28847122


In [0]:
topic.similarity(doc[5])

0.38921082

In [0]:
doc = nlp("Basketball is a game of movement, shooting, and defending.")
# print('{}, {} - Score: {:.4f}'.format(doc[1], doc[3], doc[1].similarity(doc[3])))
# print('{}, {} - Score: {:.4f}'.format(doc[1], doc[11], doc[1].similarity(doc[11])))
# print('{}, {} - Score: {:.4f}'.format(doc[9], doc[11], doc[9].similarity(doc[11])))
# print('{}, {} - Score: {:.4f}'.format(doc[0], doc[6], doc[0].similarity(doc[6])))
# print('{}, {} - Score: {:.4f}'.format(doc[0], doc[9], doc[0].similarity(doc[9])))

In [0]:
evaluator = Evaluator()
evaluator.related_topic_score(doc)

0.3780083

In [0]:
doc[3].vector_norm

5.1889863

## Word Distribution

In [0]:
# Download the simple word distribution from GitHub
!wget -O word_dist_full.csv https://raw.githubusercontent.com/ericburdett/cs673-personal-tutor/master/data/word_dist_full.csv

--2020-02-20 18:26:39--  https://raw.githubusercontent.com/ericburdett/cs673-personal-tutor/master/data/word_dist_full.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 163042 (159K) [text/plain]
Saving to: ‘word_dist_full.csv’


2020-02-20 18:26:39 (5.17 MB/s) - ‘word_dist_full.csv’ saved [163042/163042]



In [0]:
class WordDist(Dataset):
  def __init__(self):
    self.df = pd.read_csv('word_dist_full.csv', header=None, names=['word', 'freq'])
  
  def getdf(self):
    return self.df

  def dict_normalized(self): 
    copy = self.df.copy()
    copy['freq'] = copy['freq'] / copy['freq'].max()

    return copy.set_index('word').to_dict()['freq']

  def __getitem__(self, index):
    return self.df['word'][index], self.df['freq'][index]

  def __len__(self):
    return len(self.df)

# Old Code

## Imports

In [0]:
!pip install gpt-2-simple
!pip install gtts

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, utils, datasets
from tqdm import tqdm
from torch.nn.parameter import Parameter
import pdb
import torchvision
import os
import string
import gzip
import tarfile
from PIL import Image, ImageOps
import gc
import pdb
import pandas as pd
import gpt_2_simple as gpt2
import requests
import tensorflow as tf
import os
from gtts import gTTS 
from IPython.core.ultratb import AutoFormattedTB
from IPython.display import Audio, HTML
__ITB__ = AutoFormattedTB(mode = 'Verbose',color_scheme='LightBg', tb_offset = 1)

assert torch.cuda.is_available(), "Request a GPU from Runtime > Change Runtime"

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [0]:
# Download the children's book corpus from GitHub
!wget -O wiki_simple.txt https://raw.githubusercontent.com/ericburdett/cs673-personal-tutor/master/data/wiki_simple.txt

--2020-02-20 18:26:35--  https://raw.githubusercontent.com/ericburdett/cs673-personal-tutor/master/data/wiki_simple.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 47667422 (45M) [text/plain]
Saving to: ‘wiki_simple.txt’


2020-02-20 18:26:36 (199 MB/s) - ‘wiki_simple.txt’ saved [47667422/47667422]



## Classes

In [0]:
# Class that deals with training and generating text from GPT2
class LanguageModel():
  def __init__(self, model='124M', genre='children', train_steps=200, max_length=150):
    self.download_model(model)
    self.genre = genre
    self.max_length = max_length

    tf.reset_default_graph()
    self.sess = gpt2.start_tf_sess()

    if genre == 'children':
      gpt2.finetune(self.sess, 'wiki_simple.txt', model_name=model, steps=train_steps)
    else:
      raise('The specified genre does not exist')

  # Returns a list of sample texts with a given prefix and suffix
  def generate_text(self, prefix='<|startoftext|>', suffix='.', include_prefix=False, nsamples=5):
    if nsamples < 1 or nsamples > 20:
      raise('Error: nsamples must be within the range 1 <= x <= 20')

    return gpt2.generate(self.sess, prefix=prefix, truncate=suffix, include_prefix=include_prefix, batch_size=nsamples, nsamples=nsamples, return_as_list=True, length=self.max_length)
  
  def download_model(self, model_name):
    if not os.path.isdir(os.path.join("models", model_name)):
      print(f"Downloading {model_name} model...")
      gpt2.download_gpt2(model_name=model_name)
    else:
      print(f"{model_name} model is already downloaded")

In [0]:
# A class that contains some knowledge that the user has acquired over time.
# For example, it may hold the words that the user knows (and how well the user knows them)
class UserKnowledge():
  def __init__(self):
    pass

  # We will likely need some place to store the knowledge we acquire about the user
  # so that we can access it from session to session
  def save_knowledge(self, path):
    pass

In [0]:
# Class that evaluates sentences based on what the system knows about the user
class SentenceEvaluator():
  def __init__(self, level='beginner', user_knowledge=None):
    self.level = 'beginner'
    self.word_dist = WordDist().dict_normalized()
    self.word_dist_threshold = 0.033
    self.word_dist_threshold_step = 0.005
    self.word_dist_difficulty_threshold = 7

    if user_knowledge == None:
      self.user_knowledge = UserKnowledge()
    else:
      self.user_knowledge = user_knowledge

  # We will likely want some method to update the user_knowledge in the evaluator
  # Maybe, we will only pass user_knowledge into the evaluate function?...
  def update_user_knowledge(user_knowledge):
    pass

  # Score the sentences and return the sentence with the highest score
  def evaluate(self, sentences):
    scores = self.score(sentences)
    high_score_index = np.argmax(scores)

    return sentences[high_score_index]

  # Score each sentence based on some criteria
  def score(self, sentences):
    scores = []
    for sentence in sentences:
      score = 0
      score += self.length_score(sentence)
      score += self.word_difficulty(sentence)

      # add other criteria for scoring
      # ...
      # ...
      scores.append(score)
    
    return scores

  # For beginners, we want to favor shorter sentences
  # This method should change as we increase difficulty level
  def length_score(self, sentence):
    length = len(sentence)

    if self.level == 'beginner':
      if length > 0 and length <= 15:
        return 6
      elif length > 15 and length <= 25:
        return 10
      elif length > 25 and length <= 35:
        return 7
      elif length > 35 and length <= 45:
        return 3
      elif length > 45 and length <= 55:
        return 1
      else:
        return 0
    else:
      raise('support for non-beginners is not supported')

  # For beginners, easier the better!
  # This method should change as we increase difficulty level
  def word_difficulty(self, sentence):
    word_scores = []

    for word in sentence.split(' '):
      word = word.lower()
      word_score = self.word_dist.get(word, 0) # Return the word or 0 if it doesn't exist

      if word_score >= self.word_dist_threshold:
        word_scores.append(10)
      elif word_score >= self.word_dist_threshold - self.word_dist_threshold_step:
        word_scores.append(8)
      elif word_score >= self.word_dist_threshold - (2 * self.word_dist_threshold_step):
        word_scores.append(6)
      elif word_score >= self.word_dist_threshold - (3 * self.word_dist_threshold_step):
        word_scores.append(4)
      elif word_score >= self.word_dist_threshold - (4 * self.word_dist_threshold_step):
        word_scores.append(2)
      else:
        word_scores.append(0)

    score_med = np.median(word_scores)

    if score_med >= self.word_dist_difficulty_threshold:
      return 10
    elif score_med >= self.word_dist_difficulty_threshold - 1:
      return 8
    elif score_med >= self.word_dist_difficulty_threshold - 2:
      return 6
    elif score_med >= self.word_dist_difficulty_threshold - 3:
      return 4
    elif score_med >= self.word_dist_difficulty_threshold - 4:
      return 2
    else:
      return 0

In [0]:
class SentenceGenerator():
  def __init__(self, language_model=None, evaluator=None):
    if language_model == None:
      self.language_model = LanguageModel()
    else:
      self.language_model = language_model
    if evaluator == None:
      self.evaluator = SentenceEvaluator()
    else:
      self.evaluator = evaluator

  # Generate a sentence, pick the best one based on evaluation, return the sentence
  def generate(self, print_all_sentences=False):
    # Determine the prefix/suffix based on some kind of criteria that is learned over time
    prefix = self.determine_prefix()
    if prefix == '<|startoftext|>':
      include_prefix = False
    else:
      include_prefix = True
    suffix = self.determine_suffix()

    sentences = self.language_model.generate_text(prefix=prefix, suffix=suffix, include_prefix=include_prefix, nsamples=15)
    sentences = self.filter_punctuation(sentences)
    best_sentence = self.evaluator.evaluate(sentences)

    if print_all_sentences:
      for sentence in sentences:
        print(sentence)

    return best_sentence
  
  # Used to filter unwanted punctuation GPT2 might produce, like newlines
  def filter_punctuation(self, sentences):
    filtered_sentences = []

    for sentence in sentences:
      new_sentence = sentence.replace('\n', ' ')
      new_sentence = new_sentence.translate(str.maketrans('', '', string.punctuation))
      filtered_sentences.append(new_sentence)

    return filtered_sentences

  def determine_prefix(self):
    # Good simple sentence starters...
    # starters = ['<|startoftext|>']
    starters = ['I', 'You', 'The', 'They', 'It', '<|startoftext|>', 'He', 'She', 'My']
    random_index = np.random.randint(0, len(starters)) 

    return starters[random_index]

  def determine_suffix(self):
    return '.'

## Tutoring System

In [0]:
model = LanguageModel('117M', train_steps=200) # Will fine-tune model everytime this is called! -- Will need to be fixed at some point

In [0]:
generator = SentenceGenerator(language_model=model)
best_sentence = generator.generate(print_all_sentences=True)
print("Best Sentence: ", best_sentence)

My own experience with it was that it was a very funny and funny movie 
My game is an open world game 
My second favorite place to eat is at a nearby lake 
My name is Washington SmootHart  and I am an agent of the United Nations 
Myrious s talk with the King of France was not well received and the book was banned 
My wife and her brother were at home when a sudden  thunderbolt  struck the house 
My money was full of things that are not in the movie and were not intended for audience or to be seen by children 
My favorite game is High Roller Ballet 
My own life was spent in the city and in the West Bank  and belonged to the family of the people who lived there 
My hair is round and it looks like the shaft of a gun 
My way was to go to a village called Namur in Afghanistan in 1959 
My students were supposed to be students at the University of East Anglia  but they were supposed to be studying in the department of English 
My statutes were changed to go with the Dukes of France  and the D

In [0]:
def print_options():
  print('0: I don\'t know what this means.')
  print('1: Choose words I don\'t know.')
  print('2: Generate a better sentence.')
  print('3: I need definitions.')
  print('4: I understand! Give me another!')
  print('5: Exit: I\'ve learned enough for today.')

## Text-to-Speech

In [0]:
speech = gTTS(text = best_sentence, lang = 'en', slow = False)
speech.save('speech.mp3')
Audio(filename='speech.mp3', autoplay=True)

In [0]:
Audio('Hedidnotlikethesoundofit.mp3', autoplay=True)

something


## Learn-A-Language Loop
* Terrible Name...
* We need to come up with something!

In [0]:
print("Learn-A-Language - English")

while True:
  print('\nGenerating personalized sentence... Please Wait.')
  sentence = generator.generate()
  print("\nTry this sentence:")
  print(sentence, '\n')
  speech = gTTS(text=sentence, lang='en', slow=False)
  filename = sentence.replace(' ', '') + '.mp3'
  speech.save(filename)
  

  while True:
    print_options()
    Audio(filename=filename, autoplay=False)
    code = input('Enter a code from above:')
    if code in ['0','1','2','3','4','5']:
      code = int(code)
      break
    print('')
  
  if code == 0:
    print('\nI\'m Sorry! This is as get as it gets...')
  elif code == 1:
    print('\nI\'m Sorry! This functionality isn\'t currently available.')
  elif code == 2:
    print('\nNew sentence coming right up!')
  elif code == 3:
    print('\nI\'m Sorry! This functionality isn\'t currently available.')
  elif code == 4:
    print('\nGreat Job! Here\'s another.')
  else:
    print('\nThanks for using Learn-A-Language! Play again soon!')
    break

Learn-A-Language - English

Generating personalized sentence... Please Wait.

Try this sentence:
He did not like the sound of it  

0: I don't know what this means.
1: Choose words I don't know.
2: Generate a better sentence.
3: I need definitions.
4: I understand! Give me another!
5: Exit: I've learned enough for today.


KeyboardInterrupt: ignored

In [0]:
print("Learn-A-Language - English")

while True:
  print('\nGenerating personalized sentence... Please Wait.')
  sentence = generator.generate()
  print("\nTry this sentence:")
  print(sentence, '\n')
  speech = gTTS(text=sentence, lang='en', slow=False)
  filename = sentence.replace(' ', '') + '.mp3'
  speech.save(filename)
  Audio(filename=filename, autoplay=False)

  while True:
    print_options()
    Audio(filename=filename, autoplay=False)
    code = input('Enter a code from above:')
    if code in ['0','1','2','3','4','5']:
      code = int(code)
      break
    print('')
  
  if code == 0:
    print('\nI\'m Sorry! This is as good as it gets...')
  elif code == 1:
    print('\nI\'m Sorry! This functionality isn\'t currently available.')
  elif code == 2:
    print('\nNew sentence coming right up!')
  elif code == 3:
    print('\nI\'m Sorry! This functionality isn\'t currently available.')
  elif code == 4:
    print('\nGreat Job! Here\'s another.')
  else:
    print('\nThanks for using Learn-A-Language! Play again soon!')
    break

Learn-A-Language - English

Generating personalized sentence... Please Wait.

Try this sentence:
He did not like the sound of it  

0: I don't know what this means.
1: Choose words I don't know.
2: Generate a better sentence.
3: I need definitions.
4: I understand! Give me another!
5: Exit: I've learned enough for today.


KeyboardInterrupt: ignored