A notebook to search through all the words of a text and find the least common words by count. Then calls GPT-3 to get the meaning of the least common words.
1) load API key from config.ini
2) load text from file
3) strip out punctuation and split into words
4) remove stop words like (A, the, and, etc.)
5) count the words
6) find the least common words by looking at the last 30 words
7) call GPT-3 to get the meaning of the least common words


In [1]:
import re 
import nltk
from nltk.corpus import stopwords
import numpy as np
import pandas as pd
import openai
import configparser

In [2]:
# Set up your GPT-3 API key
config = configparser.ConfigParser()
config.read("../config.ini")
openai.api_key = config.get("openai", "api_key")

In [3]:
# Update stopowrds  list 
nltk.download('stopwords')
stop_words = stopwords.words("english")

[nltk_data] Downloading package stopwords to /Users/blev/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
#Read in the text file of the book as f and create a string of the text
with open ("../ref/Taleof2Cities.txt", "r", encoding='utf-8') as f:
    text = f.read()

In [5]:
#Strip out punctuanation and make lowercase and split the text into words
text = re.sub(r'[^\w\s]', '', text).lower()
words = text.split()

In [6]:
#Use NTLK to remove stop words
words = [word for word in words if word not in stop_words]

In [7]:
df_word_count = pd.Series(words).value_counts()

In [8]:
#Create a list of the 30 least common words
least_common = df_word_count.tail(30).index.tolist()

In [9]:
least_common

['knowthat',
 'manettehow',
 'parallels',
 'nurtured',
 'ungenerous',
 'adjure',
 'manfully',
 'heartor',
 'deem',
 'heartdo',
 'upstairsand',
 'alonefor',
 'darker',
 'mewell',
 'headthey',
 'lovedthe',
 'werecharles',
 'weaken',
 'perpetuate',
 'truthfully',
 'purely',
 'undoubted',
 'suitor',
 'division',
 'outweigh',
 'suitors',
 'retain',
 'muchi',
 'presumption',
 'newsletter']

In [18]:
def generate_gpt3_dialogue(prompt):
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are 6th grade literature teacher and will provide context and meaning for each word."},
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message.content.strip()

In [32]:
df = pd.DataFrame(columns=['Word', 'Definition'])
for word in least_common:
    term = word
    definition = generate_gpt3_dialogue(word)
    df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)

  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignore_index=True)
  df = df.append({'Word': term, 'Definition': definition}, ignor

In [33]:
df

Unnamed: 0,Word,Definition
0,knowthat,"I do not recognize the word ""knowthat."" Can yo..."
1,manettehow,"I apologize, but ""manettehow"" doesn't seem to ..."
2,parallels,"The term ""parallels"" refers to two or more thi..."
3,nurtured,"The word ""nurtured"" means to care for, protect..."
4,ungenerous,"The word ""ungenerous"" can be used to describe ..."
5,adjure,"""Adjure"" is a verb that means to solemnly urge..."
6,manfully,Manfully is an adverb that means to do somethi...
7,heartor,"I'm sorry, but ""heartor"" is not a word in the ..."
8,deem,"The word ""deem"" means to consider or judge in ..."
9,heartdo,"I'm sorry, but ""heartdo"" is not a recognized w..."
