In [169]:
# from transformers import AutoModelForCausalLM, AutoTokenizer, top_k_top_p_filtering
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
from torch.nn import functional as F
import random
import numpy as np
from numpy import random as np_random

In [170]:
torch.manual_seed(0)
random.seed(0)
np_random.seed(0)

In [38]:
!conda activate dl


CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
To initialize your shell, run

    $ conda init <SHELL_NAME>

Currently supported shells are:
  - bash
  - fish
  - tcsh
  - xonsh
  - zsh
  - powershell

See 'conda init --help' for more information and options.

IMPORTANT: You may need to close and restart your shell after running 'conda init'.




In [28]:
# Load GPT-2 tokenizer and fine-tuned model
model_checkpoint = "distilgpt2"
# tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
# model = AutoModelForCausalLM.from_pretrained(".", pad_token_id=tokenizer.eos_token_id)


In [34]:
!ls

config.json	    func_for_tom.py  pytorch_model.bin
func_for_tom.ipynb  philosophers     WestWorld.ipynb


In [171]:
model_checkpoint = "distilgpt2"
tokenizer_republic = GPT2Tokenizer.from_pretrained("./philosophers/republic", use_fast=True)
tokenizer_zarathustra = GPT2Tokenizer.from_pretrained("./philosophers/zarathustra", use_fast=True)
tokenizer_tao = GPT2Tokenizer.from_pretrained("./philosophers/tao", use_fast=True)
model_republic = AutoModelForCausalLM.from_pretrained("./philosophers/republic", pad_token_id=tokenizer.eos_token_id)
model_zarathustra = AutoModelForCausalLM.from_pretrained("./philosophers/zarathustra", pad_token_id=tokenizer.eos_token_id)
model_tao = AutoModelForCausalLM.from_pretrained("./philosophers/tao", pad_token_id=tokenizer.eos_token_id)

In [172]:
models = {
    "republic": model_republic,
    "zarathustra": model_zarathustra,
    "tao": model_tao
}

In [183]:
class WiseWorld:
    """
    * Environment where the hosts are. 
    * Controls the flow of hosts thinking vs. speaking vs. listening. 
    """
    def __init__(self,):
        self.active_hosts = []
    
    def add_host(self, new_host, host_name):
        # TODO: When new_host is dict, require name too
        self.active_hosts.append(new_host)
        pass
    
# COMMENTING OUT UNTIL USE     
#     def retire_host():
#         # TODO: When active_hosts is dict, remove
#         pass
    
    def run_society(self,num_loops=10):
        output = []
        for _ in range(num_loops):
            output.append(self.host_thinking_cycle(2))
        return output
        
    def host_thinking_cycle(self, num_thinks):
        
        output = []
        
        for host in self.active_hosts:
            output.append([host.get_name() + ": "])
        
        # Thinking
        for i, host in enumerate(self.active_hosts):
            for _ in range(num_thinks):
                output[i].append(host.think())
            
        # Speaking/Listening
        random_host_id = random.randint(0,len(self.active_hosts)-1)
        speaker = self.active_hosts[random_host_id]
        speaker_name = speaker.get_name()
        print(speaker_name + " speaks:")
        speech = speaker.speak()
        
        for i, host in enumerate(self.active_hosts):
            host.listen(speech)
            output[i].append(speech)
        
        return output

# COMMENTING OUT UNTIL USE            
#     def reprogram(new_program):
#         # Assumes you want to reprogram all hosts
#         # TODO: Make this so we can selectively reprogram one
#         for host in self.active_hosts:
#             host.mind_control(new_program)

In [188]:
"""
SETUP
"""

plato_seed = "And now, at last, we have reached firm ground, and are able to infer that the virtues of the State and of the individual are the same."
nietzsche_seed = "When Zarathustra was thirty years old, he left his home and the lake of his home, and went into the mountains."
lao_tzu_seed = "The Tao that can be trodden is not the enduring and unchanging Tao."

universal_prompt = "What is the meaning of life?"

wiseworld = WiseWorld()
plato = Host(model_plato, "PLATO", universal_prompt)
nietzsche = Host(model_zarathustra, "NIETZSCHE", universal_prompt)
lao_tzu = Host(model_tao, "LAO TZU", universal_prompt)

wiseworld.add_host(plato, "PLATO")
wiseworld.add_host(lao_tzu, "LAO TZU")
wiseworld.add_host(nietzsche, "NIETZSCHE")

In [192]:
output = wiseworld.run_society(10)
output

LAO TZU speaks:
LAO TZU speaks:


[[['PLATO: ',
   '--Or is it just that living itself spoilt by living experiences?',
   ' Wellthen, my brethren, if one should live indanger, seek shelterfrom itself in a forest! And if one would not love himself, seek a refuge in a forest?',
   '--and should one love oneself only as a means to living?'],
  ['LAO TZU: ',
   ' To learn from oneself and leave oneself in order to be able to livefreely, is the virtue of the slave-master who never enlighteth himself.',
   'Or is it misery without purpose?',
   '--and should one love oneself only as a means to living?'],
  ['NIETZSCHE: ',
   ' Or is itdesirability to live among apes and wolves?',
   ' Must one not live for himself?',
   '--and should one love oneself only as a means to living?']],
 [['PLATO: ',
   'Or is it bad to live among animals which one should not love?',
   '',
   ' And where there are novens for living,how should one live without committing a sin?'],
  ['LAO TZU: ',
   ' If so--life should be a means to all who are b

In [189]:
plato.get_monologue()

'What is the meaning of life?'

In [190]:
nietzsche.get_monologue()

'What is the meaning of life?'

In [191]:
lao_tzu.get_monologue()

'What is the meaning of life?'

In [180]:
class Host:
    """
    * AI agent we are interested in.
    * At each time step, either speaks, listens or thinks. 
    """
    def __init__(self, model, name="Philosopher", seed_sentence="Hello, world. ", temperature=1.0):
        self.name = name # Type: str
        self.internal_monologue = seed_sentence # Type: str
        self.memory_size = 0 # Type: int
        self.seed_sentence = seed_sentence # Type: str
        self.temperature = temperature
        self.model = model
        
    def update_monologue(self,new_thought):
        """
        Even though internal_monologue is list of sentences, we 
            want to limit its number of words in monologue, to account for long sentences.
            GPT2 model has an approx max number of words (what is it?), so when exceed
            need to pop off sentences until the total number of words is less. 
        """
        max_words = 900 # Model can take 1024 subwords, so this is conservative
        new_thought_num_words = len(new_thought.split())
        self.memory_size += new_thought_num_words
        self.internal_monologue += new_thought
        while self.memory_size > max_words:
            delete_chunk_size = 15
            diff = memory_size - max_words
            self.internal_monologue = self.internal_monologue[delete_chunk_size:]
            popped_sentence_num_words = self.internal_monologue.pop(0)
            self.memory_size -= popped_sentence_num_words
            if len(self.internal_monologue) < delete_chunk_size : break
        return self.internal_monologue
        
    def get_monologue(self):
        return self.internal_monologue
    
    def get_name(self):
        return self.name
        
    def listen(self,new_thought):
        self.update_monologue(new_thought)
        pass
    
    def speak(self):
        speech = self.think(False) # Don't update now; will update all together
        return speech
    
    def think(self, update=True):
        thought = self.generate_sentence(has_input = False)
        if update: self.update_monologue(thought)
        return thought
    
# COMMENTING OUT UNTIL USE     
#     def mind_control(self, new_thought):
#         # For maintenance to control them. 
#         # update_monologue is for other functions to call. 
#         # TODO indicate that update_monologue is private, python style. 
#         self.update_monologue(new_thought)
    
    def generate_sentence(self,input_text="", has_input=True, memory=True):
        """
        Arg: input_text: (str) The text to add to internal monologue
        Return: output_sentence: (str) The response sentence
        """
        if has_input: 
            if memory:
                new_internal_monologue = self.update_monologue(input_text)
            else: new_internal_monologue = input_text # Don't record to memory
        else: 
            new_internal_monologue = self.internal_monologue
        
        input_tokens = tokenizer.encode(new_internal_monologue, return_tensors='pt')
        input = tokenizer.encode(string_input_sentence, return_tensors='pt')
        
        # set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
        sample_outputs = model_zarathustra.generate(
            input_ids=input,
            do_sample=True, 
            max_length=100, 
        #     top_k=50, 
        #     top_p=0.95,
            num_return_sequences=1,
        #     temperature = 1.0,
        )

        output_initial = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
        len_input = len(string_input_sentence)
        output_trim = output_initial[len_input:]
        end_period = output_trim.find(".")
        end_question = output_trim.find("?")
        if (end_period > 0) and (end_question > 0): 
            end = min(end_period, end_question)
        elif end_period < 0: end = end_question
        elif end_question < 0: end = end_period
        else: end = -1
        output = output_trim[:end+1]
        return output
# COMMENTING OUT UNTIL USE     
#     def change_temperature(new_temp):
#         self.temperature = new_temp

# The Forge

In [161]:
string_input_sentence = "How should one best live life?"
input = tokenizer.encode(string_input_sentence, return_tensors='pt')

# set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
sample_outputs = model_zarathustra.generate(
    input_ids=input,
    do_sample=True, 
    max_length=100, 
#     top_k=50, 
#     top_p=0.95,
    num_return_sequences=5,
#     temperature = 1.0,
    repetition_penalty = 1.0
)

output_initial = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
len_input = len(string_input_sentence)
print(len_input)
output_trim = output_initial[len_input:]
end_period = output_trim.find(".")
end_question = output_trim.find("?")
if (end_period > 0) and (end_question > 0): 
    end = min(end_period, end_question)
elif end_period < 0: end = end_question
elif end_question < 0: end = end_period
else: end = -1
output = output_trim[:end+1]
print(end)
print(output_trim[:end+1])

30
84
 Wherewith all innocence striveth wanteth to live, and all weariness wanteth to live?


In [131]:
sample_outputs[0]

tensor([ 2437,   815,   530,  1266,  2107,  1204,    30,   220,   554,   262,
         2456,   286,  7361,  3700, 14909,    25,   198,     1, 10418,   389,
          588, 19435,    11,   290,   262, 24276,   481, 19861,   511, 24276,
          526,    18,    13,  4162,   815,   530,   523,   881,  3252,  1918,
          307,  7207,   268,   262,   661,    30,   220,  4362,   286,   262])

In [149]:
output

'How should one best live life? Is notlife so repast and chastiseless and almost unrighteous, that which is profited by indulging andmanaging? Is this the condition of which we should choose--and when a man expireshis abode is not with him but with his owneyes, and with the pattern and manner in which he ought tolive, and is perfected by his manifold works?Certainly, he said, and he who does not partake of this lifehas only a better life, but a worse; if he live many years, if he live a great deal longer, will he not only live happy here, but a barren land, which is full of miseryand pain?From what have you said? he'

In [None]:
def function_for_tom(string_input_sentence):
    
    input = tokenizer.encode(string_input_sentence, return_tensors='pt')

    # set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
    sample_outputs = model.generate(
        input_ids=input,
        do_sample=True, 
        max_length=50, 
        top_k=50, 
        top_p=0.95,
        num_return_sequences=1,
        temperature = 0.7
    )

    output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    end = output.find(".")
    len_input = len(string_input_sentence)
    output_trim = output[len_input+1:end+1]
    if end == -1: output_trim = output[len_input+1:]
    return output_trim.replace('\n', '')