<a href="https://colab.research.google.com/github/Wittgenbot/fine-tuning/blob/main/finetuned_model_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set Up

## Package Installs

In [None]:
! pip install -q -U accelerate
! pip install -q -U bitsandbytes
! pip install -q -U huggingface-hub
! pip install -q -U transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.3/297.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m731.7/731.7 MB[0m [31m737.9 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

## Imports

In [None]:
import textwrap
import json

import huggingface_hub
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

## Google Drive

In [None]:
from google.colab import drive

# Expose Google Drive inside the Colab filesystem.
drive.mount("/content/drive")

# Drive folder used for this project's files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
data_dir = "/content/drive/My Drive/LLM as a Judge/"

Mounted at /content/drive


# Model

## Configuration

In [None]:
# Hugging Face Hub repository id of the fine-tuned model under test.
model_name = "descartesevildemon/Wittgenbot-7B"

# 4-bit NF4 quantization with double quantization, passed to the model loader.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

## Download Model

In [None]:
# Local folder name derived from the repo id: every "/" becomes "_".
save_directory = "_".join(model_name.split("/"))

# Fetch the whole repository into `save_directory`; the call is left as the
# cell's last expression so the resulting path is displayed.
# NOTE(review): `local_dir_use_symlinks` appears to be deprecated in recent
# huggingface_hub releases — confirm whether it is still needed.
huggingface_hub.snapshot_download(
    repo_id=model_name,
    local_dir=save_directory,
    local_dir_use_symlinks=False,
)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/652 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

'/content/descartesevildemon_Wittgenbot-7B-hopefully-good'

## Load Model

In [None]:
# Load the downloaded checkpoint from disk, quantized according to `bnb_config`.
model = AutoModelForCausalLM.from_pretrained(
    save_directory,
    local_files_only=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Load Tokenizer

In [None]:
# Load the tokenizer from the same local directory the model was loaded from.
tokenizer = AutoTokenizer.from_pretrained(save_directory)

# Model Manual Tests

## Response Generation Function

In [None]:
def query_model(question):
  """Ask the model a question and print the full decoded sequence, line-wrapped.

  The question is inserted into a fixed instruction prompt, a sampled
  completion is generated, and the whole decoded sequence (prompt and special
  tokens included) is printed with lines longer than 125 characters wrapped.

  Relies on the module-level `tokenizer` and `model`.

  Args:
    question: Question text substituted into the prompt template.

  Returns:
    None. The formatted text is printed.
  """

  prompt_template = """
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about Ludwig Wittgenstein in a conversational, clear and coherent tone:
  {question}
  [/INST]
  """
  prompt = prompt_template.format(question=question)

  encodeds = tokenizer(prompt,
                       return_tensors="pt",
                       add_special_tokens=True)

  # Follow the model's actual placement instead of assuming 'cuda:0'
  # (the model is loaded with device_map='auto').
  model_inputs = encodeds.to(model.device)

  # Sampling is enabled, so repeated calls give different answers.
  generated_ids = model.generate(**model_inputs,
                                 max_new_tokens=1000,
                                 do_sample=True,
                                 top_p=0.9,
                                 temperature=0.7,
                                 pad_token_id=tokenizer.eos_token_id)

  decoded = tokenizer.batch_decode(generated_ids)

  response = decoded[0]

  # One wrapper serves every line; only lines over the width are re-flowed.
  max_width = 125
  wrapper = textwrap.TextWrapper(width=max_width, break_long_words=False)

  formatted_lines = []
  for line in response.split('\n'):
    if len(line) > max_width:
      line = '\n'.join(wrapper.wrap(line))
    formatted_lines.append(line + '\n')

  formatted_response = ''.join(formatted_lines)

  print(formatted_response)

## Manual Tests

In [None]:
question = "According to Ludwig Wittgenstein's philosophy, is the existence of a private language possible?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  According to Ludwig Wittgenstein's philosophy, is the existence of a private language possible?
  [/INST]
  
  No, according to Ludwig Wittgenstein's philosophy, the existence of a private language is not possible. This is a central
idea in his later philosophy, particularly in his work "Philosophical Investigations." Wittgenstein argues that language is
inherently social and public, and that the meaning of words comes from their use in a linguistic community. Therefore, a
private language, which would be a language used by only one person, would not have any meaning because there would be no
shared rules or practices for using words. Additionally, the idea of a private language raises problems with verification, as
it would be impossible for anyone else to confirm or understand the mea

In [None]:
question = "What determines the meaning of a word?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What determines the meaning of a word?
  [/INST]
  
  Wittgenstein, in his later philosophy, argues that the meaning of a word is determined by its use in the language. This
idea is central to his philosophy of language, which is presented in his work "Philosophical Investigations". According to
Wittgenstein, words don't have inherent meanings; instead, their meanings are derived from the ways they are used in specific
language-games. These language-games are the various activities in which language is used, such as giving orders, describing
objects, or telling stories.

So, to answer your question, the meaning of a word is not determined by some inherent property or by pointing to an object,
but rather by how it is used in the context of a language-game. This view is a departure from t

In [None]:
# Manual check: family resemblance, via the "games" example.
question = 'How did Wittgenstein use the example of "games" to illustrate the family resemblance concept?'
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  How did Wittgenstein use the example of "games" to illustrate the family resemblance concept?
  [/INST]
  
  Wittgenstein's concept of "family resemblances" is a key idea in his later philosophy, particularly in the "Philosophical
Investigations." He used the example of "games" to illustrate this concept.

Wittgenstein pointed out that if you look at various games, you'll find that they don't have one common feature that all
games share. Instead, there are similarities and overlaps between different types of games, much like members of a family
share similarities but don't have one defining characteristic that all members possess.

For instance, consider board games, card games, sports, and video games. They are all called games, but they don't have one
essential feature that all games 

In [None]:
question = "How do philosophical problems arise from misunderstandings of language?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  How do philosophical problems arise from misunderstandings of language?
  [/INST]
  
  Wittgenstein, in his later philosophy, emphasized that many philosophical problems arise due to misunderstandings of
language. He believed that language is a tool we use to describe our experiences and the world around us. However, when we
misuse or misunderstand this tool, it can lead to confusion and the creation of philosophical problems.

For instance, we may use a word in a way that is not consistent with its ordinary usage, or we may apply a concept in a
situation where it does not belong. This can lead to ambiguity, vagueness, or outright confusion, which in turn can give rise
to philosophical problems.

Wittgenstein's solution to these problems is to clarify the meaning of the words and concep

In [None]:
question = "Can you provide examples of how philosophical problems arise from the misuse of language?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  Can you provide examples of how philosophical problems arise from the misuse of language?
  [/INST]
  
  Absolutely, I'd be happy to provide some examples from Ludwig Wittgenstein's philosophy. Wittgenstein, in his later work,
particularly in the "Philosophical Investigations," argued that many philosophical problems arise from the misuse of
language. Here are a few examples:

1. **Private Language:** Wittgenstein argued that the idea of a private language, a language that only one person understands
and uses, is incoherent. This is because language is inherently social and dependent on shared agreement. If a word's meaning
is known only to the person who uses it, it can't function as part of a language.

2. **Meaning of Words:** Wittgenstein challenged the idea that words have fixed, i

In [None]:
question = "In what ways does the examination of pain's sameness challenge the understanding of subjective experiences?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  In what ways does the examination of pain's sameness challenge the understanding of subjective experiences?
  [/INST]
  
  That's a great question. Wittgenstein's discussion of pain's sameness in his "Philosophical Investigations" indeed
challenges our understanding of subjective experiences.

The traditional view, which Wittgenstein calls the "private language argument," assumes that each person has unique, private
access to their own sensations, such as pain. This view suggests that the concept of pain is based on an individual's
personal, subjective experience.

However, Wittgenstein argues against this notion by examining the concept of pain's sameness. He posits that if pain were
truly private and subjective, it would be impossible to determine whether two people's experiences of p

In [None]:
question = "What is the purpose of the analogy with the toolbox?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What is the purpose of the analogy with the toolbox?
  [/INST]
  
  The analogy with the toolbox is used by Ludwig Wittgenstein in his philosophical work, particularly in the "Philosophical
Investigations," to illustrate his concept of language games and meanings. Wittgenstein suggests that the words in our
language are like tools in a toolbox, each having a specific use and function in a particular context or "language game."

The purpose of this analogy is to emphasize that the meaning of a word or phrase is not inherent or universal but rather
depends on the rules and conventions of the specific language game in which it is used. Wittgenstein argues that
understanding the meaning of a word or phrase involves understanding the role it plays in the language game, just as
understanding 

In [None]:
question = "What does following a rule entail?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What does following a rule entail?
  [/INST]
  
  Following a rule, according to Ludwig Wittgenstein, is a fundamental concept in language and meaning. Wittgenstein, in his
later philosophy, argued that meaning is not something that exists independently of language use, but rather it is derived
from the way we use language in specific contexts.

When Wittgenstein talks about following a rule, he is referring to the idea that our use of language is guided by certain
established patterns or rules. These rules are not fixed or absolute, but rather they are determined by the practices and
agreements of a language community.

So, when we follow a rule in language use, we are essentially acting in accordance with these established patterns or
agreements. This might involve using words in a ce

In [None]:
question = "Can you describe Wittgenstein's picture theory of meaning?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  Can you describe Wittgenstein's picture theory of meaning?
  [/INST]
  
  Absolutely, I'd be happy to explain Wittgenstein's picture theory of meaning. This theory is presented in his early work,
the Tractatus Logico-Philosophicus.

The picture theory suggests that language can represent the world through propositions, which are like pictures of reality.
Just as a picture is a factual arrangement of elements, a proposition is a factual arrangement of words.

Wittgenstein believed that the structure of a proposition mirrors the structure of the state of affairs it represents. This
is possible because language and the world share a logical form. So, when we look at a proposition, we can see how its
elements correspond to the elements of the state of affairs it describes.

For example, the

In [None]:
question = "What does Wittgenstein say about tautologies and contradictions in the Tractatus?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What does Wittgenstein say about tautologies and contradictions in the Tractatus?
  [/INST]
  
  Wittgenstein, in his early work Tractatus Logico-Philosophicus, discusses tautologies and contradictions in the context of
logical propositions. Tautologies, for Wittgenstein, are propositions that are true in all possible worlds, while
contradictions are propositions that are false in all possible worlds.

In the Tractatus, Wittgenstein argues that tautologies and contradictions do not convey any meaningful information about the
world. This is because their truth or falsity is not dependent on the state of affairs in the world, but rather on the
logical structure of the propositions themselves.

For instance, consider the tautology "Either it is raining or it is not raining." This statement

In [None]:
question = "Why is the notion of a criterion for correctness important for Wittgenstein's private language argument?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  Why is the notion of a criterion for correctness important for Wittgenstein's private language argument?
  [/INST]
  
  The notion of a criterion for correctness is important for Wittgenstein's private language argument because it's central to
his refutation of the idea that individuals can create and understand a private language, only understood by themselves.

Wittgenstein argues that for a language to be meaningful, it must have publicly accessible criteria for correct usage. In
other words, there must be a way for others to verify or judge whether a person is using the language correctly. This is
because language is a social construct, and its meaning is derived from its usage within a community.

In the context of the private language argument, Wittgenstein challenges the idea tha

In [None]:
question = "What does Wittgenstein say about aspect perception?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What does Wittgenstein say about aspect perception?
  [/INST]
  
  Wittgenstein's philosophy, particularly as expressed in his later work, such as the "Philosophical Investigations," delves
into the concept of aspect perception. Aspect perception is the ability to see something as something else, or to experience
a shift in the way we understand or interpret a particular image or situation.

Wittgenstein uses the example of seeing a drawing as either a duck or a rabbit, depending on how we choose to interpret it.
This shift in perception is not a matter of seeing new features, but rather a change in the way we organize and interpret the
same features.

Wittgenstein argues that aspect perception is a fundamental aspect of human understanding and meaning. He suggests that our
ability to p

In [None]:
question = "What is the paradox of ostension?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What is the paradox of ostension?
  [/INST]
  
  The paradox of ostension is a concept that comes from Ludwig Wittgenstein's later philosophy, particularly his work in the
Philosophical Investigations. It's a paradox that arises when we try to explain the meaning of a word or concept through the
direct pointing to an example, which is what ostension means.

The paradox goes like this: if I point to a particular example and say "this is red," I'm using the word "red" to explain
what the word "red" means. But this creates a problem because I'm using the word "red" to explain the meaning of the word
"red." It's like I'm using the word to explain itself, which seems circular and nonsensical.

Wittgenstein uses this paradox to challenge the idea that the meaning of a word can be explained by

In [None]:
question = "What is Wittgenstein trying to show with the builders' language example?"
query_model(question)

<s> 
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about
Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What is Wittgenstein trying to show with the builders' language example?
  [/INST]
  
  Wittgenstein's builders' language example is from his later philosophy, particularly from the Philosophical Investigations.
In this example, he's trying to show that language is a tool used in various ways within a form of life. The builders'
language consists of only two workers, a builder and an assistant, and a few basic words like "slab," "block," "pillar,"
"beam," and "column," along with some actions like "bring," "stack," "take," and "look."

Wittgenstein uses this example to argue against the idea that words have fixed, inherent meanings. Instead, he suggests that
meaning arises from the use of words in specific contexts and the rules governing that use. In the builders' language, words
have 

In [None]:
# Manual check: the "five red apples" example.
question = 'What did Wittgenstein show using the "five red apples" example?'
query_model(question)

Wittgenstein used the example of "five red apples" in his philosophical work, particularly in his later period, to illustrate
his concept of language games and private language.

The "five red apples" example is part of Wittgenstein's argument against the possibility of a private language, which he
believed was inherently flawed. According to Wittgenstein, the meaning of a word is determined by its use in language, and
this use is a public, social activity. Therefore, if we cannot publicly demonstrate the meaning of a word, it cannot have a
meaning.

In the "five red apples" example, Wittgenstein asked us to imagine a person who associates the word "apple" with a particular
sensation, say the sensation of seeing five red apples. The person then decides to use the word "apple" to refer to this
sensation in the future. However, Wittgenstein argued that this person cannot truly communicate the meaning of the word
"apple" to anyone else, because the sensation is private and unique to the i

# Generate Wittgenbot Answers for LLM as a Judge

## Query Model Function -- Sliced Output

In [None]:
def query_model(question):
  """Ask the model a question and return only the newly generated answer text.

  The question is inserted into a fixed instruction prompt and a sampled
  completion is generated. The prompt is removed by dropping the prompt's
  tokens from the generated sequence before decoding, so the result does not
  depend on the decoded text reproducing the prompt character-for-character.

  Relies on the module-level `tokenizer` and `model`.

  Args:
    question: Question text substituted into the prompt template.

  Returns:
    The generated answer as a string, with special tokens removed and
    surrounding whitespace stripped.
  """

  prompt_template = """
  [INST]
  You are a philosophy professor specialized in Ludwig Wittgenstein. Your task is to answer the following question about Ludwig Wittgenstein in a conversational, clear and coherent tone:
  {question}
  [/INST]
  """
  prompt = prompt_template.format(question=question)

  encodeds = tokenizer(prompt,
                       return_tensors="pt",
                       add_special_tokens=True)

  # Follow the model's actual placement instead of assuming 'cuda:0'
  # (the model is loaded with device_map='auto').
  model_inputs = encodeds.to(model.device)

  # Sampling is enabled, so repeated calls give different answers.
  generated_ids = model.generate(**model_inputs,
                                 max_new_tokens=1000,
                                 do_sample=True,
                                 top_p=0.9,
                                 temperature=0.7,
                                 pad_token_id=tokenizer.eos_token_id)

  # The output sequence starts with the prompt tokens; keep only what follows.
  prompt_token_count = model_inputs["input_ids"].shape[-1]
  answer_ids = generated_ids[0][prompt_token_count:]

  response = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

  return response

## Test Questions

In [None]:
# Evaluation questions about the Tractatus (abbreviated "TLP" below).
# These strings are sent to the model verbatim and are also used as the keys
# of the saved answers, so any edit changes both the prompt and the output file.
TLP_test_questions = [
    'How does Wittgenstein define the concept of a "proposition" in the Tractatus?',
    'What role does logic play in the structure of reality as presented in the Tractatus?',
    'Can you explain the picture theory of language proposed in the Tractatus?',
    'How does Wittgenstein distinguish between what can be said and what can only be shown?',
    'How are facts and states of affairs conceptualized in the Tractatus?',
    'What is the significance of the limit of language in the Tractatus?',
    'How does the Tractatus address the relationship between language and reality?',
    'What is the purpose of the ladder metaphor in the conclusion of the Tractatus?',
    'What is meant by "propositional logic" in the Tractatus, and how is it significant to the work’s overall argument?',
    'How does the Tractatus critique the possibility of metaphysical propositions?',
    'What is the role of silence in Wittgenstein’s philosophy as expressed in the Tractatus?',
    'What did Wittgenstein mean by "philosophy is not a body of doctrine but an activity" in the Tractatus?',
    'How does Wittgenstein treat the problem of solipsism in the Tractatus?',
    'What does Wittgenstein mean by "The limits of my language mean the limits of my world"?',
    'How does the Tractatus conceptualize the idea of sense and nonsense in language?',
]

# Evaluation questions about the Philosophical Investigations (abbreviated "PI" below).
PI_test_questions = [
    'In Wittgenstein\'s Philosophical Investigations, what determines the meaning of a word?',
    'How did Wittgenstein use the example of "games" in Philosophical Investigations to illustrate the family resemblance concept?',
    'In Philosophical Investigations, what is the concept of family resemblance?',
    'In the context of Philosophical Investigations, is the existence of a private language possible?',
    'How can we confirm that someone is following a rule, according to Philosophical Investigations?',
    'In Philosophical Investigations, how can the concept of sameness be used to teach a rule?',
    'What is the role of language-games in Wittgenstein\'s Philosophical Investigations?',
    'How is the idea that mental processes form the basis of our understanding of language critiqued in Philosophical Investigations?',
    'Do private mental objects exist according to Wittgenstein’s Philosophical Investigations?',
    'What is the relationship between forms of life and language in Philosophical Investigations?',
    'In Philosophical Investigations, how is it explained that the meaning of a word is its use in language?',
    'How do philosophical problems arise from misunderstandings of language, as discussed in Philosophical Investigations?',
    'What is problematic about the Augustinian view on meaning in Philosophical Investigations?',
    'In Philosophical Investigations, what is said about the misguided nature of philosophical questions?',
    'What is the purpose of the analogy with the toolbox in Philosophical Investigations?',
    'In what ways does the notion of "language-games" in Philosophical Investigations challenge traditional epistemology?',
    'What implications does the critique of private language in Philosophical Investigations have for theories of consciousness?',
    'How is the concept of pain used in Philosophical Investigations to argue against the possibility of a private language?',
    'In what ways does the examination of pain\'s sameness in Philosophical Investigations challenge the understanding of subjective experiences?',
    'What are the limitations of language, as discussed in Wittgenstein’s Philosophical Investigations?',
    'In Philosophical Investigations, what is the relationship between forms of life and understanding philosophy?'
]

# Maps each work's abbreviation to its question list; the abbreviations become
# the top-level keys of the generated answers.
test_questions = {
    'TLP': TLP_test_questions,
    'PI': PI_test_questions
}

## Generate Answers for Test Questions & Save to Drive

In [None]:
# Nested mapping: work abbreviation -> {question -> generated answer}.
# Every work gets its (initially empty) answer dict before generation starts.
wittgenbot_answers = {work: {} for work in test_questions}

# Answers are stored one at a time so that an interrupted run keeps whatever
# has been generated so far.
for work, questions in test_questions.items():
  for question in questions:
    answer = query_model(question)
    wittgenbot_answers[work][question] = answer

wittgenbot_answers_json = json.dumps(wittgenbot_answers, indent=4)

In [None]:
# The bare file name is kept separately because the confirmation message
# below reports it without the directory.
wittgenbot_answers_file_name = "wittgenbot_answers.txt"
wittgenbot_answers_file_path = f"{data_dir}{wittgenbot_answers_file_name}"

# Serialize the nested answers dict as indented JSON text.
wittgenbot_answers_json = json.dumps(wittgenbot_answers, indent=4)

with open(wittgenbot_answers_file_path, mode="w") as file:
    file.write(wittgenbot_answers_json)

print(f"File saved as {wittgenbot_answers_file_name}")

File saved as wittgenbot_answers.txt
