<a href="https://colab.research.google.com/github/Wittgenbot/fine-tuning/blob/main/finetuned_model_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set Up

## Package Installs

In [None]:
! pip install -q -U accelerate
! pip install -q -U bitsandbytes
! pip install -q -U huggingface-hub
! pip install -q -U transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.9/388.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25h

## Imports

In [None]:
import textwrap
import json

import huggingface_hub
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

## Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive so files written there are kept
# outside the Colab runtime's local disk.
drive.mount('/content/drive')
# Drive folder used for this notebook's output file. The trailing slash matters:
# the output path is later built by plain string concatenation with a file name.
data_dir = '/content/drive/My Drive/LLM as a Judge/'

Mounted at /content/drive


# Model

## Configuration

In [None]:
# Hugging Face Hub repository id of the fine-tuned model under test.
model_name = 'descartesevildemon/Wittgenbot-7B'

# 4-bit NF4 quantization with double quantization.
# The compute dtype is set explicitly to bfloat16 so the quantized layers compute
# in the same dtype the model is loaded with (`torch_dtype=torch.bfloat16`);
# when left unset it defaults to float32.
bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                bnb_4bit_quant_type='nf4',
                                bnb_4bit_use_double_quant=True,
                                bnb_4bit_compute_dtype=torch.bfloat16)

## Download Model

In [None]:
# Local directory name derived from the repo id: every '/' becomes '_'.
save_directory = model_name.replace('/','_')

# Download the model repository into `save_directory`. The call is the last
# expression of the cell, so its return value is shown as the cell output.
# NOTE(review): `local_dir_use_symlinks` may be deprecated in recent
# huggingface_hub releases — confirm against the installed version.
huggingface_hub.snapshot_download(repo_id=model_name,
                                  local_dir_use_symlinks=False,
                                  local_dir=save_directory)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/652 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

'/content/descartesevildemon_Wittgenbot-7B-no-clue'

## Load Model

In [None]:
# Load the causal LM from the directory populated by the download step, applying
# the quantization settings in `bnb_config`.
# `local_files_only=True` is passed so loading uses the files in
# `save_directory` rather than fetching from the Hub.
model = AutoModelForCausalLM.from_pretrained(save_directory,
                                             quantization_config=bnb_config,
                                             torch_dtype=torch.bfloat16,
                                             device_map='auto',
                                             local_files_only=True)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Load Tokenizer

In [None]:
# Load the tokenizer from the same local directory as the model weights.
tokenizer = AutoTokenizer.from_pretrained(save_directory)

# Model Manual Tests

## Response Generation Function

In [None]:
def _wrap_long_lines(text, width=125):
  """Return `text` with every line longer than `width` characters word-wrapped.

  Lines at or below `width` are kept as they are. Each resulting line is
  terminated with a newline, and words longer than `width` are not split.
  """
  # Build the wrapper once instead of once per long line.
  wrapper = textwrap.TextWrapper(width=width, break_long_words=False)

  wrapped_lines = []
  for line in text.split('\n'):
    if len(line) > width:
      line = '\n'.join(wrapper.wrap(line))
    wrapped_lines.append(line + '\n')

  return ''.join(wrapped_lines)


def query_model(question):
  """Ask the model one question and print the full decoded sequence.

  The question is inserted into the [INST] prompt template, tokenized, and
  passed to `model.generate` with sampling enabled, so output can differ
  between calls. The printed text is the whole decoded sequence (prompt and
  special tokens included), wrapped to 125 columns for readability.

  Args:
    question: The question text to insert into the prompt.

  Returns:
    None. The formatted text is printed.
  """

  prompt_template = """
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein. \
  Your task is to answer the following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  {question}
  [/INST]
  """

  prompt = prompt_template.format(question=question)

  encodeds = tokenizer(prompt,
                       return_tensors="pt",
                       add_special_tokens=True)

  # Follow the model's own device instead of assuming 'cuda:0'.
  model_inputs = encodeds.to(model.device)

  generated_ids = model.generate(**model_inputs,
                                 max_new_tokens=1000,
                                 do_sample=True,
                                 top_p=0.9,
                                 temperature=0.7,
                                 # The unk token id is reused as the padding id
                                 # (presumably because no pad token is defined — confirm).
                                 pad_token_id=tokenizer.unk_token_id)

  decoded = tokenizer.batch_decode(generated_ids)

  print(_wrap_long_lines(decoded[0]))

## Manual Tests

In [None]:
# Manual check: private language.
question = "According to Ludwig Wittgenstein's philosophy, is the existence of a private language possible?"
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  According to Ludwig Wittgenstein's philosophy, is the existence of a private language possible?
  [/INST]
  
  Wittgenstein's philosophy, as expressed in his later work, particularly in the "Philosophical Investigations," argues
against the possibility of a private language. A private language, in this context, is a language that only one person uses
and understands, and whose meanings are based on the individual's private experiences.

Wittgenstein posits that language is inherently social and public. He believes that the meaning of a word is determined by
its use in a language, which is a social activity. Without the possibility of sharing or comparing experiences, there can be
no agreement on the meaning of words, and thus, no language.

In 

In [None]:
# Manual check: meaning of a word.
question = 'What determines the meaning of a word?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What determines the meaning of a word?
  [/INST]
  
  The meaning of a word, according to Ludwig Wittgenstein's philosophy, is not determined by a single definition or an
essential property that the word refers to. Instead, Wittgenstein argues that the meaning of a word is determined by its use
in the context of a language game. This idea is central to his later philosophy, particularly in the Philosophical
Investigations.

Wittgenstein uses the term "language game" to describe the various ways in which language is used in different contexts. For
example, the language game of giving orders is different from the language game of describing an object. According to
Wittgenstein, the meaning of a word is determined by the rules of the specific lang

In [None]:
# Manual check: "games" and family resemblance.
question = 'How did Wittgenstein use the example of "games" to illustrate the family resemblance concept?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  How did Wittgenstein use the example of "games" to illustrate the family resemblance concept?
  [/INST]
  
  Wittgenstein's concept of "family resemblances" is a key idea in his later philosophy, particularly in the "Philosophical
Investigations". He uses the example of "games" to illustrate this concept.

Wittgenstein argues that there is no single characteristic that all games share. Instead, he suggests that there is a series
of similarities and overlaps between different types of games, much like the members of a family share similar features but
are not identical.

For instance, consider the games of chess, football, and hopscotch. Chess involves strategy, rules, and competition, while
football involves physical activity, teamwork, and rul

In [None]:
# Manual check: philosophical problems and misunderstandings of language.
question = 'How do philosophical problems arise from misunderstandings of language?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  How do philosophical problems arise from misunderstandings of language?
  [/INST]
  
  Wittgenstein, in his later philosophy, argued that many philosophical problems arise from misunderstandings of language.
This idea is central to his philosophical method, which he called "therapeutic" - it aims to help us recognize and dissolve
these misunderstandings, rather than providing positive doctrines or theories.

Wittgenstein believed that our use of language is deeply intertwined with our forms of life, which include our practices,
customs, and ways of acting. When we encounter a philosophical problem, we often try to understand it by constructing
theories or propositions about it. However, according to Wittgenstein, these theories are often based 

In [None]:
# Manual check: examples of problems arising from the misuse of language.
question = 'Can you provide examples of how philosophical problems arise from the misuse of language?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  Can you provide examples of how philosophical problems arise from the misuse of language?
  [/INST]
   Absolutely, I'd be happy to explain that! Ludwig Wittgenstein, a prominent Austrian-British philosopher, is well-known for
his exploration of the relationship between language and thought, particularly in his later philosophy. He argued that many
philosophical problems arise due to misunderstandings or misuses of language.

For instance, consider the problem of other minds. This is the philosophical question of how we can know that other people
have minds and experiences similar to our own. This problem arises, in part, due to our use of the word "mind". We often use
this term in a vague or unclear way, leading to confusion and philosophical p

In [None]:
# Manual check: sameness of pain and subjective experience.
question = "In what ways does the examination of pain's sameness challenge the understanding of subjective experiences?"
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  In what ways does the examination of pain's sameness challenge the understanding of subjective experiences?
  [/INST]
  
  That's a great question! Wittgenstein's discussion of pain's sameness, which can be found in his later philosophical work,
particularly in the "Philosophical Investigations," challenges our understanding of subjective experiences in several ways.

First, Wittgenstein questions the idea that pain is a private, internal experience that is solely subjective. He argues that
our understanding of pain is actually grounded in our shared language and behavior. When we say we're in pain, we're not
merely describing a private sensation; rather, we're also signaling our need for help or expressing our distress. This means
that pain, w

In [None]:
# Manual check: the toolbox analogy.
question = 'What is the purpose of the analogy with the toolbox?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What is the purpose of the analogy with the toolbox?
  [/INST]
  
  The analogy with the toolbox is a key concept in Ludwig Wittgenstein's philosophy, particularly in his later work, such as
the "Philosophical Investigations." The purpose of this analogy is to illustrate the idea that language is like a toolbox,
with each word being a tool that has a specific use in a specific context.

Wittgenstein argues that the meaning of a word is not determined by some inherent property or essence, but rather by its use
in a language-game, which is a social activity governed by rules. Just as a hammer has a specific use in a toolbox, a word
has a specific use in a language-game.

Furthermore, Wittgenstein suggests that the meaning of a word can only be un

In [None]:
# Manual check: rule-following.
question = 'What does following a rule entail?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What does following a rule entail?
  [/INST]
   Following a rule, according to Ludwig Wittgenstein's philosophy, is a complex process that involves understanding,
interpretation, and application. In Wittgenstein's "Philosophical Investigations," he argues that there's no such thing as a
private language, and that meaning comes from use within a language game. This concept is closely related to the idea of
following a rule.

When Wittgenstein talks about following a rule, he's not just referring to a set of instructions or a formula. Instead, he's
referring to the entire social context and practice surrounding that rule. So, to follow a rule, one must understand the
rule, but also understand the context and purpose of the rule within a particula

In [None]:
# Manual check: picture theory of meaning.
question = "Can you describe Wittgenstein's picture theory of meaning?"
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  Can you describe Wittgenstein's picture theory of meaning?
  [/INST]
  
  Absolutely, I'd be happy to explain Wittgenstein's picture theory of meaning. This theory is presented in his early work,
the Tractatus Logico-Philosophicus.

The picture theory of meaning is a theory about the relationship between language and the world. According to this theory,
propositions (i.e., sentences) in language can represent the world accurately, just like a picture can depict a scene.

Wittgenstein argues that for a proposition to represent a state of affairs in the world, it must have the same logical
structure as the state of affairs. This is similar to how a picture's structure must match the scene it depicts for it to be
a faithful representation.

The pr

In [None]:
# Manual check: tautologies and contradictions in the Tractatus.
question = 'What does Wittgenstein say about tautologies and contradictions in the Tractatus?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What does Wittgenstein say about tautologies and contradictions in the Tractatus?
  [/INST]
  
  Wittgenstein, in his early work Tractatus Logico-Philosophicus, discusses tautologies and contradictions primarily in the
context of his picture theory of language. According to this theory, propositions are meaningful if they picture a possible
state of affairs in the world.

Tautologies and contradictions, for Wittgenstein, are at the extremes of this spectrum of meaningful propositions. 

A tautology, in Wittgenstein's view, is a proposition that is true in all possible states of affairs. It's a statement that
adds no new information about the world because it's true regardless of the state of the world. For example, the statement
"Either it's ra

In [None]:
# Manual check: criterion for correctness in the private language argument.
question = "Why is the notion of a criterion for correctness important for Wittgenstein's private language argument?"
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  Why is the notion of a criterion for correctness important for Wittgenstein's private language argument?
  [/INST]
  
  The notion of a criterion for correctness is central to Ludwig Wittgenstein's private language argument, which is a key
concept in his later philosophy. The argument is presented in his posthumously published work, "Philosophical
Investigations."

Wittgenstein argues that the idea of a private language, a language only understood by its user, is incoherent. He uses the
example of someone inventing a private language to describe their sensations, such as pain. However, Wittgenstein argues that
without a criterion for correctness, there can be no meaningful language.

A criterion for correctness is a standard or rule that determ

In [None]:
# Manual check: aspect perception.
question = 'What does Wittgenstein say about aspect perception?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What does Wittgenstein say about aspect perception?
  [/INST]
  
  Wittgenstein's philosophy on aspect perception is primarily discussed in his later work, "Philosophical Investigations." He
argues that our perception of aspects, or the way we see things, is not solely determined by the stimuli we receive from the
world. Instead, he suggests that our perception is heavily influenced by our language and the concepts we have at our
disposal.

Wittgenstein uses the example of seeing a drawing as either a duck or a rabbit, depending on how we're instructed to see it.
This demonstrates that the same image can be perceived in different ways, depending on our mental set or the concept we
apply. This is not a matter of seeing new aspects, but rather se

In [None]:
# Manual check: the paradox of ostension.
question = 'What is the paradox of ostension?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What is the paradox of ostension?
  [/INST]
  
  The paradox of ostension is a concept that comes from Ludwig Wittgenstein's later philosophy, particularly his work in the
Philosophical Investigations. Ostension is a way of explaining the meaning of a word by pointing to an example of its use.
The paradox arises when we try to use ostension to explain the meaning of a word in a precise and definitive way.

The paradox goes like this: if I point to a particular use of a word and say "this is what I mean by this word," I'm using
the word to explain the word. But in order to understand my use of the word, you have to already understand the word. So, the
explanation doesn't actually help to clarify the meaning of the word. It's a kind of circular a

In [None]:
# Manual check: the builders' language example.
question = "What is Wittgenstein trying to show with the builders' language example?"
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What is Wittgenstein trying to show with the builders' language example?
  [/INST]
  
  Wittgenstein, in his later philosophy, uses the builders' language example to illustrate the idea that language is a tool
used for specific purposes within specific contexts. In this example, he describes a simple language consisting of orders and
reports used by two builders, Peter and Paul, to coordinate their work. The builders' language is limited, consisting only of
words like "slab," "pillar," "beam," and "column," which are used to request and acknowledge the delivery of building
materials.

Wittgenstein is trying to show that the meaning of these words is determined by their use in the language game, rather than
by reference to objects or concepts in

In [None]:
# Manual check: the "five red apples" example.
question = 'What did Wittgenstein show using the "five red apples" example?'
query_model(question)

<s> 
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein.   Your task is to answer the
following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  What did Wittgenstein show using the "five red apples" example?
  [/INST]
  
  Wittgenstein, in his later philosophy, used the "five red apples" example to illustrate his concept of language games and
meaning. In this example, he asked us to imagine a language game where a builder asks for "five red apples" using a language
consisting of color and number words. The builder's assistant, who understands this specific language game, can respond
correctly by providing five red apples.

Wittgenstein showed that the meaning of a word or a sentence is not determined by some inherent property or a private mental
image but by its use in a specific language game. In other words, the meaning is determined by the rules of the particular
language game in wh

# Generate Wittgenbot Answers for LLM as a Judge

## Query Model Function (Sliced Output)

In [None]:
def query_model(question):
  """Ask the model one question and return only the generated answer text.

  The question is inserted into the [INST] prompt template, tokenized, and
  passed to `model.generate` with sampling enabled, so answers can differ
  between calls. Defining this function replaces any earlier `query_model`
  in the running kernel.

  Args:
    question: The question text to insert into the prompt.

  Returns:
    The decoded answer as a string, without the prompt or special tokens and
    with surrounding whitespace stripped.
  """

  prompt_template = """
  [INST]
  You are a helpful chatbot assistant specialized in the philosophy of Ludwig Wittgenstein. \
  Your task is to answer the following question about the philosophy of Ludwig Wittgenstein in a conversational, clear and coherent tone:
  {question}
  [/INST]
  """
  prompt = prompt_template.format(question=question)

  encodeds = tokenizer(prompt,
                       return_tensors="pt",
                       add_special_tokens=True)

  # Follow the model's own device instead of assuming 'cuda:0'.
  model_inputs = encodeds.to(model.device)

  generated_ids = model.generate(**model_inputs,
                                 max_new_tokens=1000,
                                 do_sample=True,
                                 top_p=0.9,
                                 temperature=0.7,
                                 # The unk token id is reused as the padding id
                                 # (presumably because no pad token is defined — confirm).
                                 pad_token_id=tokenizer.unk_token_id)

  # The generated sequence starts with the prompt tokens. Drop them at the
  # token level: slicing the decoded string by len(prompt) is only correct
  # when decoding reproduces the prompt character-for-character.
  prompt_token_count = model_inputs['input_ids'].shape[1]
  answer_ids = generated_ids[0][prompt_token_count:]

  response = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

  return response

## Test Questions

In [None]:
# Evaluation questions about the Tractatus; stored under the 'TLP' key of
# `test_questions`.
TLP_test_questions = [
    'How does Wittgenstein define the concept of a "proposition" in the Tractatus?',
    'What role does logic play in the structure of reality as presented in the Tractatus?',
    'Can you explain the picture theory of language proposed in the Tractatus?',
    'How does Wittgenstein distinguish between what can be said and what can only be shown?',
    'How are facts and states of affairs conceptualized in the Tractatus?',
    'What is the significance of the limit of language in the Tractatus?',
    'How does the Tractatus address the relationship between language and reality?',
    'What is the purpose of the ladder metaphor in the conclusion of the Tractatus?',
    'What is meant by "propositional logic" in the Tractatus, and how is it significant to the work’s overall argument?',
    'How does the Tractatus critique the possibility of metaphysical propositions?',
    'What is the role of silence in Wittgenstein’s philosophy as expressed in the Tractatus?',
    'What did Wittgenstein mean by "philosophy is not a body of doctrine but an activity" in the Tractatus?',
    'How does Wittgenstein treat the problem of solipsism in the Tractatus?',
    'What does Wittgenstein mean by "The limits of my language mean the limits of my world"?',
    'How does the Tractatus conceptualize the idea of sense and nonsense in language?',
]

# Evaluation questions about the Philosophical Investigations; stored under the
# 'PI' key of `test_questions`.
PI_test_questions = [
    'In Wittgenstein\'s Philosophical Investigations, what determines the meaning of a word?',
    'How did Wittgenstein use the example of "games" in Philosophical Investigations to illustrate the family resemblance concept?',
    'In Philosophical Investigations, what is the concept of family resemblance?',
    'In the context of Philosophical Investigations, is the existence of a private language possible?',
    'How can we confirm that someone is following a rule, according to Philosophical Investigations?',
    'In Philosophical Investigations, how can the concept of sameness be used to teach a rule?',
    'What is the role of language-games in Wittgenstein\'s Philosophical Investigations?',
    'How is the idea that mental processes form the basis of our understanding of language critiqued in Philosophical Investigations?',
    'Do private mental objects exist according to Wittgenstein’s Philosophical Investigations?',
    'What is the relationship between forms of life and language in Philosophical Investigations?',
    'In Philosophical Investigations, how is it explained that the meaning of a word is its use in language?',
    'How do philosophical problems arise from misunderstandings of language, as discussed in Philosophical Investigations?',
    'What is problematic about the Augustinian view on meaning in Philosophical Investigations?',
    'In Philosophical Investigations, what is said about the misguided nature of philosophical questions?',
    'What is the purpose of the analogy with the toolbox in Philosophical Investigations?',
    'In what ways does the notion of "language-games" in Philosophical Investigations challenge traditional epistemology?',
    'What implications does the critique of private language in Philosophical Investigations have for theories of consciousness?',
    'How is the concept of pain used in Philosophical Investigations to argue against the possibility of a private language?',
    'In what ways does the examination of pain\'s sameness in Philosophical Investigations challenge the understanding of subjective experiences?',
    'What are the limitations of language, as discussed in Wittgenstein’s Philosophical Investigations?',
    'In Philosophical Investigations, what is the relationship between forms of life and understanding philosophy?'
]

# Question lists keyed by a short label for the work they cover. The answer
# generation loop iterates over this mapping and reuses the labels as keys.
test_questions = {
    'TLP': TLP_test_questions,
    'PI': PI_test_questions
}

## Generate Answers for Test Questions & Save to Drive

In [None]:
# Collect one generated answer per test question, grouped by work:
# {work label: {question: answer}}.
wittgenbot_answers = {}

for work, questions in test_questions.items():

  # Register the per-work dict before generating, and fill it one answer at a
  # time, so answers produced so far stay available if a later call fails.
  answers_for_work = wittgenbot_answers[work] = {}

  for question in questions:
    answers_for_work[question] = query_model(question)

# Serialization to JSON is done in the save cell; the extra json.dumps that used
# to end this cell produced a value that was overwritten before it was read.

In [None]:
# Target file inside the Drive data folder (`data_dir` is a string ending in '/').
wittgenbot_answers_file_name = 'wittgenbot_answers.txt'
wittgenbot_answers_file_path = f'{data_dir}{wittgenbot_answers_file_name}'

# Serialize the nested {work: {question: answer}} dict as indented JSON text.
wittgenbot_answers_json = json.dumps(wittgenbot_answers, indent=4)

# Write the JSON text to the target file, replacing any existing content.
with open(wittgenbot_answers_file_path, 'w') as answers_file:
    answers_file.write(wittgenbot_answers_json)

print(f'File saved as {wittgenbot_answers_file_name}')

File saved as wittgenbot_answers.txt
