## Perplexity

In [5]:
from models.utils import OSModel
from models.utils import calculate_perplexity
from pprint import pp

# Chat-formatted prompt. The [[question]] placeholder is replaced with the
# actual user question before the prompt is sent to the model.
prompt_template = """
<|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 23 July 2024

You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>

[[question]]<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Load the local GGUF checkpoint once; the cells below reuse `model`.
model = OSModel(
    model_path='/data/users/fraspant/LLMs/llama-3.1-8b-it.gguf',
    n_ctx=512,
    # NOTE(review): presumably required so per-token logprobs are available — confirm.
    logits_all=True,
)

In [6]:
# Substitute the question under test into the template's [[question]] slot.
prompt = prompt_template.replace(
    '[[question]]',
    'Is the sun hot or cold?',
)

In [9]:

# Run the same prompt once without a grammar and once per grammar whose root
# rule is a single literal sentence, printing the perplexity of each output.
for raw_grammar in (
    None,
    'root ::= "The sun is extremely hot" ',
    'root ::= "The sun is extremely cold" ',
    'root ::= "The sun is a square" ',
):
    response = model.invoke(
        prompt=prompt,
        logprobs=True,
        max_tokens=150,
        raw_grammar=raw_grammar,
        temperature=0.5,
        top_p=1,
        top_k=10,
        min_p=0.1,
        tfs_z=1,
        repeat_penalty=1,
    )
    choice = response['choices'][0]

    print(f'Grammar: {raw_grammar}')
    print(f"Response: {choice['text']}")
    print('Perplexity:', calculate_perplexity(choice['logprobs']))
    print('Logprobs:\n')
    # Only the first ten entries are shown to keep the cell output short.
    pp(choice['logprobs']['top_logprobs'][:10])
    # Blank lines around a 25-character rule separate consecutive runs.
    print('\n', '#' * 25, '\n', sep='\n')

Grammar: None
Response: The sun is extremely hot. Its surface temperature is about 5,500 degrees Celsius (9,932 degrees Fahrenheit), and its core is a scorching 15,000,000 degrees Celsius (27,000,000 degrees Fahrenheit). This intense heat is what makes the sun shine and gives us light and warmth.
Perplexity: 1.163486123085022
Logprobs:

[{'The': np.float32(-0.00028093686)},
 {' sun': np.float32(-0.03550408)},
 {' is': np.float32(-0.003335153)},
 {' extremely': np.float32(-0.7449534)},
 {' hot': np.float32(-0.001265202)},
 {'.': np.float32(-0.07450456)},
 {' Its': np.float32(-0.6453549)},
 {' surface': np.float32(-0.003575007)},
 {' temperature': np.float32(-0.0024374798)},
 {' is': np.float32(-0.14349708)}]


#########################


Grammar: root ::= "The sun is extremely hot" 
Response: The sun is extremely hot
Perplexity: 1.1700749397277832
Logprobs:

[{'The': np.float32(-0.00028093686)},
 {' sun': np.float32(-0.03550408)},
 {' is': np.float32(-0.003335153)},
 {' extremely': np.f

In [None]:

# Define the grammar in this cell rather than relying on whatever value the
# name `raw_grammar` was left with by a previously executed cell; the output
# recorded for this cell corresponds to the "extremely hot" sentence.
raw_grammar = """root ::= "The sun is extremely hot" """

response = model.invoke(prompt=prompt, logprobs = True, max_tokens=150, 
                        raw_grammar=raw_grammar,
                        temperature=0.5,
                        top_p = 1,
                        top_k=10,
                        min_p=0.1,
                        tfs_z=1,
                        repeat_penalty=1,)

# Report the generated text, its perplexity, and the per-token logprob entries.
print('Response:', response['choices'][0]['text'])
print('Perplexity:', calculate_perplexity(response['choices'][0]['logprobs']))
print('Logprobs:\n')
pp(response['choices'][0]['logprobs']['top_logprobs'])


Response: The sun is extremely hot
Perplexity: 1.1700749397277832
Logprobs:

[{'The': np.float32(-0.00028093686)},
 {' sun': np.float32(-0.03550408)},
 {' is': np.float32(-0.003335153)},
 {' extremely': np.float32(-0.7449534)},
 {' hot': np.float32(-0.001265202)}]


In [None]:
# Grammar whose root rule is one literal sentence (note the trailing space).
raw_grammar = 'root ::= "The sun is extremely cold" '

response = model.invoke(
    prompt=prompt,
    logprobs=True,
    max_tokens=150,
    raw_grammar=raw_grammar,
    temperature=0.5,
    top_p=1,
    top_k=10,
    min_p=0.1,
    tfs_z=1,
    repeat_penalty=1,
)
choice = response['choices'][0]

# Report the generated text, its perplexity, and the per-token logprob entries.
print(f"Response: {choice['text']}")
print('Perplexity:', calculate_perplexity(choice['logprobs']))
print('Logprobs:\n')
pp(choice['logprobs']['top_logprobs'])


Response: The sun is extremely cold
Perplexity: 26.019214630126953
Logprobs:

[{'The': np.float32(-0.00028093686)},
 {' sun': np.float32(-0.03550408)},
 {' is': np.float32(-0.003335153)},
 {' extremely': np.float32(-0.7449534)},
 {' hot': np.float32(-0.001265202), ' cold': np.float32(-15.510102)}]


In [None]:
# Grammar whose root rule is one literal sentence (note the trailing space).
raw_grammar = 'root ::= "The sun is a square" '

response = model.invoke(
    prompt=prompt,
    logprobs=True,
    max_tokens=150,
    raw_grammar=raw_grammar,
    temperature=0.5,
    top_p=1,
    top_k=10,
    min_p=0.1,
    tfs_z=1,
    repeat_penalty=1,
)
choice = response['choices'][0]

# Report the generated text, its perplexity, and the per-token logprob entries.
print(f"Response: {choice['text']}")
print('Perplexity:', calculate_perplexity(choice['logprobs']))
print('Logprobs:\n')
pp(choice['logprobs']['top_logprobs'])

Response: The sun is a square
Perplexity: 335.3362121582031
Logprobs:

[{'The': np.float32(-0.00028093686)},
 {' sun': np.float32(-0.03550408)},
 {' is': np.float32(-0.003335153)},
 {' extremely': np.float32(-0.7449534), ' a': np.float32(-6.5223007)},
 {' massive': np.float32(-0.18033041), ' s': np.float32(-9.631769)},
 {'izzling': np.float32(-0.030016523), 'quare': np.float32(-18.697609)}]


## Depth of proof

In [None]:
import json
import sys
from models.symbolic_solvers.fol_solver.Formula import FOL_Formula
from models.symbolic_solvers.fol_solver.prover9_solver import FOL_Prover9_Program

def check_fol_validity(premises, conclusion):
    """Check a set of FOL premises and a conclusion, then run them through Prover9.

    Every premise and the conclusion is first parsed with ``FOL_Formula``; if
    all of them are accepted, a ``FOL_Prover9_Program`` is built from them and
    executed.

    Args:
        premises: Iterable of premise formula strings.
        conclusion: Conclusion formula string.

    Returns:
        A 3-tuple ``(is_valid, prover9_program, answer)`` on every path, so
        callers can always unpack the result:

        * ``(False, None, None)`` when a premise or the conclusion is rejected
          by ``FOL_Formula``.
        * ``(False, prover9_program, None)`` when the program object reports
          ``flag`` as false (its ``formula_error`` is printed).
        * ``(False, prover9_program, answer)`` when ``execute_program`` reports
          an error message.
        * ``(True, prover9_program, answer)`` otherwise.

    A diagnostic message is printed for every failure path.
    """
    # Reject the input as soon as any premise fails to parse.
    for premise in premises:
        if not FOL_Formula(premise).is_valid:
            print(f"Invalid FOL premise: {premise}")
            return False, None, None

    # The conclusion gets the same check as the premises.
    if not FOL_Formula(conclusion).is_valid:
        print(f"Invalid FOL conclusion: {conclusion}")
        return False, None, None

    # The solver consumes the raw formula strings, not the parsed objects.
    logic_program = {
        "fol_rules": premises,
        "fol_conc": conclusion
    }
    prover9_program = FOL_Prover9_Program(logic_program)

    if not prover9_program.flag:
        print(f"Error in logic program: {prover9_program.formula_error}")
        # Hand the program back so the caller can inspect what went wrong.
        return False, prover9_program, None

    answer, error_message = prover9_program.execute_program()
    if error_message:
        print(f"Error during execution: {error_message}")
        # An execution error is a failure, not a success.
        return False, prover9_program, answer

    return True, prover9_program, answer


In [None]:

# Example problem: five universally quantified rules plus one fact about the
# constant `bonnie`, and a conclusion to be checked against them.
premises = [
    "∀x (TalentShows(x) → Engaged(x))",
    "∀x (TalentShows(x) ∨ Inactive(x))",
    "∀x (Chaperone(x) → ¬Students(x))",
    "∀x (Inactive(x) → Chaperone(x))",
    "∀x (AcademicCareer(x) → Students(x))",
    "(Engaged(bonnie) ∧ Students(bonnie)) ⊕ (¬Engaged(bonnie) ∧ ¬Students(bonnie))",
]
conclusion = "AcademicCareer(bonnie) ⊕ Chaperone(bonnie) → AcademicCareer(bonnie) ⊕ Inactive(bonnie)"

# `prover9_program` and `answer` are kept for inspection in later cells.
is_valid, prover9_program, answer = check_fol_validity(premises, conclusion)
print(
    "The premises and conclusion are valid."
    if is_valid
    else "The premises and conclusion are not valid."
)

In [None]:
# Show the proof text exposed by the program's underlying prover object.
proof_text = prover9_program.prover.proof()
print(proof_text)

Prover9 (64) version 2009-11A, November 2009.
Process 57838 was started by fraspant on hotpot,
Mon Oct 28 18:00:00 2024
The command was "models/symbolic_solvers/Prover9/bin/prover9".



% -------- Comments from original proof --------
% Proof 1 at 0.00 (+ 0.00) seconds.
% Length of proof is 27.
% Level of proof is 6.
% Maximum clause weight is 2.000.
% Given clauses 0.


1 (all x (TalentShows(x) -> Engaged(x))).  [assumption].
2 (all x (TalentShows(x) | Inactive(x))).  [assumption].
3 (all x (Chaperone(x) -> -Students(x))).  [assumption].
4 (all x (Inactive(x) -> Chaperone(x))).  [assumption].
5 (all x (AcademicCareer(x) -> Students(x))).  [assumption].
6 Engaged(Bonnie) & Students(Bonnie) & -(-Engaged(Bonnie) & -Students(Bonnie)) | -(Engaged(Bonnie) & Students(Bonnie)) & -Engaged(Bonnie) & -Students(Bonnie).  [assumption].
7 (AcademicCareer(Bonnie) & -Chaperone(Bonnie) | -AcademicCareer(Bonnie) & Chaperone(Bonnie) -> AcademicCareer(Bonnie)) & -Inactive(Bonnie) | -(AcademicCareer(Bonni