In [1]:
# Mount Google Drive into the Colab runtime so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder; the relative paths used
# later in this notebook (cpt/tokenizer, cpt/model, prompt.txt) resolve from here.
%cd /content/drive/MyDrive/ece1786/project

Mounted at /content/drive
/content/drive/MyDrive/ece1786/project


In [9]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from openai import OpenAI
import torch
import os

class JpClassifier:
    '''
    Interactive Judging/Perceiving (J/P) questionnaire.

    A chat model (`self.gpt4`) generates one question per round; the user's
    answer is appended to the running dialogue, which is then scored by a
    locally stored sequence-classification model (`self.gpt2`). The scores
    are fed back to the chat model when it generates the next question.

    Prerequisite (all paths are relative to the current working directory):
    1. A `cpt/tokenizer` folder to be loaded by `AutoTokenizer`
    2. A `cpt/model` folder to be loaded by `AutoModelForSequenceClassification`
    3. A `prompt.txt` file containing the system prompt for the chat model
    4. `OPENAI_API_KEY` set in `os.environ`
    '''

    # Locations and settings, kept in one place so they are easy to override.
    TOKENIZER_PATH = "cpt/tokenizer"
    MODEL_PATH = 'cpt/model'
    PROMPT_PATH = 'prompt.txt'
    CHAT_MODEL = "gpt-4o"
    MAX_LENGTH = 100

    def __init__(self):
        """Create the OpenAI client and load the local tokenizer and classifier."""
        # NOTE(review): this mapping says index 0 is "J", but the rest of the
        # class treats index 0 as Perceiving and index 1 as Judging. The
        # attribute is not read anywhere in this class -- confirm which
        # ordering matches the trained model before relying on it.
        self.labels = {0: "J", 1: "P"}
        self.tokenizer = None
        self.gpt2 = None
        # The client picks up OPENAI_API_KEY from the environment. Never pass
        # a key (or a placeholder string) literally here.
        self.gpt4 = OpenAI()

        self._load_tokenizer()
        self._load_gpt2()

    def _load_tokenizer(self):
        """Load the tokenizer from `TOKENIZER_PATH` into `self.tokenizer`."""
        tokenizer_path = self.TOKENIZER_PATH
        assert os.path.exists(tokenizer_path), f"tokenizer not found: {tokenizer_path}"

        print("Loading existing tokenizer from:", tokenizer_path)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    def _load_gpt2(self):
        """Load the sequence-classification model from `MODEL_PATH` into `self.gpt2`."""
        model_path = self.MODEL_PATH
        assert os.path.exists(model_path), f"model not found: {model_path}"

        print("Loading existing model from:", model_path)
        self.gpt2 = AutoModelForSequenceClassification.from_pretrained(model_path)

    def _init_gpt4(self):
        """
        Check the chat-model prerequisites and return the system prompt.

        The environment is only inspected, never modified, so a real
        `OPENAI_API_KEY` is left untouched.

        Returns:
            str: the full contents of `PROMPT_PATH`.

        Raises:
            AssertionError: if `OPENAI_API_KEY` is not set or the prompt file
                does not exist.
        """
        prompt_path = self.PROMPT_PATH
        assert 'OPENAI_API_KEY' in os.environ, "OPENAI_API_KEY is not set"
        assert os.path.exists(prompt_path), f"prompt file not found: {prompt_path}"

        # No warm-up request is sent: the prompt is passed as the system
        # message on every call in `_generate_question` instead.
        with open(prompt_path, 'r', encoding='utf-8') as file:
            return file.read()

    def _to_probs(self, logits: torch.Tensor, dim=-1):
        """
        Convert logits to probabilities with a softmax along `dim`.

        Args:
            logits: tensor of raw scores; the first entry along axis 0 is the
                one returned.
            dim: axis to normalise over.

        Returns:
            list: probabilities for the first entry along axis 0.
        """
        return torch.softmax(logits, dim=dim).tolist()[0]

    def classify(self, text: str):
        """
        Score `text` with the local classifier.

        Args:
            text: the dialogue so far; it is truncated to `MAX_LENGTH` tokens.

        Returns:
            list: class probabilities as produced by `_to_probs`.
        """
        encoded_input = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=self.MAX_LENGTH,
            return_tensors="pt",
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.gpt2(**encoded_input)
        return self._to_probs(outputs.logits)

    @staticmethod
    def _format_probs(probs: list):
        """Render the two probabilities as the percentage line sent to the chat model."""
        return f'Perceiving: {probs[0]*100}%, Judging: {probs[1]*100}%'

    def _generate_question(self, prompt, probs: list):
        """
        Ask the chat model for the next question.

        Args:
            prompt: system prompt text.
            probs: two-element sequence; index 0 is reported as Perceiving and
                index 1 as Judging.

        Returns:
            str: the content of the first choice returned by the chat model.
        """
        status = self._format_probs(probs)
        print(status)
        completion = self.gpt4.chat.completions.create(
            model=self.CHAT_MODEL,
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": status},
            ],
            temperature=0.7,
            top_p=1,
        )
        return completion.choices[0].message.content

    def start(self, num_questions: int = 3):
        """
        Run the interactive session and return the predicted letter.

        Each round generates a question from the latest probabilities, reads
        the answer from stdin, appends both to the dialogue, and re-scores the
        whole dialogue.

        Args:
            num_questions: number of question/answer rounds (default 3).

        Returns:
            str: 'J' if the final probability at index 1 exceeds the one at
            index 0, otherwise 'P'.

        Raises:
            ValueError: if `num_questions` is less than 1.
        """
        if num_questions < 1:
            raise ValueError("num_questions must be at least 1")

        prompt = self._init_gpt4()
        responses = []
        # The first question starts from an uninformed 50% / 50% split; every
        # later question uses the probabilities from the previous round.
        probs = [0.5, 0.5]

        for _ in range(num_questions):
            question = self._generate_question(prompt, probs)
            answer = input(f'Question: {question}\n')
            responses.append(question)
            responses.append(answer)
            probs = self.classify('\n'.join(responses))

        return 'J' if probs[1] > probs[0] else 'P'


In [10]:
# Build the classifier (loads the tokenizer and model, creates the OpenAI client),
# run the interactive question/answer session, and keep the returned letter in `c2`.
c2 = JpClassifier().start()
# Show the final prediction ('J' or 'P').
print(c2)

Loading existing tokenizer from: cpt/tokenizer
Loading existing model from: cpt/model
Perceiving: 50.0%, Judging: 50.0%
Question: Do you prefer to have detailed plans before starting a project, or are you comfortable diving in and adapting as you go?
detailed plans
Perceiving: 46.59121334552765%, Judging: 53.40878963470459%
Question: Do you prefer planning your day in advance or keeping your schedule flexible?
plan in advance
Perceiving: 50.88996887207031%, Judging: 49.110034108161926%
Question: Do you prefer to keep your options open or stick to a plan once you've made it?
stick to a plan
J
