In [1]:
%load_ext autoreload 
%autoreload 2

from preprocessing import FileIO, Vectorizor
from opensearch_interface import OpenSearchClient
from reranker import ReRanker
from index_templates import youtube_body

import os
import time
from rich import print
from dotenv import load_dotenv
load_env=load_dotenv('./.env', override=True)
from typing import Literal, List
from tiktoken_functions import Tokenizer
from openai_interface import GPT_Turbo
from prompt_templates import question_answering_prompt, question_answering_system, test_prompt
import tiktoken

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
print(test_prompt)

In [17]:
osclient=OpenSearchClient()
reranker = ReRanker()
osclient.show_indexes()

health status index                              uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   kw-impact-theory                   2MjMun4bQYOoeUpv5UsJxg   3   1      33164            0     29.4mb         29.4mb
yellow open   semantic-impact-theory-196         SY2nXyvmQ9i5LAS4hmn82g   3   1      37007            0    694.6mb        694.6mb
yellow open   kw-impact-theory-196               vsuHausxRb6EjysQriOX5w   3   1      37007            0     30.5mb         30.5mb
yellow open   paul-graham3                       -74ZPvxoSMmtCPSzAI9o1A   1   1         18            0    768.2kb        768.2kb
yellow open   semantic-impact-theory-128         FJKOre3yT9aFxlF-_TvcTA   3   1      60380            0        1gb            1gb
green  open   .opendistro_security               _QeSqO4CQN2IU8VpE9hnPw   1   0         10            0     75.6kb         75.6kb
yellow open   semantic-impact-theory             5khyvtPQRASCMmhZiQTcVw   3   1      33164

In [5]:
index_name = 'impact-theory-minilm-196'

In [170]:
query = 'How can one master the art of life?'

In [171]:
def retrieve_pipeline(query: str, 
                      index_name: str,
                      search_type: Literal['kw', 'vector', 'hybrid'], 
                      retriever: OpenSearchClient, 
                      reranker: ReRanker,
                      tokenizer: tiktoken.core.Encoding,
                      kw_size: int=50,
                      vec_size: int=50,
                      top_k: int=4,
                      rerank_threshold: float=None,
                      token_threshold: int=4000,
                      return_text: bool=True,
                      verbose: bool=True
                      ) -> 'str | List[dict]':
    """Search an index, rerank the hits, and trim them to fit a token budget.

    Args:
        query: Search query; also passed to the reranker.
        index_name: Index to search. For 'hybrid' the same name is used as both
            the keyword index and the vector index.
        search_type: 'kw', 'vector' or 'hybrid'; selects which retriever method
            is called.
        retriever: Object providing ``keyword_search``, ``vector_search`` and
            ``hybrid_search``.
        reranker: Object providing ``rerank(results, query, top_k=..., threshold=...)``.
        tokenizer: Object whose ``encode(text)`` returns a sequence of tokens;
            only its length is used.
        kw_size: ``size`` / ``kw_size`` forwarded to the keyword search.
        vec_size: ``size`` / ``vec_size`` forwarded to the vector search.
        top_k: Forwarded to the reranker.
        rerank_threshold: Forwarded to the reranker as ``threshold``.
        token_threshold: Maximum token count allowed for the joined content.
        return_text: When True return the joined content string, otherwise
            return the (possibly trimmed) list of reranked hits.
        verbose: When True print the initial and final token counts.

    Returns:
        The space-joined ``_source.content`` of the kept hits, or the kept hits
        themselves when ``return_text`` is False.

    Raises:
        ValueError: If ``search_type`` is not one of the supported values.
    """
    if search_type == 'kw':
        results = retriever.keyword_search(query=query, index=index_name, size=kw_size)
    elif search_type == 'vector':
        results = retriever.vector_search(query=query, index=index_name, size=vec_size)
    elif search_type == 'hybrid':
        results = retriever.hybrid_search(query=query, 
                                          kw_index=index_name, 
                                          vec_index=index_name, 
                                          kw_size=kw_size,
                                          vec_size=vec_size)
    else:
        # Without this branch `results` would be unbound and fail later with a
        # confusing UnboundLocalError.
        raise ValueError(
            f"search_type must be one of 'kw', 'vector', 'hybrid'; got {search_type!r}"
        )

    reranked = reranker.rerank(results, query, top_k=top_k, threshold=rerank_threshold)

    def _join_and_count(hits):
        """Join the hits' content with spaces and count the tokens of the result."""
        joined = ' '.join(hit['_source']['content'] for hit in hits)
        # `encode` tokenizes one string. `encode_batch` expects a list of
        # strings, so handing it a single string does not count its tokens.
        return joined, len(tokenizer.encode(joined))

    text, token_count = _join_and_count(reranked)
    if verbose:
        print(f'Total Initial Token Count: {token_count}')
    if token_count > token_threshold:
        print('Token count exceeds token count threshold, reducing size of returned results below token threshold')
        # Drop the last hit until the text fits; stop once no hits remain so a
        # threshold that can never be met cannot loop forever.
        while token_count > token_threshold and reranked:
            reranked = reranked[:-1]
            text, token_count = _join_and_count(reranked)
        if verbose:
            print(f'Total Final Token Count: {token_count}')
    if return_text:
        return text
    return reranked

In [172]:
# `retrieve_pipeline` counts tokens with a tiktoken encoding. Build it here so the
# cell runs on a fresh kernel instead of relying on a leftover `tokenizer` variable.
# NOTE(review): cl100k_base is assumed to match the model behind GPT_Turbo — confirm.
tokenizer = tiktoken.get_encoding('cl100k_base')
resp = retrieve_pipeline(query, index_name, 'hybrid', osclient, reranker, tokenizer, top_k=5, return_text=False)

In [3]:
gpt = GPT_Turbo()

In [16]:
def generate_prompt(base_prompt: str, query: str, results: List[dict]) -> str:
    """Fill a prompt template with the question and the retrieved context.

    Args:
        base_prompt: Template string formatted with the keyword arguments
            ``question`` and ``context``.
        query: The user question substituted for ``question``.
        results: Records supplying the context. Each record is either a flat
            dict with a ``'content'`` key or a search hit whose text sits under
            ``['_source']['content']``.

    Returns:
        The formatted prompt, with the records' content joined by blank lines.

    Raises:
        KeyError: If a record has neither supported shape.
    """
    def _content(record: dict) -> str:
        # Search hits nest their text under '_source'; hand-built records
        # carry it at the top level.
        if '_source' in record:
            return record['_source']['content']
        return record['content']

    contexts = '\n\n'.join(_content(record) for record in results)
    return base_prompt.format(question=query, context=contexts)
    
# Hand-written stand-in for retrieval results: a single record whose 'content'
# is an excerpt of a paper abstract describing the DEMASQ ChatGPT detector.
# The excerpt is cut off mid-word at the end. Assigning it rebinds `resp`.
resp = [{'content': '''
The potential misuse of ChatGPT and other Large
Language Models (LLMs) has raised concerns regarding the
dissemination of false information, plagiarism, academic dis-
honesty, and fraudulent activities. Consequently, distinguishing
between AI-generated and human-generated content has emerged
as an intriguing research topic. However, current text detection
methods lack precision and are often restricted to specific tasks
or domains, making them inadequate for identifying content
generated by ChatGPT.
In this paper, we propose an effective ChatGPT detector named
DEMASQ, which accurately identifies ChatGPT-generated con-
tent. Our method addresses two critical factors: (i) the distinct
biases in text composition observed in human- and machine-
generated content and (ii) the alterations made by humans
to evade previous detection methods. DEMASQ is an energy-
based detection model that incorporates novel aspects, such
as (i) optimization inspired by the Doppler effect to capture
the interdependence between input text embeddings and output
labels, and (ii) the use of explainable AI techniques to generate
diverse perturbations.
To evaluate our detector, we create a benchmark dataset
comprising a mixture of prompts from both ChatGPT and
humans, encompassing domains such as medical, open Q&A,
finance, wiki, and Reddit. Our evaluation demonstrates that
DEMASQ achieves high accuracy in identifying content gen
'''}]

In [17]:
query = "What does DEMASQ do?"
prompt = generate_prompt(base_prompt=question_answering_prompt, query=query, results=resp)
print(prompt)

In [30]:
# Send the assembled prompt to the chat model and show the answer text.
# The result gets its own name so the `resp` records used to build the prompt
# are not overwritten by an object of a different kind.
completion = gpt.get_completion_from_messages(prompt=prompt, 
                                              system_message=question_answering_system, 
                                              max_tokens=250, 
                                              stream=False,
                                              show_response=True)
print(completion['choices'][0].message.content)

In [38]:
paper = '''
is mathematically derived for a neural network, “plug and chug” computations can be leveraged to
great efficiency to produce more-performant and -generalized models, using very little data.
Alongside escalating size and complexity, LLMs are becoming ever more central to applied work
in artificial intelligence (AI). Superlative self-attention-based models in natural language processing
(NLP) now demonstrate capabilities attracting research interest and investment alongside counter-
parts in computer vision, like the diffusion probabilistic models (Ho et al., 2020) in DAll-E (Ramesh
et al., 2021) and Stable Diffusion (Rombach et al., 2022). The potential to further amplify capabil-
ities by combining text, images, and other modalities to construct even more powerful models, as
exemplified by the likes of KOSMOS-1 (Huang et al., 2023) and GPT-4 (OpenAI, 2023), suggests
staggering advancements may be on the cusp of development.
Still, our collective understanding of the inner workings of these models is far from complete. Lim-
ited understanding in the internal mechanisms of models hinders our ability to fully exploit their
capabilities, while simultaneously raising challenges (Bommasani et al., 2022). Reliability and
safety is a primary concern: LLMs are prone to generating biased and unreliable text, and diffu-
sion models produce distorted images that conflict with basic human perception. The unpredictable
behaviors of neural models in novel contexts challenges their operational benefits to humans via
their (in)abilities to avoid inadvertent harms (Kenton et al., 2021; Weidinger et al., 2021; Tamkin
et al., 2021; Hendrycks et al., 2023). Efficiency is also a major concern (Shen et al., 2023)—
backpropagation is ubiquituous in optimization, and still entails a high computational cost, particu-
larly as models scale over larger amounts of data (Rumelhart et al., 1986a;b), escalating processing
requirements.
We ask: “how can these challenges can be overcome to ensure models are reliable, interpretable,
and efficient?”, and posit that understanding the optimization processes underlying these models is
crucial. Perhaps, grasping the intricacies of model optimization will allow for a more straightfor-
ward approach, requiring fewer iterations to achieve the same or better quality results? Furthermore,
understanding how models optimize allows us to adjust specific parameters in the weight matrices,
enabling models to perform in a desired manner. Here, we extend our knowledge of explicit solu-
tions from single-layer feed-forward neural networks, to an architecture with compositionally-linked
feed-forward and self-attention layers. Our work demonstrates an explicit optimization technique
that significantly accelerates model training processes, reaching optima far beyond the reach of
backpropagation, alone. So when this solution is applied to self-attention networks, it accelerates
time-to-optimization and finds vastly better optima with better generalization qualities, offering a
vital alternative to the current trends in neural network training.
Explicit solutions relate to recent work focused on finding that attention layers converge in direction
to SVM solutions (Tarzanagh et al., 2023) and that transformers may rediscover standard estimation
algorithms (Aky¨urek et al., 2023). Explicit solutions also connect to recent discoveries finding gen-
eralization in overparametrized networks occurs beyond the point of dataset memorization (Power
et al., 2022). Likewise, this work is also connected to efforts aimed at improving the overall train-
ing efficiency of transformers, such as one attention type developed to reduce memory reads/writes
between GPU high bandwidth memory and on-chip SRAM (Dao et al., 2022).
By conducting ablation experiments over a large number of LM architectural variants, we discover
that “warming up” (warm-start) models with the explicit solution for self-attention leads to better
generalization, more rapidly. This discovery is largely invariant to the scales of training data utilized,
i.e., warm-starts lead to objectively better models on both large and small data sets. Furthermore, our
findings indicate that iterative optimization with backpropagation only leads to generalized models
with the explicit solution—models initialized randomly at least appear to require more computation
than any conducted experiments, regardless of scale. We conjecture that model disorientation, in
fact, leads to randomly-initialized models not achieving their full potential (regardless of size), and
discuss this effect in relation to how LLMs might be overcoming disorientation in applications.
2 SAFFU LAYER ARCHITECTURE
This derivation began by analyzing word2vec’s continuous bag-of-words (CBOW) variant (Mikolov
et al., 2013; Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S., and Dean,
Jeff, 2013), and was generalized to simple single-layer LMs, and then all feed-forward neural net-
'''

In [40]:
token_limit = 16000

In [60]:
message = 660

In [61]:
input = 256 * 3 * 5
input

3840

In [62]:
output = 500

In [63]:
def cost(message, input, output, input_price_per_1k=0.001, output_price_per_1k=0.002):
    """Estimate the price of one request from its token counts.

    Prints the total number of tokens (message + input + output) as a side
    effect and returns the estimated cost.

    Args:
        message: Token count of the fixed message part of the request.
        input: Token count of the variable input part. The name shadows the
            builtin ``input`` but is kept so existing keyword callers still work.
        output: Token count of the generated output.
        input_price_per_1k: Price charged per 1,000 message/input tokens.
        output_price_per_1k: Price charged per 1,000 output tokens.

    Returns:
        Input cost plus output cost, in the currency unit of the two prices.
        NOTE(review): the defaults look like USD per 1K tokens — confirm
        against the current price list of the model in use.
    """
    input_cost = (message + input)/1000 * input_price_per_1k
    output_cost = output/1000 * output_price_per_1k
    total_tokens = message + input + output
    print(total_tokens)
    return input_cost + output_cost

In [65]:
cost(message, input, output) * 100

0.55

Bad pipe message: %s [b'\xd0\x1a\xa8\xc2\\\xaa\x10\xc0\x06oS\xaf\xe7\xaa\xfdD&i Z\xae\xc6"2O\xdf\xfd;DE\x84\xe2\xa7\xcfD\x00\xf4\xb3,\xd4\xdf\xac\x85wg\xc24\xba\x93\xdc\'\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08', b'\x0b\x08\x04\x08\x05\x08\x06\x04\x01']
Bad pipe message: %s [b"J\x11S\xbcTu\xb5a\x8d\xbf\x83\xc7Q\xa6\xec\x0b\xd9O\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x0