In [1]:
import json
import re
import ssl
import uuid

import pip_system_certs  # not referenced directly below; presumably imported for its side effects
import requests

from auth import token
from ontology_retrieval.database import VectDB

In [2]:
# Replace ssl's default HTTPS context factory with the unverified one, but only
# when this interpreter actually exposes `ssl._create_unverified_context`.
# NOTE(review): this presumably disables certificate verification for anything
# that builds its context through `ssl._create_default_https_context` — confirm
# that this is intended outside of local experimentation.
if hasattr(ssl, '_create_unverified_context'):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context

In [3]:
class Gigachat:
    """Small client for the GigaChat chat-completions HTTP API.

    An access token is requested from the OAuth endpoint when the object is
    created and kept in ``self.token``. ``generate`` sends a single-message
    chat request and returns the text of the first choice.
    """

    OAUTH_URL = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
    CHAT_URL = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"
    # Seconds to wait for each HTTP call; without a timeout `requests` can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        self.gettoken()

    def gettoken(self):
        """Request a new access token and store it in ``self.token``.

        Raises:
            Whatever ``response.raise_for_status()`` raises when the OAuth
            endpoint answers with an HTTP error status.
        """
        payload = 'scope=GIGACHAT_API_PERS'
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json',
            # RqUID identifies a single request, so generate a fresh one
            # instead of reusing a constant.
            'RqUID': str(uuid.uuid4()),
            'Authorization': token,
        }

        response = requests.request(
            "POST",
            self.OAUTH_URL,
            headers=headers,
            data=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail with the HTTP error rather than a KeyError on 'access_token'.
        response.raise_for_status()
        self.token = response.json()['access_token']

    def generate(self, prompt, _allow_token_refresh=True):
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Text placed in the ``content`` of the user message.
            _allow_token_refresh: Internal flag. When true, a 401 response
                triggers one token refresh followed by one retry.

        Returns:
            The ``content`` of the first choice in the JSON response.

        Raises:
            Whatever ``response.raise_for_status()`` raises for an HTTP error
            status that is not resolved by the single token refresh.
        """
        payload = json.dumps({
            "model": "GigaChat",
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": 1,
            "top_p": 0.1,
            "n": 1,
            "stream": False,
            "max_tokens": 512,
            "repetition_penalty": 1,
            "update_interval": 0
        })
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': f'Bearer {self.token}'
        }

        response = requests.request(
            "POST",
            self.CHAT_URL,
            headers=headers,
            data=payload,
            timeout=self.REQUEST_TIMEOUT,
        )

        # A 401 is treated as an expired/invalid access token: refresh once and
        # retry once. Any other failure is raised instead of recursing forever.
        if response.status_code == 401 and _allow_token_refresh:
            self.gettoken()
            return self.generate(prompt, _allow_token_refresh=False)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']

In [4]:
# Creating the client immediately requests an access token (__init__ calls
# gettoken), so this cell performs a network call using the imported `token`.
gigachat = Gigachat()

In [5]:
# Smoke test: send a short Russian greeting ("How are you?") through the client.
gigachat.generate('Как дела?')

'Всё хорошо, спасибо за интерес. Как вы поживаете?'

# Prompting

In [6]:
def top_one_llm(question_text, gigachat):
    """Ask the model for a single short answer to ``question_text``.

    Args:
        question_text: The question to put into the prompt.
        gigachat: Object exposing ``generate(prompt)``; its return value is
            passed through unchanged.

    Returns:
        Whatever ``gigachat.generate`` returns for the built prompt.
    """
    # The question and the answer cue are placed on separate lines so the cue
    # is not glued to the end of the question text.
    prompt = (
        'You need to answer the question below only with the name of person, location, character etc. '
        'Do not give any additional information, facts and thoughts.\n'
        f'Question: {question_text}\n'
        'Your answer:'
    )
    return gigachat.generate(prompt)

In [8]:
# Example: ask for a single answer to a factual question.
top_one_llm("Who wrote 'Gone with The Wind?'", gigachat)

'Margaret Mitchell'

In [20]:
def options_llm(question_text, n_vars, gigachat):
    """Ask the model for ``n_vars`` candidate answers and return them as a list.

    Args:
        question_text: The question to put into the prompt.
        n_vars: Number of options requested in the prompt. The reply is not
            truncated or padded to this number.
        gigachat: Object exposing ``generate(prompt)`` that returns a string.

    Returns:
        list[str]: One stripped entry per non-blank line of the reply.
    """
    prompt = (
        f'You need to answer the question below only with {n_vars} names of people, locations, characters etc. '
        'Do not give any additional information, facts and thoughts. '
        f'Generate {n_vars} most likely options, each one starting with newline.\n'
        f'Question: {question_text}\nYour answers:'
    )
    reply = gigachat.generate(prompt)
    # The prompt asks for one option per line, so split on every line break
    # (not only on blank lines) and drop empty or whitespace-only lines.
    return [line.strip() for line in reply.splitlines() if line.strip()]

In [23]:
# Example: request three candidate answers for one question.
options_llm("Who said 'In God we trust'?", 3, gigachat)

['George Washington', 'Thomas Jefferson', 'Benjamin Franklin']

In [31]:
def get_entities(question_text, gigachat):
    """Ask the model to list the named entities mentioned in a question.

    The prompt contains one worked example whose answer lists each entity on
    its own line, followed by the actual question.

    Args:
        question_text: The question to extract entities from.
        gigachat: Object exposing ``generate(prompt)`` that returns a string.

    Returns:
        list[str]: One stripped entry per non-blank line of the reply.
    """
    prompt = 'You need to find all denominations of people, locations, characters, buildings, movies etc. '\
                'from the question below. ' \
                'Do not give any additional information, facts and thoughts. Each one should be on a separate line.\n' \
                "Question: Who starred in 'Bride Wars' and married Anne Hathaway?\n" \
                "Answer: Bride Wars\nAnne Hathaway\n" \
                "Question: " + question_text + '\nAnswer:'
    reply = gigachat.generate(prompt)
    # The worked example separates entities with single newlines, so split on
    # every line break; splitting on blank lines only would merge entities.
    return [line.strip() for line in reply.splitlines() if line.strip()]

In [32]:
# Example: extract the entities mentioned in a question.
get_entities("Who wrote 'Don Quijote' for the Bolshoi Theatre?", gigachat)

['Don Quijote']

In [33]:
def relation_extraction(answer_candidate, entity, gigachat):
    """Ask the model for a one-sentence relation between two names.

    Args:
        answer_candidate: Second name inserted into the prompt.
        entity: First name inserted into the prompt.
        gigachat: Object exposing ``generate(prompt)``.

    Returns:
        Whatever ``gigachat.generate`` returns for the built prompt.
    """
    request_text = 'Explain how {} and {} are related in one sentence.'.format(
        entity, answer_candidate
    )
    explanation = gigachat.generate(request_text)
    return explanation

In [34]:
# Example: candidate 'England', entity 'India' — the prompt names the entity first.
relation_extraction('England', 'India', gigachat)

'India and England are related historically as the former was once a British colony and the two countries share a rich cultural and linguistic heritage.'

# Pipelines

In [38]:
# Vector database wrapper imported from ontology_retrieval.database; it is
# passed to main_pipeline, which calls its `query` method.
database = VectDB()

In [44]:
def main_pipeline(question, gigachat, n_candidates, database, n_ontology_instances):
    """Gather LLM-generated and database-retrieved context for candidate answers.

    Steps: generate candidate answers, extract the entities of the question,
    then for every candidate (a) ask the model how the candidate relates to
    each entity and (b) query ``database`` with the question/candidate pair.

    Args:
        question: Question text.
        gigachat: Client forwarded to the LLM helper functions.
        n_candidates: Number of candidate answers to request.
        database: Object with ``query(text, n)``. Assumes the result is a
            mapping whose ``'documents'`` entry is a sequence whose first item
            is a list of strings — TODO confirm against VectDB.
        n_ontology_instances: Second argument forwarded to ``database.query``.

    Returns:
        tuple: ``({'inner': {...}, 'outer': {...}}, first_candidate)``. Both
        inner dicts map each candidate to a newline-joined string.
    """
    answer_options = options_llm(question, n_candidates, gigachat)
    mentioned = get_entities(question, gigachat)

    llm_relations = {}
    db_context = {}
    for option in answer_options:
        # 'inner': one model-written explanation per extracted entity.
        explanations = []
        for item in mentioned:
            explanations.append(relation_extraction(option, item, gigachat))
        llm_relations[option] = '\n'.join(explanations)

        # 'outer': documents returned by the database for this pair.
        lookup_text = 'Question: ' + question + '\nAnswer: ' + option
        hits = database.query(lookup_text, n_ontology_instances)
        db_context[option] = '\n'.join(hits['documents'][0])

    return {'inner': llm_relations, 'outer': db_context}, answer_options[0]

In [45]:
# Example end-to-end run: 3 candidate answers, 10 passed as n_ontology_instances.
main_pipeline("Who said 'In God we trust'?", gigachat, 3, database, 10)

({'inner': {'George Washington': 'The United States and George Washington are related because George Washington was the first President of the United States, serving from 1789 to 1797.',
   'Thomas Jefferson': "Thomas Jefferson, the third President of the United States, played a significant role in shaping the nation's founding documents and values.",
   'Benjamin Franklin': 'Benjamin Franklin was a prominent American Founding Father and statesman who played a significant role in the founding of the United States.'},
  'outer': {'George Washington': 'President is a subclass of Politician\nDeity is a subclass of Agent\nAmbassador is a subclass of Politician\nReligiousBuilding is a subclass of Building\nChurch is a subclass of ReligiousBuilding\nChristianPatriarch is a subclass of Cleric\nChristianDoctrine is a subclass of TheologicalConcept\nTemple is a subclass of ReligiousBuilding\nChief is a subclass of Politician\nReligious is a subclass of Person',
   'Thomas Jefferson': 'Deity is 