In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
import os
from functools import reduce

class SemanticDocument():
    """Semantic retrieval over two text collections: stored questions and passages.

    Every non-blank line of every file in the two folders is embedded with a
    SentenceTransformer model. Lines are kept under two "bases":

    * ``'Q'`` - lines read from ``QA_folder_path``
    * ``'p'`` - lines read from ``passages_folder_path``

    Attributes:
        model: the loaded SentenceTransformer.
        documents: ``{base: {file_name: [line, ...]}}``.
        embedded_documents: same layout as ``documents``, holding one embedding
            per line, in the same order.
        answers: ``{question_text: answer_text}`` or ``None`` until
            ``populate_answers`` has run.
    """

    def __init__(self, QA_folder_path, passages_folder_path, model_name = 'airnicco8/xlm-roberta-en-it-de'):
        """Load the model and embed every line found in the two folders.

        Args:
            QA_folder_path: folder whose file lines are indexed under base 'Q'.
            passages_folder_path: folder whose file lines are indexed under base 'p'.
            model_name: identifier handed to ``SentenceTransformer``.
        """
        model = SentenceTransformer(model_name)
        documents = {'Q': {}, 'p': {}}
        embedded_documents = {'Q': {}, 'p': {}}

        for base, folder_path in (('Q', QA_folder_path), ('p', passages_folder_path)):
            for file_name, sentences in self._read_lines(folder_path).items():
                documents[base][file_name] = sentences
                # Nothing to embed for a file without text; keep the entry so
                # documents and embedded_documents always share the same keys.
                embedded_documents[base][file_name] = model.encode(sentences) if sentences else []

        self.model = model
        self.documents = documents
        self.embedded_documents = embedded_documents
        self.answers = None

    @staticmethod
    def _read_lines(folder_path):
        """Return ``{file_name: [stripped non-blank lines]}`` for the regular files in a folder."""
        lines_by_file = {}
        # Sorted so the indexing order (and therefore tie-breaking) is reproducible.
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints): open() would fail on them.
            if not os.path.isfile(file_path):
                continue
            with open(file_path, encoding='utf-8') as f:
                stripped = (line.strip() for line in f.read().split('\n'))
                # Blank lines (including the one after a trailing newline) carry no text.
                lines_by_file[file_name] = [line for line in stripped if line]
        return lines_by_file

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of two 1-D vectors, or 0.0 if either has zero norm."""
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            # Avoid a NaN score, which would make the ranking in retrieve_passages unreliable.
            return 0.0
        return a.dot(b) / denominator

    def retrieve_passages(self, question, domain=('Q', 'p'), k=10):
        """Rank the indexed lines by cosine similarity to ``question``.

        Args:
            question: text to embed and compare against the index.
            domain: iterable of bases to search ('Q' and/or 'p').
            k: maximum number of hits to return.

        Returns:
            Up to ``k`` dicts ``{'base', 'file', 'name', 'score'}`` ordered by
            descending score; ``'name'`` is the text of the matched line.
        """
        Q = self.model.encode(question)
        results = []
        for base in domain:
            for key, passages in self.documents[base].items():
                for passage, K in zip(passages, self.embedded_documents[base][key]):
                    results.append({'base': base, 'file': key, 'name': passage, 'score': self.cosine_similarity(K, Q)})
        return sorted(results, key=lambda x: -x['score'])[:k]

    def populate_answers(self, folder_path):
        """Build ``self.answers`` from files whose lines look like ``<question> A. <answer>``.

        Each line is split at the first ``' A. '``. Blank lines and lines
        without that separator are skipped.
        """
        answers = {}
        for sentences in self._read_lines(folder_path).values():
            for sentence in sentences:
                Q, separator, A = sentence.partition(' A. ')
                if not separator:
                    continue
                answers[Q.strip()] = A.strip()
        self.answers = answers

    def answer(self, question, db_path = 'data/QA/', domain=('Q', 'p')):
        """Answer ``question`` from the closest indexed line.

        If the best hit is a stored question (base 'Q'), its stored answer is
        returned together with the matched question text. Otherwise the hits
        are passed to ``generate_answer``.

        Args:
            question: the text being asked.
            db_path: folder read by ``populate_answers`` on first use.
            domain: bases to search, forwarded to ``retrieve_passages``.

        Returns:
            ``(answer, question)``; ``question`` is replaced by the matched
            stored question when the best hit comes from base 'Q'.

        Raises:
            IndexError: if nothing is indexed for ``domain``.
            KeyError: if the matched question has no entry in ``self.answers``.
        """
        if self.answers is None:
            self.populate_answers(db_path)
        results = self.retrieve_passages(question, domain)
        if not results:
            raise IndexError('no indexed text available for domain {!r}'.format(list(domain)))
        best = results[0]
        if best['base'] == 'Q':
            question = best['name']
            answer = self.answers[question]
        else:
            answer = self.generate_answer(question, results)
        return answer, question

    def generate_answer(self, question, results):
        """Produce an answer from retrieved passages (not implemented).

        Raises:
            NotImplementedError: always; the original definition had no body.
        """
        raise NotImplementedError('generate_answer is not implemented')


# Build the index with the default embedding model.
# Alternative model that was tried: model_name='dlicari/distil-ita-legal-bert'
sd = SemanticDocument(
    QA_folder_path='data/Q/',
    passages_folder_path='data/passages/',
)
   

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Search only the stored questions (base 'Q').
answer, question = sd.answer("Quanti giorni di smartworking posso fare?", domain=['Q'])
# `question` now holds the matched stored question, not the text that was asked.
print(question, answer, sep='\n')

Q. Quanti giorni in modalità agile posso lavorare mensilmente?
Puoi lavorare in agile il numero di giornate che hai chiesto nell'adesione, il numero massimo è 12 giorni.


In [32]:
# Default domain: both stored questions and passages are searched.
answer, question = sd.answer("Posso lavorare dalla Francia?")
# `question` is the matched stored question when the best hit comes from base 'Q'.
print(question, answer, sep='\n')

Q. Posso lavorare in modalità agile da un Paese estero?
sì è possibile, compatibilmente con la necessità di lavorare in presenza in alcune giornate.


In [33]:
# Out-of-scope question: sd.answer still returns the closest indexed line, whatever its score.
answer, question = sd.answer("Posso ammazzare il mio capo?")
print(question, answer, sep='\n')

Q. Il mio responsabile mi può chiedere di fare una cosa alle 18?
sì, può chiedertelo fino alle 19 ma tu non sei obbligato a farla se la tua giornata di lavoro è terminata.


In [34]:
# Second out-of-scope question: again the closest indexed line is returned.
answer, question = sd.answer("Posso drogarmi durante l'orario lavorativo?")
print(question, answer, sep='\n')

Q. Posso lavorare in modalità agile prima o dopo una giornata di ferie?
Assolutamente sì.


# Deprecated

In [69]:
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple, Union
import openai
# SECURITY: a literal secret key used to be committed on this line. Treat that key as
# compromised and revoke it. The key is now read from the environment (`os` is imported
# in the first cell); it is None when OPENAI_API_KEY is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

@dataclass
class RequestConfig:
    """Default parameters for the completion request issued in this cell.

    All fields have defaults, so the values can be read from the class itself
    (``RequestConfig.model``) or overridden per instance.
    """
    response_length: int = 500  # forwarded as max_tokens
    temperature: float = 0
    top_P: float = 1  # forwarded as top_p
    frequency_penalty: float = 0
    presence_penalty: float = 0.35
    best_of: int = 1
    logprobs: bool = False  # not forwarded by the request in this cell
    model: str = "text-davinci-003"
    # Declared last so the positional order of the existing fields is unchanged.
    # An immutable tuple replaces the unannotated list, which was a shared class
    # attribute rather than a dataclass field and could not be set per instance.
    stop_seqs: Tuple[str, ...] = ('###',)


# Retrieve the closest indexed lines and send them, with the question, as a completion prompt.
question = "Posso lavorare dalla Siberia"
# retrieve_passages takes the bases to search; the original call omitted the argument.
# NOTE(review): both bases are searched here, mirroring the default of sd.answer - confirm.
results = sd.retrieve_passages(question, ['Q', 'p'])
passages = '\n'.join(hit['name'] for hit in results)
prompt = f"""{passages}, Rispondi a questa domanda: "{question}" """
# NOTE(review): confirm the installed openai package still exposes Completion.create.
response = openai.Completion.create(
    prompt=prompt,
    model=RequestConfig.model,
    max_tokens=RequestConfig.response_length,
    temperature=RequestConfig.temperature,
    top_p=RequestConfig.top_P,
    frequency_penalty=RequestConfig.frequency_penalty,
    presence_penalty=RequestConfig.presence_penalty,
    best_of=RequestConfig.best_of,
)
