In [12]:
import streamlit as st
import os, re, json
#os.environ["OPENAI_API_KEY"] = ''
import glob
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings, HuggingFaceBgeEmbeddings, OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain, LLMChain, StuffDocumentsChain
from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
from streamlit_chat import message
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Run Chat

In [32]:
def create_chat(collection_name, use_json = False):
    persist_directory = './chromadb/'
    embed_path  = '/embeddings/'
    
#    llm = ChatOpenAI(model_name = 'gpt-4-1106-preview',  temperature=0)
    llm = ChatOpenAI(model_name = 'gpt-4-turbo-preview',  temperature=0)
    embeddings = HuggingFaceBgeEmbeddings(model_name = 'BAAI/bge-large-en', cache_folder=embed_path)


    client = chromadb.PersistentClient(path= persist_directory)
    db = Chroma(
        client=client,
    collection_name = collection_name,
        embedding_function=embeddings)
    retriever = db.as_retriever(search_type = 'similarity', search_kwargs={'k':20})


    combine_template = '''
     Use the following pieces of context to answer the question at the end. 
    If the context does not have the answer, just say that no information has been provided. 
    {selected_answer_option}
    If you don't know the answer, just say that you don't know, don't try to make up an answer and don't return any sources.
    Answer the question in {selected_language}
    
    \n\n{context}\n\nQuestion: {question}\nHelpful Answer:
    '''

    if use_json:
        combine_template = 'You are assistant. Only reply with JSON. ' + combine_template
        
    combine_docs_chain = StuffDocumentsChain(
     llm_chain=LLMChain( prompt=PromptTemplate(input_variables=['context', 'question', 'selected_language', 'selected_answer_option'], 
                                               template=combine_template), llm=llm), document_variable_name='context')


    template = '''
    Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
    in its original language.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:'
    '''

    question_generator  = LLMChain( prompt=PromptTemplate(input_variables=['chat_history', 'question'], 
    template=template), llm=llm)

    cqa5 = ConversationalRetrievalChain(retriever=retriever, return_source_documents=False,
                           combine_docs_chain=combine_docs_chain, question_generator=question_generator)
    
    #print (cqa5)
    return cqa5

In [33]:
def create_chat(collection_name):
    persist_directory = './chromadb/'
    embed_path  = '/embeddings/'
    
#    llm = ChatOpenAI(model_name = 'gpt-4-1106-preview',  temperature=0)
    llm = ChatOpenAI(model_name = 'gpt-4-turbo-preview',  temperature=0, model_kwargs={'stream':True })
    embeddings = HuggingFaceBgeEmbeddings(model_name = 'BAAI/bge-large-en', cache_folder=embed_path)


    client = chromadb.PersistentClient(path= persist_directory)
    db = Chroma(
        client=client,
    collection_name = collection_name,
        embedding_function=embeddings)
    retriever = db.as_retriever(search_type = 'similarity', search_kwargs={'k':20})


    combine_template = '''
     Use the following pieces of context to answer the question at the end. 
    If the context does not have the answer, just say that no information has been provided. 
    {selected_answer_option}
    If you don't know the answer, just say that you don't know, don't try to make up an answer and don't return any sources.
    Answer the question in {selected_language}
    
    \n\n{context}\n\nQuestion: {question}\nHelpful Answer:
    '''

        
    combine_docs_chain = StuffDocumentsChain(
     llm_chain=LLMChain( prompt=PromptTemplate(input_variables=['context', 'question', 'selected_language', 'selected_answer_option'], 
                                               template=combine_template), llm=llm), document_variable_name='context')


    template = '''
    Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
    in its original language.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:'
    '''

    question_generator  = LLMChain( prompt=PromptTemplate(input_variables=['chat_history', 'question'], 
    template=template), llm=llm)

    cqa5 = ConversationalRetrievalChain(retriever=retriever, return_source_documents=False,
                           combine_docs_chain=combine_docs_chain, question_generator=question_generator)
    
    return cqa5

In [38]:
persist_directory = './chromadb/'
embed_path  = '/embeddings/'
embeddings = HuggingFaceBgeEmbeddings(model_name = 'BAAI/bge-large-en', cache_folder=embed_path)
collection_name = 'dora_1000'

db_client = chromadb.PersistentClient(path= persist_directory)
db = Chroma(client=db_client, collection_name = collection_name,  embedding_function=embeddings)

In [41]:
import openai
from openai import OpenAI

client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [57]:
selected_answer_option = 'short, succinct answer without bullet points. '
selected_answer_option = 'detailed answer formatted as a bulletted point list. '

system_message = f'''
     Use the following pieces of context to answer the question at the end. 
    If the context does not have the answer, just say that no information has been provided. 
    {selected_answer_option}
    If you don't know the answer, just say that you don't know, don't try to make up an answer and don't return any sources.'''

In [None]:
new_content = ''
model='gpt-4-turbo-preview'
temperature = 0
chat_history = []
while new_content != 'end':

    new_content = input ()
    messages = list(chat_history)
    messages.append({"role": "system", "content": system_message})

    documents = db.similarity_search(new_content, k = 30)
    for doc in documents:
        messages.append({"role": "system", "content": f"The following is a document for reference: {doc.page_content}"})
    
    messages.append({"role": "user", "content": new_content})
    chat_history.append({"role": "user", "content": new_content})
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens = 300
    )

    answer = response.choices[0].message.content
    chat_history.append({"role": "assistant", "content": answer})
    print(response['choices'][0]['message']['content'])


In [59]:
new_content = ''
model='gpt-4-turbo-preview'
temperature = 0
chat_history = []
while new_content != 'end':

    new_content = input ()
    messages = list(chat_history)
    messages.append({"role": "system", "content": system_message})

    documents = db.similarity_search(new_content, k = 30)
    for doc in documents:
        messages.append({"role": "system", "content": f"The following is a document for reference: {doc.page_content}"})
    
    messages.append({"role": "user", "content": new_content})
    chat_history.append({"role": "user", "content": new_content})
    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens = 300,
        stream=True,
    )

    for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")

What are the risks defined in the document?
The document defines the following risks:

- **ICT risk**: Any reasonably identifiable circumstance in relation to the use of network and information systems which, if materialized, may compromise the security of the network and information systems, of any technology-dependent tool or process, of operations and processes, or of the provision of services by producing adverse effects in the digital or physical environment.

- **Operational incidents, other than in relation to ICT risk**: Resulting from risks not specifically related to ICT but could impact the operational capability of the financial entity.

- **ICT third-party risk**: Risks arising from contractual arrangements on the use of ICT services concluded with ICT third-party service providers, taking into account the criticality or importance of the respective service, process or function, and the potential impact on the continuity and availability of financial services and activitie

In [27]:
def read_json_response(res):
    match = re.search(r'```json\n({.*?})\n```',res, re.DOTALL)
    if match:
        json_string = match.group(1)
        res = json.loads(json_string)
        #print(response.values())
    else:
        res = ''
    return list(res.values())[0]

In [34]:
collection_name = 'dora_1000'
cqa = create_chat(collection_name)

In [10]:
chat_history = []
selected_language  = "English"

In [37]:
cqa.run({'question':question, 'chat_history':chat_history}, callbacks=[callback])

AttributeError: 'Stream' object has no attribute 'dict'

In [36]:
callback = StreamingStdOutCallbackHandler()

question = 'What are the risks defined in the document?'
response = cqa.run({'question':question, 'chat_history':chat_history, 'selected_language':selected_language, 
               'selected_answer_option':'short, succinct answer without bullet points. '})
# for chunk in response['answer']:
#     line = chunk.choices[0].delta.content or ""
#     print(line, end="")
# answer = response['answer']
# print (answer)
# chat_history.append((question, answer))

AttributeError: 'Stream' object has no attribute 'dict'

In [16]:
for intermediate_result in cqa({'question':question, 'chat_history':chat_history, 'selected_language':selected_language, 
               'selected_answer_option':'short, succinct answer without bullet points. '}).run_with_generator("Tell me about astronomy"):
    print(intermediate_result.text)  # Process each text chunk

AttributeError: 'dict' object has no attribute 'run_with_generator'

In [None]:
response

In [None]:
read_json_response(response_json['answer'])

In [None]:
chat_history=[]
while True:
    question = input()
    if question == 'done':
        break
    result = cqa({'question':question, 'chat_history':chat_history})
    answer = read_json_response(result['answer'])
    chat_history.append((question, result['answer']))
    
    print (answer)
    print()

In [5]:
history = {'whoare you?':{'long':'I am David', 'short':'Dave'}}

In [6]:
history

{'whoare you?': {'long': 'I am David', 'short': 'Dave'}}

In [7]:
history['whoare you?']

{'long': 'I am David', 'short': 'Dave'}

In [20]:
import json
with open(r'output/eu_ai_1000.json', 'r') as f:
    data = json.load(f)

In [21]:
for k,v in data.items():
    print (v['short'])

The text discusses the importance of risk management for high-risk AI systems, emphasizing the need for continuous planning, implementation, and review throughout the system's lifecycle. It highlights measures such as risk reduction, mitigation, and transparency, as well as the importance of technical documentation, record-keeping, and a quality management system to ensure compliance with regulations.
The text discusses the auditing process for AI systems, highlighting key points such as access for auditing, periodic audits, surveillance of quality management systems, and notification of changes. Notified bodies play a crucial role in ensuring compliance with standards and regulations for AI systems.
The text stresses the importance of data quality in training high-risk AI systems, emphasizing the need for relevant, error-free, and representative datasets. It highlights the significance of appropriate statistical properties, data governance practices, compliance with EU data protection

In [22]:
data

{'What does the document say about risk management?': {'short': "The text discusses the importance of risk management for high-risk AI systems, emphasizing the need for continuous planning, implementation, and review throughout the system's lifecycle. It highlights measures such as risk reduction, mitigation, and transparency, as well as the importance of technical documentation, record-keeping, and a quality management system to ensure compliance with regulations.",
  'long': "The document discusses various aspects of risk management for high-risk AI systems. It outlines the establishment, implementation, documentation, and maintenance of a risk management system specifically for high-risk AI systems. This system is described as a continuous, iterative process that should be planned and run throughout the entire lifecycle of a high-risk AI system, necessitating regular, systematic review and updates. The risk management measures should ensure that the relevant residual risk associated