## Revolutionizing Workflows with AI: The Future of Efficiency
### CyberShield 2023 - ShieldCon
#### Demo

In [None]:
import os

from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pdf2image
import openai
import pinecone
import tqdm

from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

import json

In [None]:
# Credentials and index settings are read from environment variables of the
# same name, so real keys never have to be typed into (and saved with) the
# notebook. Each value falls back to an empty string when its variable is
# unset, which matches the previous placeholder defaults.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "")
INDEX_NAME = os.environ.get("INDEX_NAME", "")
TEST = "TEST123"  # fixed marker value; printed by a later cell

## Load your data

In [None]:
# Echo the TEST constant; presumably a quick check that the settings cell has been run.
print(TEST)

In [None]:
# Load the report PDF from the working directory and keep the result in `data`.
report_path = "./cybersecurity-report-202109.pdf"
loader = PyPDFLoader(report_path)
data = loader.load()
data  # bare expression so the notebook shows the loaded content as cell output

## Chunk your data up into smaller documents

In [None]:
# PyPDFLoader output has already been divided once, so this pass splits it a
# second time. The extra split is optional; try it on your own data.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)
print(f'Now you have {len(texts)} documents')

## Create embeddings of your documents to get ready for semantic search

In [None]:
# Embedding client, authenticated with OPENAI_API_KEY; passed to the vector store below.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [None]:
# Set up the Pinecone client. The API key is found at app.pinecone.io and the
# environment value is shown next to the API key in the console.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)

# Name of the Pinecone index to use, taken from the INDEX_NAME setting.
index_name = INDEX_NAME

In [None]:
# Build the Pinecone-backed vector store from the raw text of every chunk.
# Only `page_content` is passed on, so chunk metadata is not sent.
chunk_texts = [chunk.page_content for chunk in texts]
docsearch = Pinecone.from_texts(chunk_texts, embeddings, index_name=index_name)

In [None]:
# Ask a specific question and retrieve the chunks most similar to it.
query = (
    "What specific initiatives has the CISO introduced in order to improve "
    "the Board's cybersecurity and privacy processes?"
)
docs = docsearch.similarity_search(query)
docs  # bare expression so the notebook shows the retrieved chunks

## Query the doc to get answers

In [None]:
# Completion model (temperature 0) and a question-answering chain built on it
# with the "stuff" chain type.
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
)
chain = load_qa_chain(
    llm,
    chain_type="stuff",
)

In [None]:
# Replace the earlier query with a summarization request and fetch the chunks
# most similar to it.
# NOTE(review): only the chunks returned by this search are handed to the chain
# in the next cell, so the "summary" is based on those chunks rather than the
# whole document.
query = "Please summarize this document for me"
docs = docsearch.similarity_search(query)

In [None]:
# Run the QA chain over the retrieved chunks with the current query; as the
# last expression in the cell, the result is shown as the cell output.
chain.run(input_documents=docs, question=query)