In [1]:
import os
from langchain.vectorstores.chroma import Chroma
from langchain_text_splitters.character import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from dotenv import load_dotenv, find_dotenv

In [2]:
load_dotenv(find_dotenv())

True

In [3]:
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY", "")
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "")

### Loading the Documents

In [4]:
gdpr_data_location = "gdpr_docs/Complete-Guide-to-General-Data-Protection-Regulation-GDPR-1.pdf"

gdpr_loader = PyPDFLoader(gdpr_data_location)

gdpr_loaded_doc = gdpr_loader.load()

### Splitting the Documents

In [15]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
gdpr_splitted_docs = text_splitter.split_documents(gdpr_loaded_doc)

gdpr_splitted_docs

[Document(page_content='Complete Guide to \nGeneral Data Protection \nRegulation  (GDPR)', metadata={'source': 'gdpr_docs/Complete-Guide-to-General-Data-Protection-Regulation-GDPR-1.pdf', 'page': 0}),
 Document(page_content='THE DIRECTIVE AND\nCOMPLIANCE OF GDPR\nGDPR DEFINITIONCOMPLETE GUIDE TO  GENERAL DATA PROTECTION REGULATION (GDPR) 02\nOver the past few year’s technologies have transformed us beyond our imagination; Data has \nplayed a significant role in this transformation. Data is considered the new gold, and most businesses depend on these data for various business functions. There are many instances where this data has been misused and has affected individuals and business operations.\nEuropean government in 2016 adopted General Data Protection Regulation (GDPR) and was \nput into effect on May 25, 2018, replacing the 1995’s Data Protection Directive to protect the \npersonal information of EU citizens. GDPR aims to govern personal data processing and ensure \nprocessing is 

### Loaded into ChromaDB

In [16]:
chroma_db = Chroma.from_documents(
    documents=gdpr_splitted_docs, 
    embedding=OpenAIEmbeddings()
)

retriever = chroma_db.as_retriever()

In [19]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an expert AI Assistant"),
        (
            "ai",
            """
            You will be given some input. Your job is to analyse the input and provide step-step process based on:
            1) Data Retention
            2) Privacy
            3) Data Security
            4) Processing
            Your answer should only based on the above 4 poins and nothing else. You should not use your own knowledge
            and assume anything.
            """
        ),
        ("human", "{input}")
    ]
)

llm = ChatOpenAI(temperature=0)

chain = prompt | llm | StrOutputParser()

result = chain.invoke({"input": "A web application collecting personal information and sending to third party"})

print(result)

1) Data Retention: The web application should only retain personal information for as long as necessary to fulfill the purpose for which it was collected. Once the information is no longer needed, it should be securely deleted to minimize the risk of unauthorized access or misuse.

2) Privacy: The web application must inform users about the collection of their personal information and obtain their consent before sending it to a third party. Users should have the option to opt out of sharing their data with third parties if they choose to do so.

3) Data Security: The web application must implement appropriate security measures to protect the personal information collected from unauthorized access, disclosure, or alteration. This includes using encryption, access controls, and regular security audits to ensure the data is secure.

4) Processing: The web application should only process personal information for the specific purposes disclosed to users and agreed upon by them. Any processi

In [21]:
similarity_retrieval_chain = chain | retriever

ans = similarity_retrieval_chain.invoke(
    {"input": "A web application collecting personal information and sending to third party"}
)

print(ans[0].page_content)

GDPR PRINCIPLES03
According to Article 5.1-2 of GDPR Act you must follow seven protection and accountability princi-
ples if you process data
Fair, lawful, and transparent processing –  You need to communicate what the individual data 
is used and processed.
Purpose limitation principle - Personal data may only be collected for specified, explicit and 
legitimate purposes. States are responsible for implementing safeguards if data is processed than 
for what was collected.
Data minimization - Personal data must be adequate, relevant and not excessive with the 
purposes for which those data are collected and/or further processed.
Accuracy - personal data collected must be accurate and up to date. Inaccurate data must be 
identified at the early stage to ensure it is erased or rectified without delay.
Data retention periods - Personal data must be store for as long as necessary for the specified
