<a href="https://colab.research.google.com/github/aasiyasan/Training/blob/main/Capstone_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain
!pip install langchain-openai
!pip install langchain_community
!pip install chromadb
!pip install langchain-google-genai
!pip install pypdf

In [None]:
!pip install twilio

In [3]:
# import libraries
import os
from langchain_community.document_loaders import PyPDFLoader
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

import sqlite3
from twilio.rest import Client
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from google.colab import userdata
import tweepy

In [4]:
# Read the Gemini API key from Colab's secret store (google.colab.userdata)
# so the key itself never appears in the notebook source.
gemini_api_key = userdata.get("GoogleAPIKey")

In [None]:
# Directory where the Chroma vector store is persisted between runs.
CHROMA_PATH = "Chroma"
# ----- Data Indexing Process -----
# Path of the PDF document to index.
DOC_PATH = "/content/Test.pdf"
# Load the PDF into LangChain documents.
loader = PyPDFLoader(DOC_PATH)
pages = loader.load()
# Split the document into smaller overlapping chunks (chunk_size=500,
# chunk_overlap=50) so each retrieved piece stays small and focused.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(pages)

# Embed with a Hugging Face sentence-transformers model (not OpenAI).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Give every chunk a deterministic id. Without ids, each re-run of this cell
# appends a fresh copy of every chunk to the persisted store, and the top-k
# search then returns duplicates. With stable ids a re-run overwrites the
# existing entries instead.
# NOTE: copies written by earlier id-less runs, or chunks left over from a
# previously longer PDF, are not removed; delete CHROMA_PATH once to reset.
chunk_ids = [f"{DOC_PATH}#chunk-{index}" for index, _ in enumerate(chunks)]

# Embed the chunks as vectors and load them into the database.
db_chroma = Chroma.from_documents(
    chunks,
    embeddings,
    ids=chunk_ids,
    persist_directory=CHROMA_PATH,
)
# ----- Retrieval and Generation Process -----

# Example user question (query).
query = 'Summarize Nike Fiscal 2025 results'


In [7]:

# Prompt skeleton for the LLM: {context} receives the retrieved chunks and
# {question} receives the user query.
PROMPT_TEMPLATE = """Answer the question based only on the following context:
{context}
Answer the question based on the above context: {question}.
Provide a detailed answer.
Don’t justify your answers.
Don’t give information not mentioned in the CONTEXT INFORMATION.
Do not say "according to the context" or "mentioned in the context" or similar.
"""

# Fetch the 5 chunks closest to the query vector, each paired with its score.
# NOTE(review): the score is a distance (lower means closer). A Chroma
# collection created without an explicit `hnsw:space` setting uses squared L2
# rather than cosine; confirm if cosine is what you intend.
docs_chroma = db_chroma.similarity_search_with_score(query, k=5)

# Build the context block for the prompt: chunk texts separated by blank
# lines; the scores are not needed here.
context_text = "\n\n".join(doc.page_content for doc, _score in docs_chroma)

In [9]:
# Fill the prompt template with the retrieved context and the user query.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query)

# Generate the answer with Google Gemini (used here instead of OpenAI).
# The API key was already read from Colab secrets into `gemini_api_key`.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", api_key=gemini_api_key)

# `predict` is deprecated in LangChain; `invoke` is its replacement. It
# returns a message object whose `.content` holds the generated text.
response = llm.invoke(prompt)
response_text = response.content
print(response_text)

  response_text = llm.predict(prompt)


NIKE, Inc. reported its financial results for fiscal 2025, which ended on May 31, 2025.

**Overall Company Performance:**
*   Full year revenues for NIKE, Inc. were $46.3 billion, representing a 10 percent decrease on a reported basis and a 9 percent decrease on a currency-neutral basis.
*   Net income was $0.2 billion, down 86 percent.
*   Diluted earnings per share was $0.14, also an 86 percent decrease.
*   Total NIKE, Inc. Earnings Before Interest and Taxes (EBIT) for the twelve months ended May 31, 2025, were $3,778 million, a 42 percent decrease.
*   The EBIT margin for the twelve months ended May 31, 2025, was 8.2 percent.
*   Total NIKE, Inc. Income Before Income Taxes was $3,885 million, down 42 percent.

**Brand and Segment Performance:**
*   Revenues for the NIKE Brand were $44.7 billion, down 9 percent on both a reported and currency-neutral basis, driven by declines across all geographies.
*   TOTAL NIKE BRAND Earnings Before Interest and Taxes (EBIT) were $5,740 million f

# Storing the Summary in SQLite

In [11]:
# Persist the query and its generated summary in a local SQLite database.
conn = sqlite3.connect('responses.db')
try:
    # `with conn` wraps the statements in a transaction: it commits when the
    # block succeeds and rolls back if any statement raises.
    with conn:
        cursor = conn.cursor()
        # Create the table on first use; later runs reuse it.
        cursor.execute('''
    CREATE TABLE IF NOT EXISTS responses (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        query TEXT,
        response TEXT
    )
''')
        # Parameter placeholders keep the (LLM-generated) text from being
        # interpreted as SQL.
        cursor.execute('''
    INSERT INTO responses (query, response) VALUES (?, ?)
''', (query, response_text))
finally:
    # Always release the connection, even when a statement fails.
    conn.close()

# Twilio Credentials

In [12]:
# Twilio credentials, read from Colab secrets; both values are passed to the
# Twilio `Client` in the messaging cells below.
account_sid = userdata.get("TwilioAccountId")
# NOTE(review): the secret name "TwiliouthToken" looks like a typo for
# "TwilioAuthToken". It must match the name stored in Colab's Secrets panel
# exactly, so confirm there before renaming.
auth_token = userdata.get("TwiliouthToken")

# Sending the message to a WhatsApp number

In [None]:
# Send the summary to a WhatsApp number through Twilio.
# NOTE(review): Twilio expects WhatsApp addresses in the form
# `whatsapp:+<E.164 number>`; confirm both secrets include that prefix.
twilio_whatsapp_number = userdata.get("Twilio_Whatsappsender")
recipient_number = userdata.get("WhatsappRecipient")

# Twilio rejects message bodies longer than 1600 characters, and a detailed
# LLM summary can easily exceed that. Trim a copy (leaving `response_text`
# intact for the other cells) and mark the cut with an ellipsis.
TWILIO_MAX_BODY_CHARS = 1600
whatsapp_body = response_text
if len(whatsapp_body) > TWILIO_MAX_BODY_CHARS:
    whatsapp_body = whatsapp_body[:TWILIO_MAX_BODY_CHARS - 3] + '...'

client = Client(account_sid, auth_token)
message = client.messages.create(
    body=whatsapp_body,
    from_=twilio_whatsapp_number,
    to=recipient_number,
)
print("Message sent! SID:", message.sid)


# Sending the summary as a text message (SMS) to a phone number

In [None]:
# Send the summary as an SMS through Twilio.
# NOTE(review): the sender number is hard-coded here, whereas the other
# Twilio values come from Colab secrets; consider moving it there too.
twilio_phone_number = '+18777804236'
recipient_number = userdata.get("recipientphnumber")

# Twilio rejects message bodies longer than 1600 characters, and a detailed
# LLM summary can easily exceed that. Trim a copy (leaving `response_text`
# intact for the other cells) and mark the cut with an ellipsis.
TWILIO_MAX_BODY_CHARS = 1600
sms_body = response_text
if len(sms_body) > TWILIO_MAX_BODY_CHARS:
    sms_body = sms_body[:TWILIO_MAX_BODY_CHARS - 3] + '...'

client = Client(account_sid, auth_token)
message = client.messages.create(
    body=sms_body,
    from_=twilio_phone_number,
    to=recipient_number,
)
print("SMS sent! SID:", message.sid)


# Send the summary as an Email

In [None]:

    # Email configuration: addresses and credentials come from Colab secrets.
    sender_email = userdata.get("sender_email")
    # SECURITY: the password used to be hard-coded in this cell. Read it from
    # a Colab secret instead: add a secret named "sender_password" in the
    # Secrets panel, and revoke the password that was previously committed.
    sender_password = userdata.get("sender_password")
    recipient_email = userdata.get("recipient_email")
    subject = "Document Summary - Test"

    # Create the email message with the summary as a plain-text part.
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = recipient_email
    msg['Subject'] = subject
    msg.attach(MIMEText(response_text, 'plain'))

    # Send the email over an implicit-TLS connection to Gmail (port 465).
    try:
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, recipient_email, msg.as_string())
    except (smtplib.SMTPException, OSError) as e:
        # Covers SMTP protocol/authentication errors and network/TLS failures.
        print("Failed to send email:", e)
    else:
        print("Email sent successfully!")

# Posting the message on Twitter

In [None]:
# Twitter API credentials (replace with your own). Prefer loading them from
# Colab secrets rather than pasting real keys into the notebook.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# Shorten to 280 characters if needed. The tweet gets its own variable so
# that `response_text` keeps the full summary for any cell run afterwards.
TWEET_MAX_CHARS = 280
tweet_text = response_text
if len(tweet_text) > TWEET_MAX_CHARS:
    tweet_text = tweet_text[:TWEET_MAX_CHARS - 3] + '...'
print("Tweet message:", tweet_text)

# Post through the Twitter API v2 endpoint (`Client.create_tweet`). The v1.1
# `API.update_status` call used previously is not available on the current
# standard access tiers.
twitter_client = tweepy.Client(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)
try:
    twitter_client.create_tweet(text=tweet_text)
except Exception as e:
    # Best-effort: report any failure (auth, network, API) without aborting
    # the notebook.
    print("Failed to post tweet:", e)
else:
    print("Tweet posted successfully!")
