In [10]:
# Run this in your first Jupyter notebook cell
# Each package serves a specific purpose:

# Core LangChain framework
!pip install langchain 

# OpenAI integration (most popular LLM provider)  
!pip install openai

# For loading different document types
!pip install pypdf2 docx2txt

# For web searching capabilities
!pip install google-search-results

# For working with vector databases
!pip install chromadb

# For environment variable management
!pip install python-dotenv

# For making HTTP requests (useful for APIs)
!pip install requests

# For data manipulation (you'll often need this)
!pip install pandas


!pip install langchain
!pip install langchain-openai  # This is the new OpenAI integration package
!pip install langchain-community  # For community integrations
!pip install python-dotenv

print(" All packages installed successfully!")

Collecting langchain-openai
  Using cached langchain_openai-0.3.31-py3-none-any.whl.metadata (2.4 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.11.0-cp313-cp313-macosx_11_0_arm64.whl.metadata (6.7 kB)
Collecting regex>=2022.1.18 (from tiktoken<1,>=0.7->langchain-openai)
  Downloading regex-2025.7.34-cp313-cp313-macosx_11_0_arm64.whl.metadata (40 kB)
Downloading langchain_openai-0.3.31-py3-none-any.whl (74 kB)
Downloading tiktoken-0.11.0-cp313-cp313-macosx_11_0_arm64.whl (997 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m997.1/997.1 kB[0m [31m21.4 MB/s[0m  [33m0:00:00[0m
[?25hDownloading regex-2025.7.34-cp313-cp313-macosx_11_0_arm64.whl (285 kB)
Installing collected packages: regex, tiktoken, langchain-openai
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [langchain-openai]
[1A[2KSuccessfully installed langchain-openai-0.3.31 regex-2025.7.34 tiktoken-0.11.0
Collecting langchain-community
  Using cached

In [11]:
# Create a .env file in your project folder with this content:
# OPENAI_API_KEY=your_api_key_here
# SERPAPI_API_KEY=your_serpapi_key_here

# Then use this code to load them securely:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get API keys from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
serpapi_api_key = os.getenv("SERPAPI_API_KEY")

# Verify keys are loaded (without revealing them)
print(f"OpenAI key loaded: {'✅' if openai_api_key else '❌'}")
print(f"SerpAPI key loaded: {'✅' if serpapi_api_key else '❌'}")

OpenAI key loaded: ✅
SerpAPI key loaded: ✅


In [12]:
# Add these to the top of your notebooks for better experience
import warnings
warnings.filterwarnings('ignore')  # Reduces noise from deprecation warnings

# For better output formatting
from IPython.display import display, HTML, Markdown

# Auto-reload modules (helpful during development)
%load_ext autoreload
%autoreload 2

print("🚀 Jupyter environment optimized!")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
🚀 Jupyter environment optimized!


In [13]:
# Example 1: Hello World - Basic LLM Call
import os
from langchain.llms import OpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Initialize the OpenAI LLM
# temperature=0.7 means moderately creative responses
# temperature=0 would be very deterministic, temperature=1 very creative
llm = OpenAI(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.7,
    max_tokens=150  # Limit response length
)

# Your first prompt
prompt = "Explain what artificial intelligence is in simple terms that a 10-year-old would understand."

# Make the call to the LLM
try:
    response = llm(prompt)
    print("🤖 AI Response:")
    print(response)
except Exception as e:
    print(f"❌ Error: {e}")
    print("💡 Check your API key and internet connection")

# Let's try a few more examples to see how it works
example_prompts = [
    "Write a haiku about programming",
    "What are three benefits of renewable energy?",
    "Explain the concept of gravity using an analogy"
]

print("\n" + "="*50)
print("🎯 Testing different types of prompts:")
print("="*50)

for i, prompt in enumerate(example_prompts, 1):
    print(f"\n📝 Prompt {i}: {prompt}")
    try:
        response = llm(prompt)
        print(f"🤖 Response: {response.strip()}")
    except Exception as e:
        print(f"❌ Error with prompt {i}: {e}")

print("\n✅ Basic LLM interaction complete!")

🤖 AI Response:


Artificial intelligence (AI) is when computers are taught to think and make decisions like humans. This means they can learn and solve problems on their own without being told exactly what to do. Just like how you learn new things every day, computers can also learn new things and become smarter over time.

🎯 Testing different types of prompts:

📝 Prompt 1: Write a haiku about programming
🤖 Response: Code flows like a stream
Syntax creates harmony
Logic brings order

📝 Prompt 2: What are three benefits of renewable energy?
🤖 Response: 1. Environmental Sustainability: The use of renewable energy sources such as solar, wind, and hydro power does not produce greenhouse gas emissions or contribute to air or water pollution. This helps reduce the negative impact of energy production on the environment and helps combat climate change.

2. Energy Security and Independence: Unlike fossil fuels, which are finite resources that must be imported from other countries, renewable en

Prompt Template and LLMChain

In [14]:
# Example 2: Prompt Templates - Reusable and Dynamic Prompts
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os

# Initialize LLM
llm = OpenAI(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.3  # Lower temperature for more consistent responses
)

# Create a prompt template for customer service responses
customer_service_template = """
You are a helpful customer service representative for TechCorp, a software company.
Your goal is to be professional, empathetic, and solution-oriented.

Customer Information:
- Name: {customer_name}
- Issue Type: {issue_type}
- Product: {product_name}
- Customer Tier: {customer_tier}

Customer Message: {customer_message}

Please provide a professional response that:
1. Addresses the customer by name
2. Shows empathy for their issue
3. Provides a clear solution or next steps
4. Maintains a friendly but professional tone

Response:
"""

# Create the prompt template object
prompt = PromptTemplate(
    input_variables=["customer_name", "issue_type", "product_name", "customer_tier", "customer_message"],
    template=customer_service_template
)

# Create a chain (we'll learn more about chains in the next example)
chain = LLMChain(llm=llm, prompt=prompt)

# Test with different customer scenarios
test_scenarios = [
    {
        "customer_name": "Sarah Johnson",
        "issue_type": "Technical Problem",
        "product_name": "CloudSync Pro",
        "customer_tier": "Premium",
        "customer_message": "Hi, I've been trying to sync my files for the past hour but it keeps failing. This is really frustrating as I have an important presentation tomorrow."
    },
    {
        "customer_name": "Mike Chen",
        "issue_type": "Billing Question", 
        "product_name": "DataAnalyzer",
        "customer_tier": "Standard",
        "customer_message": "I was charged twice for my subscription this month. Can you please help me understand why and get this resolved?"
    },
    {
        "customer_name": "Emma Davis",
        "issue_type": "Feature Request",
        "product_name": "ProjectManager",
        "customer_tier": "Enterprise", 
        "customer_message": "Our team would really benefit from having a dark mode option. Is this something you're planning to add?"
    }
]

print("🎯 Testing Customer Service Template:")
print("="*60)

for i, scenario in enumerate(test_scenarios, 1):
    print(f"\n📧 Scenario {i}:")
    print(f"Customer: {scenario['customer_name']}")
    print(f"Issue: {scenario['issue_type']}")
    print(f"Message: {scenario['customer_message']}")
    
    try:
        response = chain.run(scenario)
        print(f"\n🤖 AI Response:")
        print(response.strip())
        print("\n" + "-"*50)
    except Exception as e:
        print(f"❌ Error: {e}")

# Let's create another template for different use case - content creation
content_creation_template = """
You are a content marketing expert. Create engaging content based on the following parameters:

Content Type: {content_type}
Target Audience: {target_audience}
Topic: {topic}
Tone: {tone}
Word Count: Approximately {word_count} words
Key Points to Include: {key_points}

Please create {content_type} that:
- Resonates with {target_audience}
- Maintains a {tone} tone throughout
- Incorporates all the key points naturally
- Is engaging and actionable

Content:
"""

content_prompt = PromptTemplate(
    input_variables=["content_type", "target_audience", "topic", "tone", "word_count", "key_points"],
    template=content_creation_template
)

content_chain = LLMChain(llm=llm, prompt=content_prompt)

# Test content creation template
content_example = {
    "content_type": "blog post introduction",
    "target_audience": "small business owners",
    "topic": "benefits of automating customer support",
    "tone": "professional but approachable",
    "word_count": "200",
    "key_points": "cost savings, 24/7 availability, consistency, scalability"
}

print("\n\n🎯 Testing Content Creation Template:")
print("="*60)

try:
    content_response = content_chain.run(content_example)
    print(f"📝 Generated Content:")
    print(content_response.strip())
except Exception as e:
    print(f"❌ Error: {e}")

print("\n✅ Prompt Templates demonstration complete!")

🎯 Testing Customer Service Template:

📧 Scenario 1:
Customer: Sarah Johnson
Issue: Technical Problem
Message: Hi, I've been trying to sync my files for the past hour but it keeps failing. This is really frustrating as I have an important presentation tomorrow.

🤖 AI Response:
Hi Sarah,

I'm sorry to hear that you've been experiencing difficulties with syncing your files on CloudSync Pro. I can understand how frustrating this must be, especially with an important presentation coming up.

Rest assured, our team is here to help you resolve this issue as quickly as possible. To start, can you please try restarting your device and then attempting to sync again? If the issue persists, please try clearing your cache and cookies and then retrying the sync.

If these steps do not resolve the issue, our technical support team is available 24/7 to assist you further. As a premium customer, you have access to our priority support line at 1-800-TECHCORP. Our team will be more than happy to troubles

Chains

In [16]:
# Example 3: Chains - Building Multi-Step AI Workflows
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.utilities import SerpAPIWrapper
import os

# Initialize components
llm = OpenAI(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.3
)

# Let's build a research and summarization chain
# Step 1: Research a topic and find key information
research_template = """
You are a research assistant. I need you to research the topic: {topic}

Based on your knowledge, provide:
1. Key concepts and definitions
2. Current trends and developments  
3. Main challenges or debates
4. Important statistics or facts
5. Notable experts or organizations

Focus on providing accurate, up-to-date information that would be useful for someone wanting to understand this topic thoroughly.

Research findings:
"""

research_prompt = PromptTemplate(
    input_variables=["topic"],
    template=research_template
)

research_chain = LLMChain(llm=llm, prompt=research_prompt)

# Step 2: Summarize the research into an executive summary
summary_template = """
You are an executive summary writer. Take the following research findings and create a concise, professional executive summary.

Research Findings:
{research_findings}

Please create an executive summary that:
- Captures the most important points in 3-4 paragraphs
- Uses clear, professional language
- Highlights key takeaways and implications
- Would be suitable for business leaders or decision makers

Executive Summary:
"""

summary_prompt = PromptTemplate(
    input_variables=["research_findings"],
    template=summary_template
)

summary_chain = LLMChain(llm=llm, prompt=summary_prompt)

# Step 3: Create the overall chain by connecting research and summary
overall_chain = SimpleSequentialChain(
    chains=[research_chain, summary_chain],
    verbose=True  # This will show us what happens at each step
)

# Test the chain with different topics
test_topics = [
    "artificial intelligence in healthcare",
    "renewable energy adoption challenges", 
    "remote work productivity trends"
]

print("🔗 Testing Sequential Chain - Research & Summarization:")
print("="*70)

for topic in test_topics:
    print(f"\n📚 Researching: {topic}")
    print("-" * 50)
    
    try:
        # Run the complete chain
        result = overall_chain.run(topic)
        print(f"📋 Final Executive Summary:")
        print(result)
        print("\n" + "="*50)
    except Exception as e:
        print(f"❌ Error processing {topic}: {e}")

# Let's build a more complex chain with multiple inputs
# This will be a content creation pipeline
from langchain.chains import SequentialChain

# Step 1: Generate content outline
outline_template = """
Create a detailed outline for a {content_type} about {topic} targeted at {audience}.

The outline should:
- Have a compelling introduction
- 3-5 main sections with subsections
- Include key points for each section
- Suggest where to include examples or case studies
- End with a strong conclusion

Outline:
"""

outline_prompt = PromptTemplate(
    input_variables=["content_type", "topic", "audience"],
    template=outline_template
)

outline_chain = LLMChain(
    llm=llm, 
    prompt=outline_prompt, 
    output_key="outline"
)

# Step 2: Write the introduction based on the outline
intro_template = """
Based on this outline:
{outline}

Write an engaging introduction for the {content_type} that:
- Hooks the reader immediately
- Clearly states what they'll learn
- Establishes credibility
- Builds excitement for the content

Introduction:
"""

intro_prompt = PromptTemplate(
    input_variables=["outline", "content_type"],
    template=intro_template
)

intro_chain = LLMChain(
    llm=llm,
    prompt=intro_prompt,
    output_key="introduction"
)

# Step 3: Create a compelling title based on outline and introduction
title_template = """
Based on this outline and introduction:

OUTLINE:
{outline}

INTRODUCTION: 
{introduction}

Create 5 compelling title options that:
- Capture attention immediately
- Clearly convey the value proposition
- Are optimized for the target audience
- Include power words that drive engagement

Title Options:
"""

title_prompt = PromptTemplate(
    input_variables=["outline", "introduction"],
    template=title_template
)

title_chain = LLMChain(
    llm=llm,
    prompt=title_prompt,
    output_key="title_options"
)

# Create the sequential chain with multiple inputs and outputs
content_creation_chain = SequentialChain(
    chains=[outline_chain, intro_chain, title_chain],
    input_variables=["content_type", "topic", "audience"],
    output_variables=["outline", "introduction", "title_options"],
    verbose=True
)

# Test the multi-step content creation chain
print("\n🔗 Testing Multi-Step Content Creation Chain:")
print("="*70)

content_example = {
    "content_type": "blog post",
    "topic": "benefits of AI automation for small businesses",
    "audience": "small business owners who are not tech-savvy"
}

try:
    results = content_creation_chain(content_example)
    
    print(f"📋 OUTLINE:")
    print(results["outline"])
    print(f"\n✍️ INTRODUCTION:")
    print(results["introduction"])
    print(f"\n🏷️ TITLE OPTIONS:")
    print(results["title_options"])
    
except Exception as e:
    print(f"❌ Error: {e}")

print("\n✅ Chains demonstration complete!")

🔗 Testing Sequential Chain - Research & Summarization:

📚 Researching: artificial intelligence in healthcare
--------------------------------------------------


[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
1. Key concepts and definitions:
- Artificial intelligence (AI): The simulation of human intelligence processes by computer systems, including learning, reasoning, and self-correction.
- Healthcare: The maintenance or improvement of health through the prevention, diagnosis, and treatment of disease, illness, injury, and other physical and mental impairments.
- AI in healthcare: The use of AI technology and algorithms to analyze complex medical data, assist in clinical decision-making, and improve patient outcomes.

2. Current trends and developments:
- AI-powered medical imaging: AI algorithms are being used to analyze medical images, such as X-rays, MRIs, and CT scans, to assist in the diagnosis of diseases and conditions.
- Predictive analytics: AI is being 

Memory

In [17]:
# Example 4: Memory - Conversational AI That Remembers
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langchain.chains import ConversationChain
from langchain.schema import HumanMessage, AIMessage
import os

# Initialize the LLM
llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    temperature=0.7
)

# Create a simple memory buffer that remembers everything
buffer_memory = ConversationBufferMemory(return_messages=True)

# Create a conversation chain with memory
conversation = ConversationChain(
    llm=llm,
    memory=buffer_memory,
    verbose=True  # Shows us what's happening behind the scenes
)

print("🧠 Testing Basic Conversation Memory:")
print("="*50)

# Simulate a conversation with multiple turns
conversation_turns = [
    "Hi, I'm Sarah and I work as a marketing manager at a tech startup.",
    "I'm planning a product launch campaign for our new AI-powered analytics tool.",
    "The target audience is small to medium-sized businesses who struggle with data analysis.",
    "What are some creative marketing strategies you'd recommend?",
    "That's great! Can you elaborate on the content marketing approach, keeping in mind what I told you about our target audience?",
    "Perfect! Now, what about my role as Sarah - do you remember what I do?"
]

for i, user_input in enumerate(conversation_turns, 1):
    print(f"\n👤 Turn {i}: {user_input}")
    try:
        response = conversation.predict(input=user_input)
        print(f"🤖 Assistant: {response}")
        print("-" * 40)
    except Exception as e:
        print(f"❌ Error: {e}")

# Let's examine what the memory contains
print("\n🔍 Memory Contents:")
print("="*30)
print(buffer_memory.buffer_as_str)

# Now let's try a more sophisticated memory - Summary Memory
# This summarizes old conversations to save space while retaining key information
print("\n\n🧠 Testing Summary Memory (for longer conversations):")
print("="*60)

summary_memory = ConversationSummaryMemory(
    llm=llm,
    return_messages=True,
    max_token_limit=200  # When conversation exceeds this, it will summarize
)

summary_conversation = ConversationChain(
    llm=llm,
    memory=summary_memory,
    verbose=True
)

# Simulate a longer conversation that will trigger summarization
long_conversation = [
    "I'm John, a restaurant owner in downtown Seattle.",
    "I've been running my Italian restaurant 'Bella Vista' for 15 years.",
    "Recently, I've been struggling with online reviews and social media presence.",
    "My daughter keeps telling me I need to modernize my marketing approach.",
    "I have about 50 tables, serve traditional Italian cuisine, and my customers are mostly families and couples.",
    "The restaurant is located near the waterfront and we have a beautiful view.",
    "I'm particularly known for my homemade pasta and wood-fired pizza.",
    "What specific social media strategies would work best for my type of restaurant?",
    "That sounds good, but remember I'm not very tech-savvy. Can you suggest simple tools?",
    "Based on everything I've told you about my restaurant, what should be my top priority?"
]

for i, user_input in enumerate(long_conversation, 1):
    print(f"\n👤 Turn {i}: {user_input}")
    try:
        response = summary_conversation.predict(input=user_input)
        print(f"🤖 Assistant: {response}")
        if i % 3 == 0:  # Show memory state every 3 turns
            print(f"\n📝 Memory Summary: {summary_memory.moving_summary_buffer}")
        print("-" * 40)
    except Exception as e:
        print(f"❌ Error: {e}")

# Custom Memory with Specific Context Tracking
print("\n\n🎯 Building a Personal Assistant with Custom Memory:")
print("="*60)

# Create a custom prompt that emphasizes context awareness
custom_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a helpful personal assistant with excellent memory. 
    
    Your key traits:
    - Remember important details about the user (name, preferences, projects, etc.)
    - Refer back to previous conversations naturally
    - Be proactive in connecting current requests to past context
    - Ask clarifying questions when needed
    - Maintain a friendly, professional tone
    
    Always consider the full conversation history when responding."""),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}")
])

# Create memory specifically for this assistant
assistant_memory = ConversationBufferMemory(
    memory_key="history",
    return_messages=True
)

# Create the custom conversation chain
from langchain.chains import LLMChain

assistant_chain = LLMChain(
    llm=llm,
    prompt=custom_prompt,
    memory=assistant_memory,
    verbose=True
)

# Test the personal assistant
assistant_conversation = [
    "Hi, I'm Emily and I'm a freelance graphic designer.",
    "I'm working on rebranding project for a coffee shop called 'Morning Brew'.",
    "The owner wants a modern, minimalist design that appeals to young professionals.",
    "I'm struggling with color palette choices. Any suggestions?",
    "Great ideas! By the way, I have a client meeting tomorrow about this Morning Brew project. Can you help me prepare talking points?",
    "Perfect! One more thing - can you remind me what I told you about the target audience for Morning Brew?"
]

print("💬 Personal Assistant Conversation:")
for i, user_input in enumerate(assistant_conversation, 1):
    print(f"\n👤 Emily (Turn {i}): {user_input}")
    try:
        response = assistant_chain.run(input=user_input)
        print(f"🤖 Assistant: {response}")
        print("-" * 50)
    except Exception as e:
        print(f"❌ Error: {e}")

print("\n✅ Memory systems demonstration complete!")

🧠 Testing Basic Conversation Memory:

👤 Turn 1: Hi, I'm Sarah and I work as a marketing manager at a tech startup.


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[]
Human: Hi, I'm Sarah and I work as a marketing manager at a tech startup.
AI:[0m

[1m> Finished chain.[0m
🤖 Assistant: Hello Sarah! It's nice to meet you. As a marketing manager at a tech startup, I imagine you must be constantly coming up with new strategies to reach your target audience. What specific marketing tactics have you found to be most successful in the tech industry?
----------------------------------------

👤 Turn 2: I'm planning a product launch campaign for our new AI-powered analytics tool.


[1m> Entering new Co

In [18]:
# Example 5: RAG - Retrieval-Augmented Generation
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os

# Initialize components
llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    temperature=0.1  # Lower temperature for more factual responses
)

embeddings = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))

print("📚 RAG System: Building AI That Knows Your Documents")
print("="*60)

# Step 1: Create sample documents (in a real scenario, you'd load from files)
sample_documents = [
    """
    Company Policy: Remote Work Guidelines
    
    Effective Date: January 1, 2024
    
    Our company supports flexible work arrangements to promote work-life balance and productivity.
    
    Remote Work Eligibility:
    - Employees must have been with the company for at least 6 months
    - Role must be suitable for remote work (approved by department head)
    - Employee must have a dedicated workspace and reliable internet
    
    Remote Work Schedule:
    - Full-time remote: Up to 5 days per week from home
    - Hybrid: 2-3 days in office, remainder remote
    - Flexible hours between 7 AM - 6 PM local time
    
    Equipment and Technology:
    - Company provides laptop and necessary software licenses
    - Employees responsible for home internet and workspace setup
    - IT support available for company equipment
    
    Performance and Communication:
    - Weekly one-on-one meetings with direct supervisor
    - Participate in all team meetings via video conference
    - Maintain regular communication through Slack during business hours
    - Quarterly performance reviews remain unchanged
    
    Security Requirements:
    - Use VPN for all company network access
    - Enable two-factor authentication on all accounts
    - Do not work from public spaces when handling sensitive information
    """,
    
    """
    Employee Benefits Guide
    
    Health Insurance:
    - PPO and HMO options available
    - Company pays 80% of premiums for employee
    - Family coverage available (employee pays 40% of additional cost)
    - $500 annual health and wellness reimbursement
    
    Retirement Plan:
    - 401(k) with 4% company matching (100% vested after 2 years)
    - Financial planning consultations available quarterly
    - Automatic enrollment at 3% contribution rate
    
    Paid Time Off:
    - 15 days PTO for years 1-3
    - 20 days PTO for years 4-7  
    - 25 days PTO for years 8+
    - 10 paid holidays annually
    - Unlimited sick leave (manager approval for 5+ consecutive days)
    
    Professional Development:
    - $2,000 annual learning budget per employee
    - Conference attendance supported (pre-approval required)
    - LinkedIn Learning and Coursera subscriptions provided
    - Mentorship program available
    
    Other Benefits:
    - Flexible spending accounts (FSA) for healthcare and dependent care
    - Employee assistance program (EAP) for counseling and support
    - Company-sponsored team events and happy hours
    - Discounted gym memberships at partner locations
    """,
    
    """
    IT Security Policy
    
    Password Requirements:
    - Minimum 12 characters with mix of letters, numbers, and symbols
    - Changed every 90 days
    - Cannot reuse last 12 passwords
    - Use company-approved password manager
    
    Data Classification:
    - Public: Can be shared freely (marketing materials, job postings)
    - Internal: Company information not meant for external sharing
    - Confidential: Customer data, financial information, strategic plans
    - Restricted: Legal documents, HR records, security credentials
    
    Email Security:
    - Be suspicious of unexpected attachments or links
    - Verify sender identity for sensitive requests
    - Use encryption for confidential information
    - Report phishing attempts to IT immediately
    
    Device Management:
    - All company devices must be encrypted
    - Install security updates within 48 hours of release
    - Report lost or stolen devices immediately
    - Personal use of company devices is acceptable for reasonable personal activities
    
    Incident Response:
    - Report security incidents to IT within 1 hour of discovery
    - Do not attempt to fix security issues independently
    - Preserve evidence by avoiding further system use
    - Cooperate fully with security investigations
    """
]

# Step 2: Process documents into chunks
print("🔄 Step 1: Processing documents...")

# Create text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # Size of each chunk
    chunk_overlap=200,  # Overlap between chunks to maintain context
    length_function=len
)

# Split documents into chunks
all_chunks = []
for i, doc_text in enumerate(sample_documents):
    chunks = text_splitter.split_text(doc_text)
    for chunk in chunks:
        all_chunks.append(chunk)

print(f"✅ Created {len(all_chunks)} document chunks")

# Step 3: Create embeddings and vector store
print("🔄 Step 2: Creating vector embeddings...")

try:
    # Create vector store
    vectorstore = Chroma.from_texts(
        texts=all_chunks,
        embedding=embeddings,
        persist_directory="./chroma_db"  # Saves the database locally
    )
    print("✅ Vector store created successfully")
except Exception as e:
    print(f"❌ Error creating vector store: {e}")
    print("💡 Make sure you have chromadb installed: !pip install chromadb")

# Step 4: Create retrieval system
print("🔄 Step 3: Setting up retrieval system...")

# Create a retriever from the vector store
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}  # Return top 3 most similar chunks
)

# Create a custom prompt for the QA system
qa_prompt_template = """
You are a helpful assistant answering questions about company policies and procedures.

Use the following pieces of context to answer the question. If you don't know the answer based on the context provided, say that you don't have that information in the available documents.

Context:
{context}

Question: {question}

Please provide a clear, accurate answer based on the context above. If relevant, mention the specific policy or section you're referencing.

Answer:
"""

qa_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=qa_prompt_template
)

# Create the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": qa_prompt}
)

print("✅ RAG system ready!")

# Step 5: Test the system with various questions
test_questions = [
    "What are the requirements for remote work eligibility?",
    "How much PTO do I get after working here for 5 years?",
    "What should I do if I receive a suspicious email?",
    "Does the company provide any learning budget?",
    "What are the password requirements?",
    "Can I work from a coffee shop?",
    "What happens if I lose my company laptop?",
    "How much does the company contribute to health insurance?"
]

print("\n🎯 Testing RAG System with Sample Questions:")
print("="*60)

for i, question in enumerate(test_questions, 1):
    print(f"\n❓ Question {i}: {question}")
    
    try:
        # Get answer from RAG system
        result = qa_chain({"query": question})
        
        print(f"🤖 Answer: {result['result']}")
        
        # Show which documents were used (for transparency)
        print(f"\n📋 Sources used:")
        for j, doc in enumerate(result['source_documents']):
            preview = doc.page_content[:100].replace('\n', ' ')
            print(f"   Source {j+1}: {preview}...")
        
        print("-" * 50)
        
    except Exception as e:
        print(f"❌ Error: {e}")

# Step 6: Advanced RAG - Custom scoring and filtering
print("\n\n🔬 Advanced RAG: Custom Retrieval with Scoring")
print("="*50)

def advanced_rag_query(question, similarity_threshold=0.7):
    """
    Advanced RAG function that shows similarity scores and filters results
    """
    try:
        # Get documents with scores
        docs_and_scores = vectorstore.similarity_search_with_score(question, k=5)
        
        print(f"🔍 Searching for: '{question}'")
        print(f"📊 Similarity Results:")
        
        relevant_docs = []
        for i, (doc, score) in enumerate(docs_and_scores):
            print(f"   Document {i+1}: Score {score:.3f}")
            if score < similarity_threshold:  # Lower score = more similar
                relevant_docs.append(doc)
                preview = doc.page_content[:80].replace('\n', ' ')
                print(f"   ✅ RELEVANT: {preview}...")
            else:
                print(f"   ⚪ Below threshold")
        
        if relevant_docs:
            # Create context from relevant documents
            context = "\n\n".join([doc.page_content for doc in relevant_docs])
            
            # Generate answer using the LLM
            response = llm.invoke(f"""
            Based on this context:
            {context}
            
            Question: {question}
            
            Provide a clear, accurate answer. If the context doesn't contain enough information, say so.
            """)
            
            print(f"\n🤖 Generated Answer:")
            print(response.content)
        else:
            print("❌ No sufficiently relevant documents found")
            
    except Exception as e:
        print(f"❌ Error in advanced RAG: {e}")

# Test advanced RAG
advanced_questions = [
    "What's the company's policy on working from public places?",
    "How often do I need to change my password?"
]

for question in advanced_questions:
    print("\n" + "="*60)
    advanced_rag_query(question)

print("\n✅ RAG (Retrieval-Augmented Generation) demonstration complete!")

📚 RAG System: Building AI That Knows Your Documents
🔄 Step 1: Processing documents...
✅ Created 6 document chunks
🔄 Step 2: Creating vector embeddings...
✅ Vector store created successfully
🔄 Step 3: Setting up retrieval system...
✅ RAG system ready!

🎯 Testing RAG System with Sample Questions:

❓ Question 1: What are the requirements for remote work eligibility?
🤖 Answer: To be eligible for remote work, employees must have been with the company for at least 6 months, have a role suitable for remote work approved by their department head, and have a dedicated workspace with reliable internet. These requirements are outlined in the "Remote Work Eligibility" section of the Remote Work Guidelines policy.

📋 Sources used:
   Source 1: Company Policy: Remote Work Guidelines      Effective Date: January 1, 2024      Our company support...
   Source 2: Performance and Communication:     - Weekly one-on-one meetings with direct supervisor     - Partici...
   Source 3: Other Benefits:     - Fle

In [21]:
# Example 6: Agents - AI That Can Think and Act (Updated Version)
from langchain_openai import ChatOpenAI
from langchain.agents import create_react_agent, AgentExecutor, Tool
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.prompts import PromptTemplate
from langchain import hub
import os
import requests
from datetime import datetime

# Initialize LLM
llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    temperature=0.1  # Lower temperature for more consistent reasoning
)

print("🤖 Building AI Agents That Can Think and Act")
print("="*50)

# Step 1: Create individual tools
print("🔧 Step 1: Creating Tools...")

# Tool 1: Web Search (using DuckDuckGo as a free alternative)
try:
    search_tool = DuckDuckGoSearchRun()
    print("✅ DuckDuckGo search tool created")
except ImportError:
    print("⚠️ DuckDuckGo tool not available. Install with: pip install duckduckgo-search")
    search_tool = None

# Tool 2: Calculator
def calculator(expression):
    """Safely evaluate mathematical expressions"""
    try:
        # Only allow safe mathematical operations
        allowed_chars = set('0123456789+-*/()., ')
        if all(c in allowed_chars for c in expression):
            result = eval(expression)
            return f"The result of {expression} is {result}"
        else:
            return "Invalid mathematical expression"
    except Exception as e:
        return f"Error calculating: {e}"

calculator_tool = Tool(
    name="Calculator",
    description="Useful for mathematical calculations. Input should be a mathematical expression like '2+2' or '10*5'.",
    func=calculator
)

# Tool 3: Current Date and Time
def get_current_datetime():
    """Get current date and time"""
    now = datetime.now()
    return f"Current date and time: {now.strftime('%Y-%m-%d %H:%M:%S')}"

datetime_tool = Tool(
    name="DateTime",
    description="Get the current date and time",
    func=get_current_datetime
)

# Tool 4: Text Analysis
def analyze_text(text):
    """Analyze text for word count, character count, and readability"""
    if not text or not isinstance(text, str):
        return "Error: Please provide valid text to analyze"
    
    words = len(text.split())
    characters = len(text)
    sentences = text.count('.') + text.count('!') + text.count('?')
    
    if sentences > 0:
        avg_words_per_sentence = words / sentences
        readability = "Easy" if avg_words_per_sentence < 15 else "Medium" if avg_words_per_sentence < 25 else "Difficult"
    else:
        avg_words_per_sentence = 0
        readability = "Unknown"
    
    return f"""
    Text Analysis Results:
    - Word count: {words}
    - Character count: {characters}
    - Sentence count: {sentences}
    - Average words per sentence: {avg_words_per_sentence:.1f}
    - Readability level: {readability}
    """

text_analysis_tool = Tool(
    name="TextAnalyzer",
    description="Analyze text for statistics like word count, readability, etc. Input should be the text to analyze.",
    func=analyze_text
)

# Tool 5: Simple Data Storage (simulating a database)
stored_data = {}

def store_information(key_value_pair):
    """Store information for later retrieval. Format: 'key: value'"""
    try:
        if ':' not in key_value_pair:
            return "Invalid format. Use 'key: value' format"
        key, value = key_value_pair.split(':', 1)
        key = key.strip()
        value = value.strip()
        stored_data[key] = value
        return f"Stored '{key}' = '{value}'"
    except Exception as e:
        return f"Error storing data: {e}"

def retrieve_information(key):
    """Retrieve stored information by key"""
    key = key.strip()
    if key in stored_data:
        return f"'{key}' = '{stored_data[key]}'"
    else:
        return f"No information found for key '{key}'. Available keys: {list(stored_data.keys())}"

storage_tool = Tool(
    name="DataStorage",
    description="Store information for later use. Input format: 'key: value'",
    func=store_information
)

retrieval_tool = Tool(
    name="DataRetrieval", 
    description="Retrieve previously stored information. Input should be the key name.",
    func=retrieve_information
)

# Combine all tools (only include search if available)
tools = [
    calculator_tool, 
    datetime_tool,
    text_analysis_tool,
    storage_tool,
    retrieval_tool
]

if search_tool:
    tools.insert(0, search_tool)

print("✅ Created tools:", [tool.name for tool in tools])

# Step 2: Create the agent with updated approach
print("\n🔧 Step 2: Initializing Agent...")

# Get the ReAct prompt template from LangChain Hub
try:
    prompt = hub.pull("hwchase17/react")
    print("✅ Retrieved ReAct prompt from LangChain Hub")
except:
    # Fallback to manual prompt if hub is not available
    prompt = PromptTemplate.from_template("""
Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought: {agent_scratchpad}
""")
    print("✅ Using fallback ReAct prompt")

# Create the ReAct agent
agent = create_react_agent(llm, tools, prompt)

# Create agent executor
agent_executor = AgentExecutor(
    agent=agent, 
    tools=tools, 
    verbose=True,
    max_iterations=3,
    early_stopping_method="generate",
    handle_parsing_errors=True
)

print("✅ Agent initialized!")

# Step 3: Test the agent with various tasks
print("\n🎯 Step 3: Testing Agent Capabilities")
print("="*50)

test_scenarios = [
    {
        "task": "What's the current date and calculate how many days until New Year 2025?",
        "description": "Multi-step task requiring date lookup and calculation"
    },
    {
        "task": "Analyze this text for readability: 'Machine learning is a subset of artificial intelligence that enables computers to learn and make decisions from data without being explicitly programmed for every task.'",
        "description": "Text analysis task"
    },
    {
        "task": "Store this information: 'project_deadline: March 15, 2025', then retrieve it and tell me what you stored.",
        "description": "Data storage and retrieval"
    },
    {
        "task": "Calculate 15 * 23 + 47, then analyze the mathematical expression for word count.",
        "description": "Multiple tool usage"
    }
]

for i, scenario in enumerate(test_scenarios, 1):
    print(f"\n🎯 Scenario {i}: {scenario['description']}")
    print(f"📝 Task: {scenario['task']}")
    print("-" * 60)
    
    try:
        result = agent_executor.invoke({"input": scenario['task']})
        print(f"✅ Agent Result: {result['output']}")
    except Exception as e:
        print(f"❌ Agent Error: {e}")
    
    print("="*60)

# Step 4: Demonstrate agent reasoning with a complex task
print("\n\n🧠 Step 4: Complex Reasoning Task")
print("="*50)

complex_task = """
I need help planning a small project. Here's what I need you to do:
1. Store the project name as 'AI Tutorial Project'  
2. Get the current date
3. Calculate what the date will be 30 days from now
4. Analyze this project description for readability: 'This comprehensive tutorial covers LangChain fundamentals including chains, agents, memory systems, and real-world applications for building production-ready AI systems.'
5. Give me a summary of everything you've found and stored
"""

print(f"🎯 Complex Task: {complex_task}")
print("-" * 50)

try:
    result = agent_executor.invoke({"input": complex_task})
    print(f"🤖 Complex Task Result:")
    print(result['output'])
except Exception as e:
    print(f"❌ Error: {e}")

# Show what data was stored during interactions
print(f"\n📊 Data stored during agent interactions:")
for key, value in stored_data.items():
    print(f"   {key}: {value}")

print("\n✅ Agents demonstration complete!")

print("🔍 How Agents Work:")
print("""
1. OBSERVE: Agent receives a task from the user
2. THINK: Agent decides which tool(s) might be helpful
3. ACT: Agent uses the selected tool
4. OBSERVE: Agent examines the tool's output
5. THINK: Agent decides if more actions are needed
6. REPEAT: Until the task is complete or max iterations reached
""")

# Show what's in our stored data
print(f"\n📊 Data stored during agent interactions:")
for key, value in stored_data.items():
    print(f"   {key}: {value}")

print("\n✅ Agents demonstration complete!")

🤖 Building AI Agents That Can Think and Act
🔧 Step 1: Creating Tools...
⚠️ DuckDuckGo tool not available. Install with: pip install duckduckgo-search
✅ Created tools: ['Calculator', 'DateTime', 'TextAnalyzer', 'DataStorage', 'DataRetrieval']

🔧 Step 2: Initializing Agent...
✅ Retrieved ReAct prompt from LangChain Hub
✅ Agent initialized!

🎯 Step 3: Testing Agent Capabilities

🎯 Scenario 1: Multi-step task requiring date lookup and calculation
📝 Task: What's the current date and calculate how many days until New Year 2025?
------------------------------------------------------------


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mWe need to first get the current date and then calculate the number of days until New Year 2025.
Action: DateTime
Action Input: [0m❌ Agent Error: get_current_datetime() takes 0 positional arguments but 1 was given

🎯 Scenario 2: Text analysis task
📝 Task: Analyze this text for readability: 'Machine learning is a subset of artificial intelligence t

In [22]:
# Complete Customer Support Automation System
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

class CustomerSupportAI:
    def __init__(self, api_key):
        self.llm = ChatOpenAI(api_key=api_key, temperature=0.2)  # Low temperature for consistency
        self.embeddings = OpenAIEmbeddings(api_key=api_key)
        self.knowledge_base = None
        self.conversation_memory = {}  # Store conversations by customer ID
        
        # Intent classification
        self.intent_classifier = ChatPromptTemplate.from_template("""
        Classify the customer support request into one of these categories:
        - TECHNICAL_ISSUE: Problems with product functionality
        - BILLING_QUESTION: Payments, invoices, subscriptions
        - ACCOUNT_ACCESS: Login problems, password resets
        - FEATURE_REQUEST: Requests for new features
        - GENERAL_INQUIRY: General product/service questions
        - ESCALATION: Complex issues requiring a human agent
        
        Customer Message: {message}
        Classification: [Return only the category name]
        """)
        
        # Response generation template
        self.response_template = ChatPromptTemplate.from_template("""
        You are a helpful customer support agent for TechCorp, a software company.
        
        Customer Info:
        - Name: {customer_name}
        - Tier: {customer_tier}
        - Conversation History: {conversation_history}
        
        Issue Classification: {intent}
        Knowledge Base Info: {context}
        
        Customer Message: {message}
        
        Guidelines:
        1. Be empathetic and professional
        2. Provide step-by-step solutions
        3. Reference documentation when possible
        4. Escalate if problem can’t be fully solved
        5. Close with: "Is there anything else I can help you with today?"
        
        Response:
        """)
    
    def setup_knowledge_base(self, documents):
        """Setup knowledge base from company documents"""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        
        all_chunks = []
        for doc in documents:
            chunks = text_splitter.split_text(doc)
            all_chunks.extend(chunks)
        
        # Create vector store
        self.knowledge_base = Chroma.from_texts(
            texts=all_chunks,
            embedding=self.embeddings,
            persist_directory="./support_kb"
        )
        
        print(f"✅ Knowledge base setup with {len(all_chunks)} chunks")
    
    def classify_intent(self, message):
        """Classify customer intent"""
        try:
            response = self.llm.invoke(self.intent_classifier.format(message=message))
            return response.content.strip()
        except:
            return "GENERAL_INQUIRY"
    
    def get_relevant_context(self, message, intent):
        """Retrieve relevant docs from knowledge base"""
        if not self.knowledge_base:
            return "No knowledge base available"
        
        search_query = f"{intent.lower().replace('_', ' ')} {message}"
        relevant_docs = self.knowledge_base.similarity_search(search_query, k=3)
        
        return "\n".join([doc.page_content for doc in relevant_docs])
    
    def _should_escalate(self, message, agent_response, intent):
        """Decides if escalation is needed"""
        # Escalation if classification already suggested it
        if intent == "ESCALATION":
            return True
        
        # Escalation if response apologizes without solving
        if "unable to" in agent_response.lower() or "escalate" in agent_response.lower():
            return True
        
        # Escalation if conversation is looping without resolution
        if "again" in message.lower() and "not working" in message.lower():
            return True
        
        return False

    def handle_support_request(self, customer_id, customer_name, customer_tier, message):
        """Main interface for handling customer requests"""
        if customer_id not in self.conversation_memory:
            self.conversation_memory[customer_id] = []
        
        # Intent
        intent = self.classify_intent(message)
        
        # Knowledge
        context = self.get_relevant_context(message, intent)
        
        # Conversation history (last 3 exchanges)
        conversation_history = "\n".join([
            f"Customer: {msg['customer']}\nAgent: {msg['agent']}" 
            for msg in self.conversation_memory[customer_id][-3:]
        ]) or "First interaction"
        
        # Generate response
        try:
            response = self.llm.invoke(self.response_template.format(
                customer_name=customer_name,
                customer_tier=customer_tier,
                conversation_history=conversation_history,
                intent=intent,
                context=context,
                message=message
            ))
            
            agent_response = response.content
            
            # Save to memory
            self.conversation_memory[customer_id].append({
                'customer': message,
                'agent': agent_response,
                'intent': intent
            })
            
            # Check escalation
            escalation_needed = self._should_escalate(message, agent_response, intent)
            
            return {
                'response': agent_response,
                'intent': intent,
                'escalation_needed': escalation_needed,
                'confidence': 'high' if len(context) > 100 else 'medium'
            }
            
        except Exception as e:
            return {
                'response': "⚠️ I'm experiencing technical difficulties. Please connect with a human support agent.",
                'intent': "ESCALATION",
                'escalation_needed': True,
                'confidence': 'low',
                'error': str(e)
            }


In [24]:
import os
from dotenv import load_dotenv

load_dotenv() 

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

support_ai = CustomerSupportAI(api_key=OPENAI_API_KEY)

In [25]:
documents = [
    "To reset your password, click on 'Forgot Password' on the login page and follow the instructions.",
    "Our billing cycle runs from the 1st to the 30th of each month...",
    "TechCorp's product supports integration with XYZ platform..."
]
support_ai.setup_knowledge_base(documents)

response = support_ai.handle_support_request(
    customer_id="cust123",
    customer_name="Priya Sharma",
    customer_tier="Premium",
    message="I can't log in to my account and resetting the password isn't working."
)
print(response)


✅ Knowledge base setup with 3 chunks
{'response': "Hello Priya Sharma,\n\nI'm sorry to hear that you're having trouble logging into your account. Let's try to resolve this together. Have you tried resetting your password by clicking on 'Forgot Password' on the login page? If not, please follow the instructions provided there to reset your password.\n\nIf you're still unable to log in after resetting your password, please provide me with any error messages you're receiving so I can better assist you. Additionally, please make sure you're entering the correct email address associated with your account.\n\nIf you continue to experience difficulties, please let me know and we can explore other solutions together. Remember, our goal is to get you back into your account as quickly as possible.\n\nIs there anything else I can help you with today?", 'intent': 'ACCOUNT_ACCESS', 'escalation_needed': True, 'confidence': 'high'}
