# RAG-Enhanced Market Research Agent

This notebook demonstrates how to enhance your market research agents with Retrieval-Augmented Generation (RAG) using:
- **Amazon Bedrock** for embeddings and text generation
- **Amazon OpenSearch** for vector storage
- **SageMaker** for managed infrastructure
- **Real market data** for accurate analysis

## Benefits of RAG for Market Research
- Access to current market data and reports
- Company-specific intelligence integration
- Regulatory document retrieval
- Historical market analysis

In [None]:
# Install required packages
# The leading "!" runs each line as a shell command from the notebook kernel;
# "-q" keeps pip's output quiet.
# NOTE(review): LangChain's PyPDFLoader (imported in the next cell) is believed
# to depend on the `pypdf` package rather than `PyPDF2` — confirm and add
# `pypdf` here if PDF loading is used.
!pip install -q boto3 langchain opensearch-py faiss-cpu PyPDF2 python-docx
!pip install -q langchain-aws langchain-community

In [None]:
import boto3
import json
import numpy as np
from typing import List, Dict, Any
from langchain.embeddings import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

## 1. Setup Bedrock Client and Embeddings

In [None]:
# Bedrock runtime client shared by the embeddings model and the chat model.
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

# Embeddings model used to vectorize documents and queries.
embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=bedrock_client,
)

# Chat model used for text generation; a low temperature keeps answers
# close to deterministic.
llm = BedrockChat(
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    client=bedrock_client,
    model_kwargs=dict(max_tokens=4000, temperature=0.1),
)

print("✅ Bedrock client and models initialized successfully")

## 2. Save and Load Vector Store with S3 Support

In [None]:
import tarfile
import tempfile
import os

# Configuration - UPDATE THESE VALUES
# Placeholder bucket name; it must be replaced with a real bucket before any
# S3 upload or download is attempted.
S3_BUCKET = "your-sagemaker-bucket"  # Replace with your bucket name
# Key prefix for stored archives. The S3 helper functions in this cell build
# object keys by concatenating their prefix argument directly with the archive
# file name, so a prefix passed to them should end with "/".
S3_PREFIX = "vectorstores/market-research/"
# Name for the local vectorstore directory. The helpers in this cell also use
# their local-name argument as the base of the archive file names.
LOCAL_STORE_NAME = "market_research_vectorstore"

def save_vectorstore_to_s3(vectorstore, bucket_name: str, s3_prefix: str, local_name: str) -> dict:
    """Save a vectorstore locally, archive it, and upload the archive to S3.

    The vectorstore is written to the directory ``local_name``, packed into a
    timestamped ``.tar.gz`` in the current working directory, and uploaded
    twice: once under the timestamped key and once under a fixed
    ``<local_name>_latest.tar.gz`` key. The temporary archive is always
    removed afterwards, whether or not the upload succeeded.

    Args:
        vectorstore: Object exposing ``save_local(path)``.
        bucket_name: Destination S3 bucket.
        s3_prefix: Key prefix; it is concatenated directly with the archive
            file name, so it should end with ``/``.
        local_name: Local directory to save into; also the base name of the
            archive.

    Returns:
        On success, a dict with ``s3_uri``, ``latest_uri`` and ``local_path``.
        If the upload fails, a dict with ``local_path`` and ``error`` (the
        local copy of the vectorstore is kept in that case).
    """

    # 1. Save locally first
    print("💾 Saving vectorstore locally...")
    vectorstore.save_local(local_name)
    print(f"✅ Local save complete: {local_name}/")

    # 2. Create compressed archive
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_name = f"{local_name}_{timestamp}.tar.gz"

    print(f"📦 Creating archive: {archive_name}")
    try:
        with tarfile.open(archive_name, "w:gz") as tar:
            tar.add(local_name, arcname=local_name)

        # 3. Upload to S3
        s3_client = boto3.client('s3')
        s3_key = f"{s3_prefix}{archive_name}"

        try:
            print(f"☁️ Uploading to S3: s3://{bucket_name}/{s3_key}")
            s3_client.upload_file(archive_name, bucket_name, s3_key)
            print("✅ S3 upload successful!")

            # Also save the latest version
            latest_key = f"{s3_prefix}{local_name}_latest.tar.gz"
            s3_client.upload_file(archive_name, bucket_name, latest_key)
            print(f"✅ Latest version saved: s3://{bucket_name}/{latest_key}")

            return {
                "s3_uri": f"s3://{bucket_name}/{s3_key}",
                "latest_uri": f"s3://{bucket_name}/{latest_key}",
                "local_path": local_name
            }

        except Exception as e:
            # Best-effort upload: report the failure but keep the local save usable.
            print(f"❌ S3 upload failed: {str(e)}")
            print("💡 Vectorstore is still saved locally for use")
            return {"local_path": local_name, "error": str(e)}
    finally:
        # Remove the temporary archive on every path, so failed uploads do not
        # leave timestamped archives piling up in the working directory.
        if os.path.exists(archive_name):
            os.remove(archive_name)

def _extract_archive_safely(archive_path: str, dest_dir: str = ".") -> None:
    """Extract a ``.tar.gz`` archive into ``dest_dir``, refusing unsafe members.

    Uses tarfile's built-in ``data`` extraction filter when the running Python
    provides it. Otherwise every member is checked before extraction: entries
    that would land outside ``dest_dir`` and link entries are rejected.

    Raises:
        ValueError: In the fallback path, if the archive contains a member
            that escapes ``dest_dir`` or is a symbolic/hard link.
    """
    with tarfile.open(archive_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=dest_dir, filter="data")
            return

        # Fallback for interpreters without extraction filters.
        root = os.path.realpath(dest_dir)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            escapes_root = os.path.commonpath([root, target]) != root
            if escapes_root or member.issym() or member.islnk():
                raise ValueError(f"Unsafe member in archive: {member.name}")
        tar.extractall(path=dest_dir)


def load_vectorstore_from_s3(bucket_name: str, s3_prefix: str, local_name: str, embeddings):
    """Load a FAISS vectorstore from local storage, falling back to S3.

    If the directory ``local_name`` already exists it is loaded directly.
    Otherwise ``<s3_prefix><local_name>_latest.tar.gz`` is downloaded from
    ``bucket_name``, extracted into the current working directory, and loaded.
    The downloaded archive is removed afterwards on every path.

    Args:
        bucket_name: S3 bucket holding the archive.
        s3_prefix: Key prefix; it is concatenated directly with the archive
            file name, so it should end with ``/``.
        local_name: Local vectorstore directory and archive base name.
        embeddings: Embeddings object passed through to ``FAISS.load_local``.

    Returns:
        The vectorstore returned by ``FAISS.load_local``.

    Raises:
        FileNotFoundError: If downloading, extracting, or loading from S3
            fails; the underlying exception is attached as ``__cause__``.
    """

    # NOTE(review): FAISS.load_local deserializes pickled data, so only load
    # vectorstores from locations you trust. Recent langchain-community
    # releases are also believed to require allow_dangerous_deserialization=True
    # for this call — confirm against the installed version.

    # Try local first
    if os.path.exists(local_name):
        print(f"📂 Loading from local storage: {local_name}")
        return FAISS.load_local(local_name, embeddings)

    # Try S3
    s3_client = boto3.client('s3')
    latest_key = f"{s3_prefix}{local_name}_latest.tar.gz"
    archive_name = f"{local_name}_latest.tar.gz"

    try:
        print(f"☁️ Downloading from S3: s3://{bucket_name}/{latest_key}")

        # Download archive
        s3_client.download_file(bucket_name, latest_key, archive_name)

        # Extract archive; members are validated because the archive comes
        # from outside this process.
        print("📦 Extracting archive...")
        _extract_archive_safely(archive_name)

        print("✅ S3 download and extraction complete")
        return FAISS.load_local(local_name, embeddings)

    except Exception as e:
        print(f"❌ S3 download failed: {str(e)}")
        # Chain the original error so the real cause (permissions, missing
        # key, bad archive, ...) stays visible in the traceback.
        raise FileNotFoundError(f"Vectorstore not found locally or in S3: {local_name}") from e
    finally:
        # Clean up the downloaded archive even when extraction or loading failed.
        if os.path.exists(archive_name):
            os.remove(archive_name)

# Confirm the helpers are defined and remind the user to configure the bucket.
print(
    "✅ S3 vectorstore functions ready!",
    "💡 Update S3_BUCKET variable above before using S3 features",
    sep="\n",
)