In [9]:
import getpass
import os

os.environ['LLAMA_CLOUD_API_KEY'] = getpass.getpass()

In [None]:
import os
import asyncio
from llama_parse import LlamaParse
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Initialize LlamaParse
parser = LlamaParse(
    llama_cloud_api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
    result_type="markdown"
)

PERSIST_DIR = "./storage"
WALKTHROUGH_DIR = "./walkthrough_rewrites"

def extract_metadata(content):
    metadata = {}
    lines = content.split('\n')
    for line in lines[:5]:  # Check first 5 lines for metadata
        if line.startswith('Game:'):
            metadata['game'] = line.split(':')[1].strip()
        elif line.startswith('Part:'):
            metadata['part'] = line.split(':')[1].strip()
        elif line.startswith('Keywords:'):
            metadata['keywords'] = [kw.strip() for kw in line.split(':')[1].split(',')]
    return metadata

def create_relationships(metadata):
    relationships = {}
    game = metadata.get('game')
    part = metadata.get('part')
    
    if game and part:
        part_num = int(part)
        if part_num > 1:
            relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
                node_id=f"{game} - Part {part_num - 1}",
                metadata={"game": game, "part": str(part_num - 1)}
            )
        relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
            node_id=f"{game} - Part {part_num + 1}",
            metadata={"game": game, "part": str(part_num + 1)}
        )
    
    return relationships

async def process_walkthroughs():
    all_nodes = []
    
    for game_folder in os.listdir(WALKTHROUGH_DIR):
        game_path = os.path.join(WALKTHROUGH_DIR, game_folder)
        if os.path.isdir(game_path):
            for walkthrough_file in os.listdir(game_path):
                if walkthrough_file.endswith('.txt'):
                    file_path = os.path.join(game_path, walkthrough_file)
                    print(f"Processing: {file_path}")
                    
                    try:
                        documents = await parser.aload_data(file_path)
                        
                        if documents:
                            doc = documents[0]
                            metadata = extract_metadata(doc.text)
                            relationships = create_relationships(metadata)
                            
                            node = TextNode(
                                text=doc.text,
                                metadata=metadata,
                                relationships=relationships,
                                id_=f"{metadata.get('game', 'Unknown')} - Part {metadata.get('part', 'Unknown')}"
                            )
                            
                            all_nodes.append(node)
                    except Exception as e:
                        print(f"Error processing {file_path}: {e}")
    
    return all_nodes

async def main():
    # Delete existing storage if it exists
    if os.path.exists(PERSIST_DIR):
        print("Deleting existing storage...")
        import shutil
        shutil.rmtree(PERSIST_DIR)
    
    print("Processing all walkthroughs...")
    all_nodes = await process_walkthroughs()
    
    print(f"Total nodes created: {len(all_nodes)}")
    
    print("Creating new index...")
    index = VectorStoreIndex(all_nodes)
    
    print("Persisting new index...")
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    
    # Example query
    query_engine = index.as_query_engine()
    response = query_engine.query("How do I beat the first gym in Pokémon Black and White?")
    print("\nQuery Response:")
    print(response)

# Run the async main function
asyncio.run(main())

In [11]:
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.llms.openai import OpenAI
import os
from dotenv import load_dotenv

load_dotenv()

class WalkthroughAgent:
    def __init__(self):
        # Load the index
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        self.index = load_index_from_storage(storage_context)
        
        # Create a retriever
        self.retriever = VectorIndexRetriever(
            index=self.index,
            similarity_top_k=3  # Retrieve top 3 most relevant nodes
        )

        # Create an LLM
        self.llm = OpenAI(temperature=0, model="gpt-4")

        # Create a response synthesizer
        self.response_synthesizer = get_response_synthesizer(
            response_mode="compact",
            use_async=True
        )

        # Create a query engine
        self.query_engine = RetrieverQueryEngine(
            retriever=self.retriever,
            response_synthesizer=self.response_synthesizer,
            node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
        )

    async def run(self, query: str) -> dict:
        try:
            # Get the response from the query engine
            response = await self.query_engine.aquery(query)
            
            # Extract source nodes and their metadata
            source_nodes = response.source_nodes
            sources = []
            for node in source_nodes:
                sources.append({
                    'node_id': node.node.node_id,
                    'metadata': node.node.metadata,
                    'score': node.score,
                })
            
            return {
                "answer": response.response,
                "sources": sources
            }
        except Exception as e:
            return {
                "error": f"An error occurred while processing your question: {str(e)}",
                "sources": []
            }

# Usage example
async def main():
    agent = WalkthroughAgent()
    result = await agent.run("How do I beat the first gym in Pokémon Black and White?")
    
    print("Answer:")
    print(result['answer'])
    print("\nSources:")
    for source in result['sources']:
        print(f"Node ID: {source['node_id']}")
        print(f"Metadata: {source['metadata']}")
        print(f"Relevance Score: {source['score']}")
        print("---")

# Run the example
await main()

Answer:
Use Fire- and Flying-type moves against Whirlipede and Leavanny, and Water- or Rock-type moves against Dwebble. Alternatively, you can use TM70 (Flash) to lower the opponents' accuracy.

Sources:
Node ID: HeartGold_and_SoulSilver - Part 27
Metadata: {'game': 'HeartGold_and_SoulSilver', 'part': '27', 'keywords': ['Viridian Gym', "Oak's Lab", 'Kanto Power Plant', 'Cerulean Cave']}
Relevance Score: 0.837561583533997
---
Node ID: Black_and_White - Part 15
Metadata: {'game': 'Black_and_White', 'part': '15', 'keywords': ['Champion']}
Relevance Score: 0.8310355877712552
---
Node ID: Black_and_White - Part 4
Metadata: {'game': 'Black_and_White', 'part': '4', 'keywords': ['Pinwheel Forest', 'Skyarrow Bridge', 'Castelia City', 'Castelia Gym']}
Relevance Score: 0.8290938738401904
---


In [6]:
# Pipeline that processes rewritten walkthroughs, embeds them in vector store with metadata and relationships
import os
import asyncio
from llama_parse import LlamaParse
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo
from dotenv import load_dotenv

load_dotenv()

parser = LlamaParse(
    llama_cloud_api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
    result_type="markdown"
)

WALKTHROUGH_DIR = "./walkthrough_rewrites"
PERSIST_DIR = "./storage"

def extract_metadata(content):
    metadata = {}
    lines = content.split('\n')
    for line in lines[:5]:  # Check first 5 lines for metadata
        if line.startswith('# Game:'):
            metadata['game'] = line.split(':')[1].strip()
        elif line.startswith('# Part:'):
            metadata['part'] = line.split(':')[1].strip()
        elif line.startswith('# Keywords:'):
            metadata['keywords'] = [kw.strip() for kw in line.split(':')[1].split(',')]
    return metadata

def create_relationships(metadata):
    relationships = {}
    game = metadata.get('game')
    part = metadata.get('part')
    
    if game and part:
        part_num = int(part)
        if part_num > 1:
            relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
                node_id=f"{game} - Part {part_num - 1}",
                metadata={"game": game, "part": str(part_num - 1)}
            )
        relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
            node_id=f"{game} - Part {part_num + 1}",
            metadata={"game": game, "part": str(part_num + 1)}
        )
    
    return relationships

async def process_single_file(file_path):
    print(f"Processing: {file_path}")
    
    try:
        documents = await parser.aload_data(file_path)
        
        if documents:
            doc = documents[0]
            metadata = extract_metadata(doc.text)
            relationships = create_relationships(metadata)
            
            node = TextNode(
                text=doc.text,
                metadata=metadata,
                relationships=relationships,
                id_=f"{metadata.get('game', 'Unknown')} - Part {metadata.get('part', 'Unknown')}"
            )
            
            print(f"Created node: {node.id_}")
            return node
        else:
            print(f"No documents were parsed for {file_path}")
            return None
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

async def process_all_walkthroughs():
    all_nodes = []
    
    for game_folder in os.listdir(WALKTHROUGH_DIR):
        game_path = os.path.join(WALKTHROUGH_DIR, game_folder)
        if os.path.isdir(game_path):
            for walkthrough_file in os.listdir(game_path):
                if walkthrough_file.endswith('.txt'):
                    file_path = os.path.join(game_path, walkthrough_file)
                    node = await process_single_file(file_path)
                    if node:
                        all_nodes.append(node)
    
    return all_nodes

# This line enables async support in Jupyter
%autoawait asyncio

async def main():
    # Process all walkthrough files
    print("Processing all walkthroughs...")
    all_nodes = await process_all_walkthroughs()
    print(f"Total nodes created: {len(all_nodes)}")
    
    # Create and save the index
    print("Creating new index...")
    index = VectorStoreIndex(all_nodes)
    
    print("Persisting new index...")
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    
    # Example query
    query_engine = index.as_query_engine()
    response = query_engine.query("How do I beat the first gym in Pokémon Black and White?")
    print("\nQuery Response:")
    print(response)

# Run the main function
await main()

Processing all walkthroughs...
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 1 - Introduction, Aspertia City, Route 19, Floccesy Town.txt
Started parsing the file under job_id 0f5d04ac-dd10-4df2-953c-f798c3326b18
Created node: Black2_and_White2 - Part 1
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 10 - Mistralton City, Route 7, Celestial Tower, Mistralton Gym.txt
Started parsing the file under job_id 329b0f83-a03d-4ce9-9710-cd561a0f34ff
Created node: Black2_and_White2 - Part 10
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 11 - Lentimas Town, Strange House, Reversal Mountain, Undella Town, Undella Bay.txt
Started parsing the file under job_id 1ad9ca8a-9391-4c4e-ab95-94652de3197e
Created node: Black2_and_White2 - Part 11
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 12 - Route 13, Lacunosa Town, Route 12, Village Bridge, Route 11.txt
Started parsing the file u

In [9]:
import random
from llama_index.core import StorageContext, load_index_from_storage

def inspect_random_nodes(num_nodes=5):
    # Load the index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    
    # Get all nodes
    all_nodes = index.docstore.docs.values()
    
    # Select random nodes
    sample_nodes = random.sample(list(all_nodes), min(num_nodes, len(all_nodes)))
    
    for i, node in enumerate(sample_nodes, 1):
        print(f"\nInspecting Node {i}:")
        print(f"ID: {node.id_}")
        print("Metadata:")
        for key, value in node.metadata.items():
            print(f"  {key}: {value}")
        print("Relationships:")
        for rel_type, rel_info in node.relationships.items():
            print(f"  {rel_type}: {rel_info.node_id}")
        print("Text Preview:")
        print(node.text[:200] + "...")  # First 200 characters of the text
        print("-" * 50)

# Inspect 5 random nodes
inspect_random_nodes(5)


Inspecting Node 1:
ID: Gold_and_Silver - Part 7
Metadata:
  game: Gold_and_Silver
  part: 7
  keywords: ['Route 35', 'National Park', 'Route 36', 'Route 37']
Relationships:
  NodeRelationship.PREVIOUS: Gold_and_Silver - Part 6
  NodeRelationship.NEXT: Gold_and_Silver - Part 8
Text Preview:
# Game: Gold_and_Silver

# Part: 7

# Keywords: Route 35, National Park, Route 36, Route 37

# Pokémon Gold and Silver Walkthrough - Part 7: Route 35, National Park, Route 36, Route 37

# Route 35

Lo...
--------------------------------------------------

Inspecting Node 2:
ID: X_and_Y - Part 7
Metadata:
  game: X_and_Y
  part: 7
  keywords: ['Route 12', 'Azure Bay', 'Coumarine City', 'Coumarine Gym', 'Route 13', 'Kalos Power Plant']
Relationships:
  NodeRelationship.PREVIOUS: X_and_Y - Part 6
  NodeRelationship.NEXT: X_and_Y - Part 8
Text Preview:
# Game: X_and_Y

# Part: 7

# Keywords: Route 12, Azure Bay, Coumarine City, Coumarine Gym, Route 13, Kalos Power Plant

# Part 7 Walkthrough Summary: Ro

In [18]:
# Parses and loads docs into llama index from walkthrough_rewrites

from llama_index.core import VectorStoreIndex, StorageContext
import nest_asyncio
nest_asyncio.apply()
import os
import shutil
from llama_parse import LlamaParse
import asyncio

parser = LlamaParse(
    llama_cloud_api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
    result_type="markdown"
)

PERSIST_DIR = "./storage"
WALKTHROUGH_DIR = "./walkthrough_rewrites"

async def process_walkthroughs():
    all_documents = []
    
    for game_folder in os.listdir(WALKTHROUGH_DIR):
        game_path = os.path.join(WALKTHROUGH_DIR, game_folder)
        if os.path.isdir(game_path):
            for walkthrough_file in os.listdir(game_path):
                if walkthrough_file.endswith('.txt'):
                    file_path = os.path.join(game_path, walkthrough_file)
                    print(f"Processing: {file_path}")
                    documents = await parser.aload_data(file_path)
                    all_documents.extend(documents)
    
    return all_documents

async def main():
    # Delete existing storage if it exists
    if os.path.exists(PERSIST_DIR):
        print("Deleting existing storage...")
        shutil.rmtree(PERSIST_DIR)
    
    print("Processing all walkthroughs...")
    all_documents = await process_walkthroughs()
    
    print("Creating new index...")
    index = VectorStoreIndex.from_documents(all_documents)
    
    print("Persisting new index...")
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    
    query_engine = index.as_query_engine()
    response = query_engine.query("How do I beat Bianca's challenge?")
    print("\nQuery Response:")
    print(response)

# Run the async main function
asyncio.run(main())

Deleting existing storage...
Processing all walkthroughs...
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 1 - Introduction, Aspertia City, Route 19, Floccesy Town.txt
Started parsing the file under job_id 83a98112-2a0f-4229-b93a-1ef7012062f4
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 10 - Mistralton City, Route 7, Celestial Tower, Mistralton Gym.txt
Started parsing the file under job_id 036fba66-7694-4527-8e6a-663e39edc54f
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 11 - Lentimas Town, Strange House, Reversal Mountain, Undella Town, Undella Bay.txt
Started parsing the file under job_id 53119669-ce26-4093-92c2-00bc6d363d20
Processing: ./walkthrough_rewrites\Black2_and_White2\Black2_and_White2 - Part 12 - Route 13, Lacunosa Town, Route 12, Village Bridge, Route 11.txt
Started parsing the file under job_id a4a69d3c-0cae-4dfe-a4f8-f0bcc51b3e82
Processing: ./walkthrough_rewrites\Black2_and_W

In [26]:
import os
from llama_parse import LlamaParse
import asyncio
from pprint import pprint
import re

parser = LlamaParse(
    llama_cloud_api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
    result_type="markdown"
)

WALKTHROUGH_DIR = "./walkthrough_rewrites"

def extract_metadata(content):
    metadata = {}
    lines = content.split('\n')
    for line in lines[:5]:  # Check first 5 lines for metadata
        if line.startswith('# Game:'):
            metadata['game'] = line.split(':')[1].strip()
        elif line.startswith('# Part:'):
            metadata['part'] = line.split(':')[1].strip()
        elif line.startswith('# Keywords:'):
            metadata['keywords'] = [kw.strip() for kw in line.split(':')[1].split(',')]
    return metadata

async def parse_sample_document():
    sample_path = os.path.join(WALKTHROUGH_DIR, "Black_and_White", "Black_and_White - Part 1 - Introduction, Nuvema Town, Juniper's Lab, Route 1, Accumula Town.txt")
    
    print(f"Parsing sample document: {sample_path}")
    
    parsed_docs = await parser.aload_data(sample_path)
    
    print(f"\nNumber of parsed documents: {len(parsed_docs)}")
    
    if parsed_docs:
        first_doc = parsed_docs[0]
        
        # Extract and add metadata
        metadata = extract_metadata(first_doc.text)
        first_doc.metadata.update(metadata)
        
        print("\nExamining first parsed document with added metadata:")
        print(f"Type: {type(first_doc)}")
        print("\nMetadata:")
        pprint(first_doc.metadata)
        print("\nContent preview (first 500 characters):")
        print(first_doc.text[:500] + "...")
        
        print("\nDocument structure:")
        pprint(vars(first_doc))
    else:
        print("No documents were parsed.")

# Run the async function
await parse_sample_document()

Parsing sample document: ./walkthrough_rewrites\Black_and_White\Black_and_White - Part 1 - Introduction, Nuvema Town, Juniper's Lab, Route 1, Accumula Town.txt
Started parsing the file under job_id 9e8babe6-8e72-4c37-9f83-ee795d4f385c

Number of parsed documents: 1

Examining first parsed document with added metadata:
Type: <class 'llama_index.core.schema.Document'>

Metadata:
{'game': 'Black_and_White',
 'keywords': ['Introduction',
              'Nuvema Town',
              "Juniper's Lab",
              'Route 1',
              'Accumula Town'],
 'part': '1'}

Content preview (first 500 characters):
# Game: Black_and_White

# Part: 1

# Keywords: Introduction, Nuvema Town, Juniper's Lab, Route 1, Accumula Town

# Part 1: Nuvema Town to Accumula Town Walkthrough

# 1. Starting Out

- Select New Game and choose your character's gender.
- Professor Juniper introduces you to the Pokémon world and your rivals, Cheren and Bianca. Enter your name.

# 2. Nuvema Town

- In your room, choose 

In [4]:
import requests
from bs4 import BeautifulSoup
import os
from time import sleep

def get_html(url):
    response = requests.get(url)
    return response.text

def parse_walkthrough_parts(game_url):
    html_content = get_html(game_url)
    soup = BeautifulSoup(html_content, 'html.parser')
    parts = []

    table = soup.find('table', {'class': 'roundy'})

    if table:
        for row in table.find_all('tr', {'style': 'background: #FFF;'}):
            part = row.find('th')
            keywords = row.find('td')
            if part and keywords:
                part_name = part.get_text(strip=True)
                part_url = 'https://bulbapedia.bulbagarden.net' + part.find('a')['href']
                keywords_text = keywords.get_text(strip=True)
                parts.append((part_name, part_url, keywords_text))

    return parts

def scrape_walkthrough_text(part_url):
    html_content = get_html(part_url)
    soup = BeautifulSoup(html_content, 'html.parser')
    
    walkthrough_div = soup.find('div', class_='mw-parser-output')
    if walkthrough_div:
        walkthrough_text = walkthrough_div.get_text(separator='\n', strip=True)
        return walkthrough_text
    return ""

def scrape_core_game_walkthrough(core_game_url):
    parts = parse_walkthrough_parts(core_game_url)
    walkthrough_data = []

    for part_name, part_url, keywords in parts:
        text = scrape_walkthrough_text(part_url)
        walkthrough_data.append({
            'Part': part_name,
            'URL': part_url,
            'Keywords': keywords,
            'Text': text
        })
        sleep(1)  # To avoid overwhelming the server

    return walkthrough_data

def save_walkthrough_as_text(game_name, walkthrough_data):
    folder_path = os.path.join('walkthroughs', game_name)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    for part in walkthrough_data:
        filename = os.path.join(folder_path, f"{part['Part'].replace('/', '_')}.txt")
        
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(f"Part: {part['Part']}\n")
            file.write(f"URL: {part['URL']}\n")
            file.write(f"Keywords: {part['Keywords']}\n\n")
            file.write("Walkthrough Text:\n")
            file.write(part['Text'])
        
        print(f"Saved {filename}")

def main():
    core_games = [
        ('LegendsArceus', 'https://bulbapedia.bulbagarden.net/wiki/Appendix:Legends:_Arceus_walkthrough'),
        ('Scarlet_and_Violet', 'https://bulbapedia.bulbagarden.net/wiki/Appendix:Scarlet_and_Violet_walkthrough')
    ]

    # Create the main walkthroughs folder if it doesn't exist
    if not os.path.exists('walkthroughs'):
        os.makedirs('walkthroughs')

    for game_name, game_url in core_games:
        print(f"Scraping walkthrough for {game_name}...")
        walkthrough_data = scrape_core_game_walkthrough(game_url)
        save_walkthrough_as_text(game_name, walkthrough_data)
        print(f"Finished scraping {game_name}\n")

if __name__ == '__main__':
    main()

Scraping walkthrough for LegendsArceus...
Saved walkthroughs\LegendsArceus\Part 1.txt
Saved walkthroughs\LegendsArceus\Part 2.txt
Saved walkthroughs\LegendsArceus\Part 3.txt
Saved walkthroughs\LegendsArceus\Part 4.txt
Saved walkthroughs\LegendsArceus\Part 5.txt
Saved walkthroughs\LegendsArceus\Part 6.txt
Saved walkthroughs\LegendsArceus\Part 7.txt
Saved walkthroughs\LegendsArceus\Part 8.txt
Saved walkthroughs\LegendsArceus\Part 9.txt
Saved walkthroughs\LegendsArceus\Part 10.txt
Saved walkthroughs\LegendsArceus\Part 11.txt
Saved walkthroughs\LegendsArceus\Part 12.txt
Saved walkthroughs\LegendsArceus\Part 13.txt
Saved walkthroughs\LegendsArceus\Part 14.txt
Saved walkthroughs\LegendsArceus\Part 15.txt
Saved walkthroughs\LegendsArceus\Part 16.txt
Saved walkthroughs\LegendsArceus\Part 17.txt
Saved walkthroughs\LegendsArceus\Requests: Part 1.txt
Saved walkthroughs\LegendsArceus\Requests: Part 2.txt
Saved walkthroughs\LegendsArceus\Requests: Part 3.txt
Saved walkthroughs\LegendsArceus\Request

In [6]:
import os
from typing import List
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from dotenv import load_dotenv

load_dotenv()

def load_and_embed_walkthroughs(file_paths: List[str]) -> FAISS:
    documents = []
    for file_path in file_paths:
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                # Extract metadata from the content
                lines = content.split('\n')
                metadata = {
                    'game': lines[0].split(': ')[1],
                    'part': lines[1].split(': ')[1],
                    'keywords': lines[2].split(': ')[1],
                    'file_name': os.path.basename(file_path)
                }
                # Create a Document with the content and metadata
                doc = Document(page_content=content, metadata=metadata)
                documents.append(doc)
        except Exception as e:
            print(f"Error loading {file_path}: {e}")

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)
    
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(texts, embeddings)
    
    return vectorstore

# Test with the first 4 files
test_files = [
    r"walkthrough_rewrites\Black_and_White\Black_and_White - Part 1 - Introduction, Nuvema Town, Juniper's Lab, Route 1, Accumula Town.txt",
    r"walkthrough_rewrites\Black_and_White\Black_and_White - Part 2 - Route 2, Striaton City, The Dreamyard, Striaton Gym.txt",
    r"walkthrough_rewrites\Black_and_White\Black_and_White - Part 3 - Route 3, Wellspring Cave, Nacrene City, Nacrene Gym.txt",
    r"walkthrough_rewrites\Black_and_White\Black_and_White - Part 4 - Pinwheel Forest, Skyarrow Bridge, Castelia City, Castelia Gym.txt"
]

# Create and save the test vectorstore
test_vectorstore = load_and_embed_walkthroughs(test_files)
test_vectorstore.save_local("test_vectorstore")

print("Test vectorstore created and saved successfully!")

# Function to get all walkthrough files
def get_all_walkthrough_files(root_dir: str) -> List[str]:
    all_files = []
    for root, dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.txt'):
                all_files.append(os.path.join(root, file))
    return all_files

# Uncomment the following lines when ready to process all files
all_walkthrough_files = get_all_walkthrough_files('walkthrough_rewrites')
full_vectorstore = load_and_embed_walkthroughs(all_walkthrough_files)
full_vectorstore.save_local("full_vectorstore")
print("Full vectorstore created and saved successfully!")

Created a chunk of size 1081, which is longer than the specified 1000
Created a chunk of size 1067, which is longer than the specified 1000
Created a chunk of size 1236, which is longer than the specified 1000


Test vectorstore created and saved successfully!
Full vectorstore created and saved successfully!
