In [None]:
#!pip install beautifulsoup4

### **CODE FOR SCRAPING THE DATA**

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import time

def scrape_product_page(url, timeout=30):
    """Fetch one product page and extract its description, symptoms, Q&A and models.

    Every page section is treated as optional: when the expected markup is
    missing, the matching field is returned empty instead of the scrape
    aborting with ``AttributeError``/``IndexError``.

    Args:
        url: Absolute URL of the product page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict with the keys
        ``description`` (str),
        ``compatible_models_list`` (list of dicts with ``Brand``, ``Model``
        and ``model_link``),
        ``qna_list`` (list of dicts with ``question`` and ``answer``) and
        ``Symptoms_fixed`` (list of str).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    print(f"Scraping product page: {url}")
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages rather than parsing them as if they were a product.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # --- Description: text of the first <div> inside the description container.
    description = ''
    description_box = soup.find('div', class_='pd__description pd__wrap mt-3')
    if description_box is not None:
        description_body = description_box.find('div')
        if description_body is not None:
            description = description_body.text.strip()

    # Disabled extractors kept for reference:
    #price = soup.find('span', class_='price pd__price')['content'] if soup.find('span', class_='price') else ''
    #video_link = soup.find('div', class_='yt-video').find('iframe')['src'] if soup.find('div', class_='yt-video') else ''

    # --- Symptoms: a '|'-separated text node that follows the bold header.
    symptoms = []
    symptoms_box = soup.find('div', class_='pd__wrap row')
    symptoms_header = None
    if symptoms_box is not None:
        symptoms_header = symptoms_box.find('div', class_='bold mb-1')
    if symptoms_header is not None:
        # `string=True` selects the next text-node sibling (replaces deprecated `text=True`).
        symptoms_text = symptoms_header.find_next_sibling(string=True)
        if symptoms_text is not None:
            symptoms = [s.strip() for s in symptoms_text.split('|') if s.strip()]

    # --- Q&A: each entry carries the question and the answer as two 'js-searchKeys' divs.
    qna_list = []
    for entry in soup.find_all('div', class_='qna__question js-qnaResponse'):
        texts = entry.find_all('div', class_='js-searchKeys')
        if len(texts) < 2:
            # Incomplete entry (question without answer): skip it.
            continue
        qna_list.append({
            'question': texts[0].text.strip(),
            'answer': texts[1].text.strip()})

    # --- Compatible models: one row per model inside the cross-reference list.
    model_list = []
    models_box = soup.find('div', class_='pd__crossref__list js-dataContainer js-infiniteScroll')
    model_rows = models_box.find_all('div', class_='row') if models_box is not None else []
    for row in model_rows:
        model_link = row.find('a')
        if model_link is None:
            # Rows without a link carry no model to record.
            continue
        brand_cell = row.find('div', class_='col-6 col-md-3')
        model_list.append({
            'Brand': brand_cell.text.strip() if brand_cell is not None else '',
            'Model': model_link.text.strip(),
            'model_link': model_link.get('href', '')})

    return {
        'description': description,
        #'assist_video': video_link,
        'compatible_models_list': model_list,
        'qna_list': qna_list,
        'Symptoms_fixed': symptoms
    }

def _part_number_text(part, css_class):
    """Return the stripped <strong> text inside the first div with `css_class`, or ''."""
    box = part.find('div', class_=css_class)
    strong = box.find('strong') if box is not None else None
    return strong.text.strip() if strong is not None else ''


def scrape_category_parts(category_url, max_parts_per_subcategory=3, timeout=30):
    """Scrape the parts listed under every subcategory of one category page.

    The second ``ul.nf__links`` list on the category page is taken as the list
    of part subcategories. For each subcategory, the first
    ``max_parts_per_subcategory`` parts are read from the listing and enriched
    with the details returned by ``scrape_product_page``.

    Args:
        category_url: Absolute URL of the category page.
        max_parts_per_subcategory: Upper bound of parts scraped per
            subcategory; ``None`` scrapes every part on the listing page.
        timeout: Seconds to wait for each listing request.

    Returns:
        list of dicts, one per part, with the keys ``name``, ``ps_number``,
        ``mfg_number``, ``category``, ``product_url``, ``description``,
        ``qna_list``, ``compatible_models`` and ``Symptoms fixed``.
    """
    from urllib.parse import urljoin

    base_url = "https://www.partselect.com"
    all_parts = []

    response = requests.get(category_url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the list of all part categories (the second 'nf__links' list on the page).
    all_nf_links = soup.find_all('ul', class_='nf__links')
    if len(all_nf_links) < 2:
        # Surface the problem instead of silently returning nothing.
        print(f"No part-category list found at {category_url} (HTTP {response.status_code})")
        return all_parts

    for li in all_nf_links[1].find_all('li'):
        subcategory_link = li.find('a')
        if subcategory_link is None or not subcategory_link.get('href'):
            # List item without a usable link: nothing to follow.
            continue

        # urljoin copes with both site-relative and absolute hrefs.
        subcategory_url = urljoin(base_url, subcategory_link['href'])
        subcategory_name = subcategory_link.text.strip()

        print(f"Scraping subcategory: {subcategory_name}")

        # Scrape each subcategory
        subcategory_response = requests.get(subcategory_url, timeout=timeout)
        subcategory_soup = BeautifulSoup(subcategory_response.content, 'html.parser')

        parts = subcategory_soup.find_all('div', class_='nf__part__detail')

        for part in parts[:max_parts_per_subcategory]:
            title_link = part.find('a', class_='nf__part__detail__title')
            if title_link is None or not title_link.get('href'):
                # Without a title link there is no product page to visit.
                continue

            name_span = title_link.find('span')
            name = (name_span if name_span is not None else title_link).text.strip()
            # The first matching div is read as the PartSelect number; the one
            # that also carries 'mb-2' is read as the manufacturer number.
            ps_number = _part_number_text(part, 'nf__part__detail__part-number')
            mfg_number = _part_number_text(part, 'nf__part__detail__part-number mb-2')

            product_url = urljoin(base_url, title_link['href'])

            product_info = scrape_product_page(product_url)

            all_parts.append({
                'name': name,
                'ps_number': ps_number,
                'mfg_number': mfg_number,
                'category': subcategory_name,
                'product_url': product_url,
                'description': product_info['description'],
                'qna_list': product_info['qna_list'],
                'compatible_models': product_info['compatible_models_list'],
                'Symptoms fixed': product_info['Symptoms_fixed']
            })

            time.sleep(1)  # Be nice to the server

    return all_parts

def scrape_partselect():
    """Scrape the refrigerator and dishwasher categories and return all parts found.

    Returns:
        A single flat list containing the part records produced by
        ``scrape_category_parts`` for each category, in category order.
    """
    site_root = "https://www.partselect.com"
    category_paths = ("/Refrigerator-Parts.htm", "/Dishwasher-Parts.htm")

    collected = []
    for path in category_paths:
        print(f"Scraping category: {path}")
        # Append this category's records to the running result.
        collected += scrape_category_parts(site_root + path)

    return collected

In [None]:
# Run the scrape for every configured category; this performs live HTTP requests.
scraped_data = scrape_partselect()

In [None]:
# Preview the first scraped part record.
scraped_data[0]

In [None]:
# Preview the compatible-model entries collected for the first part.
scraped_data[0]['compatible_models']

In [None]:
# Persist the scraped records as indented UTF-8 JSON (non-ASCII characters kept as-is).
serialized_parts = json.dumps(scraped_data, ensure_ascii=False, indent=4)
with open('partselect_data.json', 'w', encoding='utf-8') as file:
    file.write(serialized_parts)

print(f"Scraped {len(scraped_data)} parts and saved to partselect_data.json")

## ***ADDING DATA TO THE GRAPH DATABASE***

In [27]:
import json
from create_database import Neo4jLoader
from dotenv import load_dotenv
import os
# Actually call load_dotenv so the variables defined in .env reach os.environ;
# a bare `load_dotenv` reference is a no-op.
load_dotenv()

# Build the loader from the Neo4j credentials found in the environment.
loader = Neo4jLoader(os.environ['NEO4J_URI'], os.environ['NEO4J_USER'], os.environ['NEO4J_PASSWORD'])

try:
    # Load the parts data; read it with the same UTF-8 encoding the scraper wrote it with.
    with open('partselect_data.json', 'r', encoding='utf-8') as f:
        parts_data = json.load(f)

    loader.load_data(parts_data)
finally:
    # Release the loader even if reading or loading fails.
    loader.close()

In [None]:
# from typing import Dict, Any, List, TypedDict, Annotated
# from langgraph.graph import StateGraph, START, END 
# from neo4j import GraphDatabase
# from openai import OpenAI
# from sentence_transformers import SentenceTransformer
# from IPython.display import Image, display
# from dotenv import load_dotenv
# import json



# # Initialize Neo4j connection and OpenAI client
# NEO4J_URI = ''
# NEO4J_USER = ""
# NEO4J_PASSWORD = ""
# driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
# load_dotenv()
# client = OpenAI()


# class State(TypedDict):
#     user_input: str
#     conversation_history: Annotated[List[str], "mutable"]
#     extracted_info: Dict[str, str]
#     tool_output: Dict[str, Any]
#     next_step: str
#     tool_explanation: Annotated[List[str], "mutable"]
#     generated_response: str
#     feedback: str
    
# # Helper functions
# def query_neo4j(query, params=None):
#     with driver.session() as session:
#         result = session.run(query, params)
#         return [record.data() for record in result]

# def embed_text(text: str) -> List[float]:
#     embedder = SentenceTransformer('all-MiniLM-L6-v2')
#     response = embedder.encode([text])
#     return response.data[0].embedding

# class RecommendationTool:
#     def __init__(self, uri: str, user: str, password: str, model_name: str = 'all-MiniLM-L6-v2'):
#         self.driver = GraphDatabase.driver(uri, auth=(user, password))
#         self.model = SentenceTransformer(model_name)

#     def recommend(self, query: str, top_k: int = 5) -> List[Dict]:
#         # Convert query to embedding
#         embedding = self.model.encode(query).tolist()

#         with self.driver.session() as session:
#             return session.read_transaction(self._similarity_search, embedding, top_k)

#     def _similarity_search(self, tx, embedding: List[float], top_k: int) -> List[Dict]:
#         query = """
#         CALL db.index.vector.queryNodes('part_embeddings', $top_k, $embedding) 
#         YIELD node, score
#         RETURN node.name AS name, node.description AS description, score
#         """
#         result = tx.run(query, top_k=top_k, embedding=embedding)
#         return [dict(record) for record in result]

#     def close(self):
#         self.driver.close()

# # Define tools
# class InfoRetrievalTool:
#     def __init__(self, uri, user, password):
#         self.driver = GraphDatabase.driver(uri, auth=(user, password))

#     def __call__(self, extracted_info: Dict[str, str]) -> Dict[str, Any]:
#         with self.driver.session() as session:
#             return session.read_transaction(self._get_part_info, extracted_info)

#     def _get_part_info(self, tx, extracted_info: Dict[str, str]) -> Dict[str, Any]:
#         query = """
#         MATCH (p:Part)
#     WHERE 
#     ($ps_number <> '' AND p.ps_number = $ps_number) OR
#     ($mfg_number <> '' AND p.mfg_number = $mfg_number)

# OPTIONAL MATCH (p)-[:COMPATIBLE_WITH]->(m:Model)
# WHERE $model_number = '' OR m.number = $model_number

# OPTIONAL MATCH (p)-[:FIXES]->(s:Symptom)
# WHERE $symptom = '' OR s.name = $symptom

# WITH p, 
#      CASE WHEN $model_number <> '' AND $symptom <> '' 
#           THEN collect(DISTINCT m) ELSE [] END as models,
#      CASE WHEN $model_number <> '' AND $symptom <> '' 
#           THEN collect(DISTINCT s) ELSE [] END as symptoms

# RETURN {
#     name: p.name,
#     description: p.description,
#     ps_number: p.ps_number,
#     mfg_number: p.mfg_number,
#     models: CASE WHEN $model_number <> '' AND $symptom <> '' 
#                  THEN [m IN models | m.number] ELSE [] END,
#     symptoms_fixed: CASE WHEN $model_number <> '' AND $symptom <> '' 
#                          THEN [s IN symptoms | s.name] ELSE [] END,
#     part_url: p.url
# } as part_info
#         """
        
#         result = tx.run(query, 
#                         ps_number=extracted_info.get('ps_number', ''),
#                         model_number=extracted_info.get('model', ''),
#                         mfg_number=extracted_info.get('mfg_number', ''),
#                         symptom=extracted_info.get('symptom', ''))
        
#         records = list(result)
#         print(records)
#         if not records:
#             return {"message": "No matching part found."}
        
#         return records[0]['part_info']

#     def close(self):
#         self.driver.close()

# class SymptomAnalysisTool:
#     def __init__(self, neo4j_uri: str, neo4j_user: str, neo4j_password: str):
#         self.driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
#         self.model = SentenceTransformer('all-MiniLM-L6-v2')

#     def __call__(self, model: str, symptom: str) -> Dict[str, Any]:
#         parts = self._find_relevant_parts(model, symptom)
#         relevant_qas = self._find_relevant_qas(parts, symptom)
#         return {
#             "model": model,
#             "symptom": symptom,
#             "relevant_parts": parts,
#             "relevant_qas": relevant_qas
#         }

#     def _find_relevant_parts(self, model: str, symptom: str) -> List[Dict[str, Any]]:
#         print(" in find relevant parts")
#         with self.driver.session() as session:
#             result = session.run("""
#                 MATCH (m:Model {name: $model})-[:HAS_ISSUE]->(s:Symptom {name: $symptom})<-[:FIXES]-(p:Part)
#                 RETURN p.name AS name, p.ps_number AS ps_number, p.mfg_number AS mfg_number,
#                        p.description AS description
#                 LIMIT 2
#             """, model=model, symptom=symptom)
#             print("relevant parts", result)
#             return [dict(record) for record in result]

#     def _find_relevant_qas(self, parts: List[Dict[str, Any]], symptom: str) -> List[Dict[str, Any]]:
#         combined_embedding = self.model.encode(symptom + " " + " ".join([p['name'] + p['ps_number'] for p in parts])).tolist()
        
#         with self.driver.session() as session:
#             result = session.run("""
#             CALL db.index.vector.queryNodes('question_embedding', 10, $embedding) 
#             YIELD node as question, score
#             MATCH (question)-[:HAS_ANSWER]->(answer:Answer)
#             RETURN question.text AS question, answer.text AS answer, score
#             ORDER BY score DESC
#             LIMIT 5
#         """, embedding=combined_embedding)
            
#             print("relevant qas", result)
#             return [dict(record) for record in result]

#     def close(self):
#         self.driver.close()


# class CompatibilityCheckerTool:
#     def __call__(self, part: str, model: str) -> Dict[str, Any]:
#         query = """
#         MATCH (p:Part)
# WHERE p.ps_number = $part OR p.mfg_number = $part
# MATCH (p)-[:COMPATIBLE_WITH]->(m:Model {name: $model})
# RETURN COUNT(*) > 0 AS is_compatible
#         """
#         result = query_neo4j(query, {"part": part, "model": model})
#         return {"is_compatible": result[0]["is_compatible"] if result else False}

# # Create tool instances
# info_retrieval_tool = InfoRetrievalTool(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
# symptom_analysis_tool = SymptomAnalysisTool(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
# compatibility_checker_tool = CompatibilityCheckerTool()
# recommendation_tool = RecommendationTool(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# def User_input(state) -> Dict[str, Any]:
#     user_input = input("User: ")
#     state["user_input"] = user_input
#     state["conversation_history"].append(f"User: {user_input}")
#     state["next_step"] = "central_agent"
#     if user_input == "exit":
#         state["next_step"] = "end"
#     return state

# # Update tool_manager to use these tools
# def tool_manager(state) -> Dict[str, Any]:
#     conversation = ' '.join(state["conversation_history"][-5:])
#     extracted_info = state["extracted_info"]
#     prompt = f"""
#     You are the tools manager for agentic architecture. Based on the following conversation, determine which tool(s) to use:
#     {conversation}

#     Available tools:
#     1. Info Retrieval Tool: Use GraphRAG to get information about parts, models, and their relationships.
#     2. Symptom Analysis Tool: Find the relevant parts that can fix the symptom provided the model. Also finds relevant Q&A to figure out the problem and solution.
#     3. Compatibility Checker Tool: Check compatibility between parts and models.
#     4. Part recommendations Tool: Recommend parts based on the user's query.

#     Think step by step. 
#     Select the most appropriate tool(s). Format your response as follows:
#     Selected Tools: [1, 2, 3]
#     Explanation: [Your explanation]
#     """

#     response = client.chat.completions.create(
#         model="gpt-4",
#         messages=[{"role": "system", "content": prompt}]
#     )

#     tool_selection = response.choices[0].message.content
#     for line in tool_selection.split("\n"):
#         if "Selected Tools:" in line:
#             selected_tools = line.split(":")[1].strip().strip("[]").split(",")
#         if "Explanation:" in line:
#             state["tool_explanation"] = line.split(":")[1].strip()

#     # Parse tool selection and use appropriate tools
#     tool_outputs = {}

#     if '1' in selected_tools:
#         tool_outputs["info_retrieval"] = info_retrieval_tool(extracted_info)
#     if '2' in selected_tools:
#         model = extracted_info["model"]
#         symptom = extracted_info["symptom"]
#         if not model or not symptom:
#             return ("Model and symptom are required for symptom analysis")
#         tool_outputs["symptom_analysis"] = symptom_analysis_tool(symptom, model)

#     if '3' in selected_tools:
#         part = extracted_info["ps_number"]
#         if not part:
#             part = extracted_info["mfg_number"]
#         model = extracted_info["model"]
#         if not part or not model:
#             return ("Part and model are required for compatibility check")
#         tool_outputs["compatibility_check"] = compatibility_checker_tool(part, model)

#     if '4' in selected_tools:
#         user_query = state["user_input"]
#         tool_outputs["part_recommendations"] = recommendation_tool.recommend(user_query)
#         tool_outputs["part_recommendations"] = "Recommendations based on user query"

#     state["tool_output"] = tool_outputs
#     state["next_step"] = "response_generation"
#     return state

# def central_agent(state) -> Dict[str, Any]:
#     if "conversation_history" not in state:
#         state["conversation_history"] = []

#     user_input = state["user_input"]
#     state["conversation_history"].append(f"User: {user_input}")

#     prompt = f"""
#     You are a helpful assistant for a refrigerator and dishwasher parts e-commerce website.
#     Your task is to gather all necessary information from the user to assist them effectively.
#     If the user hasn't provided enough information, ask follow-up questions. For example, you can ask: 
#     "What is the model number of your appliance?" or mfg number of the part you are looking for?"

#     However, it is also possible that user might be looking for only some general information about a part. So, based on your judgement decide if you need more information or if you can proceed to use tools.

#     Sometimes, user will just ask for some part recommendations! by providing a use case or just simply give you a product information. In that case, you can directly proceed to tools.
#     Below is the conversation history to give you context:

#     Conversation history:
#     {' '.join(state["conversation_history"][-5:])}

#     Thinks step by step and decide if you need more information or if you can proceed to use tools.
#     If you need more information, ask a question. If you have enough information, say "PROCEED TO TOOLS".
    
#     Finally, after you have all the information, extract the following details based on the query asked by the user and additional information provided:
#     - mfg number of the part ( if applicable )
#     - Part Select number ( PS number ) of the part ( if applicable )
#     - model number of the appliance( if applicable )
#     - symptom or issue with the appliance ( if applicable)

#     Provide your response in a VALID JSON format with the following structure:
#     {{
#         "response": "Your response text",
#         "proceed_to_tools": true/false,
#         "extracted_info": {{
#             "model": "extracted model number or null",
#             "ps_number": "extracted part number or null. This will always start with 'PS' followed by a number",
#             "mfg_number": "extracted manufacturer number or null",
#             "symptom": "extracted symptom/issue or null"
#         }}
#     }}
#     Ensure that your response is in VALID JSON format. Do not output any markdown or enclose the JSON in triple backticks.
#     Your response:
#     """

#     response = client.chat.completions.create(
#         model="gpt-4",
#         messages=[{"role": "system", "content": prompt}]
#     )

#     ai_response = response.choices[0].message.content
#     parsed_response = json.loads(ai_response)
#     state["conversation_history"].append(f"Assistant: {parsed_response['response']}")
#     print("central_agent", parsed_response)

#     if parsed_response["proceed_to_tools"]:
#         state["next_step"] = "tool_manager"
#         state['extracted_info'] = parsed_response["extracted_info"]
#     else:
#         state["next_step"] = "central_agent"
#         print("AI:", parsed_response['response'])
#         state["conversation_history"].append(parsed_response['response'] ) # This will be shown to the user in the actual implementation

#     return state

# def response_generation(state) -> Dict[str, Any]:
#     conversation = ' '.join(state["conversation_history"][-5:])
#     tool_output = state["tool_output"]

#     prompt = f"""
#     Based on the following conversation and tool output, generate a helpful response for the user:

#     Conversation:
#     {conversation}

#     Tool Output:
#     {tool_output}

#     Provide a detailed and helpful response addressing the user's query:
#     """

#     response = client.chat.completions.create(
#         model="gpt-4",
#         messages=[{"role": "system", "content": prompt}]
#     )

#     state["generated_response"] = response.choices[0].message.content
#     state["next_step"] = "judge_agent"
#     return state

# def judge_agent(state) -> Dict[str, Any]:
#     conversation = ' '.join(state["conversation_history"])
#     generated_response = state["generated_response"]

#     prompt = f"""
#     Review the following conversation and generated response:

#     Conversation:
#     {conversation}

#     Generated Response:
#     {generated_response}

#     Evaluate if the response adequately addresses the user's query. If it does, respond with "APPROVED". 
#     If not, explain what needs to be improved or clarified.

#     Your evaluation:
#     """

#     response = client.chat.completions.create(
#         model="gpt-4",
#         messages=[{"role": "system", "content": prompt}]
#     )

#     evaluation = response.choices[0].message.content

#     if "APPROVED" in evaluation:
#         state["final_response"] = generated_response
#         state["next_step"] = "end"
#     else:
#         state["feedback"] = evaluation
#         state["next_step"] = "central_agent"
#         state["user_input"] = f"Please improve the response. Feedback: {evaluation}"

#     return state

# workflow = StateGraph(State)

# # Add nodes
# # workflow.add_node("User_input", User_input)
# # workflow.add_node("Central_agent",central_agent)
# # workflow.add_node("Tool_manager", tool_manager)
# # workflow.add_node("Response_generation",response_generation)
# # workflow.add_node("Judge_agent",judge_agent)

# # # Connect nodes
# # workflow.add_edge(START, "User_input")
# # workflow.add_edge("Tool_manager", "Response_generation")
# # workflow.add_edge("Response_generation", "Judge_agent")

# # # Define the conditional logic for agent selection
# # def decide_next(state):
# #     return state["next_step"]

# # # Set the conditional edges
# # workflow.add_conditional_edges("User_input", decide_next, {"central_agent": "Central_agent", "end": END} )
# # workflow.add_conditional_edges("Central_agent", decide_next, {"user_input": "User_input", "tool_manager": "Tool_manager"})
# # workflow.add_conditional_edges("Judge_agent", decide_next, {"central_agent": "Central_agent", "end": END})

# workflow.add_node("Central_agent", central_agent)
# workflow.add_node("Tool_manager", tool_manager)
# workflow.add_node("Response_generation", response_generation)
# workflow.add_node("Judge_agent", judge_agent)

#     # Connect nodes
# workflow.add_edge(START, "Central_agent")
# workflow.add_edge("Tool_manager", "Response_generation")
# workflow.add_edge("Response_generation", "Judge_agent")

#     # Define the conditional logic for agent selection
# def decide_next(state):
#     return state["next_step"]

#     # Set the conditional edges
# workflow.add_conditional_edges("Central_agent", decide_next, {"central_agent": "Central_agent", "tool_manager": "Tool_manager"})
# workflow.add_conditional_edges("Judge_agent", decide_next, {"central_agent": "Central_agent", "end": END})

# # Compile the graph
# app = workflow.compile()
# display(Image(app.get_graph().draw_mermaid_png()))

# def run_conversation():
#     state = State(
#         user_input="tell me information about part PS429854",
#         conversation_history=[],
#         extracted_info={},
#         tool_output={},
#         next_step="User_input",
#         tool_explanation=[],
#         generated_response="",
#         feedback=""
#     )

    
#         # Invoke the workflow
#     result = app.invoke(state)

#     # Update the state for the next iteration
#     state = result

#     # Print the response (assuming it's stored in generated_response)
#     print("AI:", state['generated_response'])