In [None]:
!pip install neo4j
!pip3 install tomlkit
!pip3 install -U neo4j
!pip install py2neo

In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

# Make Google Drive available inside the Colab VM.
drive.mount('/content/drive')

# NOTE(review): this path points at the filesystem root, not at the Drive
# mount created above — confirm where combined_data.csv actually lives.
csv_path = '/combined_data.csv'
df = pd.read_csv(csv_path)

# Quick look at the first rows of the raw data.
print(df.head())

In [None]:
# DATA CLEANING

# Values in `price` that cannot be parsed as numbers become NaN here and are
# then replaced by the default below.
df['price'] = pd.to_numeric(df['price'], errors='coerce')

# Replacement values for missing entries: 0 for the rating/price fields and
# 'Unknown' for the title/identifier/store/category fields.
missing_value_defaults = {
    **dict.fromkeys(['rating', 'average_rating', 'rating_number', 'price'], 0),
    **dict.fromkeys(['title', 'parent_asin', 'user_id', 'store', 'categories'], 'Unknown'),
}
df.fillna(missing_value_defaults, inplace=True)

print(df.head())


In [None]:
from neo4j import GraphDatabase
import time

# Public endpoint of the ngrok TCP tunnel. The previous value was the raw
# ngrok status line ("<public host:port> -> localhost:7687"), which is not a
# valid URI; only the public side belongs here.
# NOTE(review): the direct `bolt://` scheme is used on the assumption that a
# routing `neo4j://` connection through the tunnel would be redirected to the
# server's own advertised address — confirm against the server configuration.
uri = "bolt://8.tcp.ngrok.io:17322"
username = "neo4j"
# NOTE(review): hardcoded credential — move to an environment variable or
# getpass before sharing this notebook.
password = "apan5400"

# Shared driver used by the cells below until it is explicitly closed.
driver = GraphDatabase.driver(uri, auth=(username, password))

def test_connection(retries=5, delay=5):
    """Check that the notebook-level ``driver`` can reach Neo4j.

    Opens a session and runs a trivial ``RETURN`` query, trying up to
    ``retries`` times and pausing ``delay`` seconds between attempts. The
    pause and the "retrying" message are skipped after the final attempt,
    because no further attempt follows it.

    Args:
        retries: Total number of connection attempts (default 5).
        delay: Seconds to wait between two attempts (default 5).

    Returns:
        None. Progress and the final outcome are printed.
    """
    for attempt in range(1, retries + 1):
        try:
            with driver.session() as session:
                # Indexing the single record makes sure a row really came back.
                session.run("RETURN 'Successfully connected to Neo4j!' AS greeting").single()[0]
            print("Successfully connected to Neo4j!")
            return
        except Exception as e:
            # Deliberately broad: any failure simply counts as a failed attempt.
            if attempt < retries:
                print(f"Failed to connect to Neo4j: {e}, retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print(f"Failed to connect to Neo4j: {e}")
    print("All retries failed.")

# Verify that the Neo4j server is reachable before doing any graph work
test_connection()



In [None]:
def execute_query(driver, query, parameters=None):
    """Run a Cypher query and return all of its records.

    The records are collected while the session is still open. Returning the
    lazy result object itself would hand the caller a result whose session
    has already been closed by the ``with`` block.

    Args:
        driver: Object providing ``session()`` as a context manager.
        query: Cypher query text.
        parameters: Optional mapping of query parameters.

    Returns:
        A list with every record produced by the query, in result order.
    """
    with driver.session() as session:
        # Run the Cypher query and drain it before the session closes
        return list(session.run(query, parameters))

In [None]:
def _relationship_rows(name, rows):
    """Return ``rows``, or the notebook-level variable ``name`` when ``rows`` is None."""
    if rows is not None:
        return rows
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            f"create_graph() needs '{name}': pass it as an argument or "
            f"define a notebook-level variable with that name"
        ) from None


# Function to create nodes and relationships in Neo4j
def create_graph(driver, products, users, categories, stores,
                 purchases=None, product_categories=None, product_stores=None):
    """Write nodes and relationships to Neo4j, one MERGE statement per row.

    Nodes are written first (Product, User, Category, Store), then the
    relationships (PURCHASED, BELONGS_TO, SOLD_BY), so that the MATCH clauses
    of the relationship statements can find their endpoints.

    Args:
        driver: Object providing ``session()`` as a context manager.
        products: Iterable of dicts with ``asin``, ``title``,
            ``average_rating`` and ``price``.
        users: Iterable of dicts with ``user_id``.
        categories: Iterable of dicts with ``name``.
        stores: Iterable of dicts with ``name``.
        purchases: Iterable of dicts with ``user_id``, ``asin`` and ``rating``.
        product_categories: Iterable of dicts with ``asin`` and
            ``category_name``.
        product_stores: Iterable of dicts with ``asin`` and ``store_name``.

    The last three arguments used to be read from notebook-level variables.
    For backward compatibility an omitted argument still falls back to the
    variable of the same name.

    Raises:
        NameError: If a relationship input is neither passed nor defined at
            notebook level. Raised before anything is written.
    """
    # Resolve every input up front so a missing one fails before any write.
    purchases = _relationship_rows("purchases", purchases)
    product_categories = _relationship_rows("product_categories", product_categories)
    product_stores = _relationship_rows("product_stores", product_stores)

    # (statement, rows) pairs in execution order: nodes, then relationships.
    steps = [
        ("MERGE (p:Product {asin: $asin}) "
         "SET p.title = $title, p.average_rating = $average_rating, p.price = $price",
         products),
        ("MERGE (u:User {user_id: $user_id})", users),
        ("MERGE (c:Category {name: $name})", categories),
        ("MERGE (s:Store {name: $name})", stores),
        ("MATCH (u:User {user_id: $user_id}) "
         "MATCH (p:Product {asin: $asin}) "
         "MERGE (u)-[:PURCHASED {rating: $rating}]->(p)",
         purchases),
        ("MATCH (p:Product {asin: $asin}) "
         "MATCH (c:Category {name: $category_name}) "
         "MERGE (p)-[:BELONGS_TO]->(c)",
         product_categories),
        ("MATCH (p:Product {asin: $asin}) "
         "MATCH (s:Store {name: $store_name}) "
         "MERGE (p)-[:SOLD_BY]->(s)",
         product_stores),
    ]

    with driver.session() as session:
        for statement, rows in steps:
            for row in rows:
                session.run(statement, row)
# Build the parameter dicts for create_graph() from the cleaned DataFrame.
# None of these lists was defined anywhere in the notebook, so this cell
# failed on a fresh kernel. The column names are the ones handled in the
# data-cleaning cell; the dict keys are the Cypher parameter names.
products = (
    df[['parent_asin', 'title', 'average_rating', 'price']]
    .drop_duplicates(subset='parent_asin')
    .rename(columns={'parent_asin': 'asin'})
    .to_dict('records')
)
users = [{'user_id': user_id} for user_id in df['user_id'].unique().tolist()]
# NOTE(review): each distinct value of the `categories` column is treated as
# one category name — confirm whether the column holds a single category or
# an encoded list that should be split first.
categories = [{'name': name} for name in df['categories'].unique().tolist()]
stores = [{'name': name} for name in df['store'].unique().tolist()]

# create_graph() picks these three up from notebook-level variables, so they
# must exist under exactly these names before the call.
# NOTE(review): assumes one CSV row per user/product rating — confirm.
purchases = (
    df[['user_id', 'parent_asin', 'rating']]
    .rename(columns={'parent_asin': 'asin'})
    .to_dict('records')
)
product_categories = (
    df[['parent_asin', 'categories']]
    .drop_duplicates()
    .rename(columns={'parent_asin': 'asin', 'categories': 'category_name'})
    .to_dict('records')
)
product_stores = (
    df[['parent_asin', 'store']]
    .drop_duplicates()
    .rename(columns={'parent_asin': 'asin', 'store': 'store_name'})
    .to_dict('records')
)

try:
    # Execute the graph creation
    create_graph(driver, products, users, categories, stores)
finally:
    # Close the driver connection when done, even if the load failed
    driver.close()

In [None]:
from neo4j import GraphDatabase

# Function to find the most popular category
def find_most_popular_category(driver):
    """Return the category whose products were purchased most often.

    PURCHASED relationships run from User to Product and BELONGS_TO from
    Product to Category, so the pattern walks User -> Product -> Category.
    The previous pattern attached PURCHASED directly to the Category node, a
    shape the loader never creates, so it could not match anything.

    Args:
        driver: Object providing ``session()`` as a context manager.

    Returns:
        The single result record with fields ``Category`` and ``Purchases``,
        as returned by ``single()`` on the query result.
    """
    query = """
        MATCH (:User)-[:PURCHASED]->(p:Product)-[:BELONGS_TO]->(c:Category)
        RETURN c.name AS Category, COUNT(*) AS Purchases
        ORDER BY Purchases DESC
        LIMIT 1
    """
    with driver.session() as session:
        return session.run(query).single()

# Function to find the most popular product
def find_most_popular_product(driver):
    """Return the product with the most incoming PURCHASED relationships.

    Args:
        driver: Object providing ``session()`` as a context manager.

    Returns:
        The single result record with fields ``Product`` (the product title)
        and ``Purchases``, as returned by ``single()`` on the query result.
    """
    top_product_query = """
            MATCH (p:Product)<-[:PURCHASED]-(:User)
            RETURN p.title AS Product, COUNT(*) AS Purchases
            ORDER BY Purchases DESC
            LIMIT 1
        """
    with driver.session() as session:
        return session.run(top_product_query).single()

# Connect to the Neo4j database

# Public endpoint of the ngrok TCP tunnel. The previous value was the raw
# ngrok status line ("<public host:port> -> localhost:7687"), which is not a
# valid URI.
# NOTE(review): `bolt://` (direct connection) is assumed to be the right
# scheme for a tunnelled single server — confirm.
uri = "bolt://8.tcp.ngrok.io:17322"
username = "neo4j"
password = "apan5400"  # NOTE(review): hardcoded credential
driver = GraphDatabase.driver(uri, auth=(username, password))

try:
    # Find and print the most popular category
    most_popular_category = find_most_popular_category(driver)
    if most_popular_category:
        print(f"The most popular category is: {most_popular_category['Category']} with {most_popular_category['Purchases']} purchases.")
    else:
        print("No popular category found.")

    # Find and print the most popular product
    most_popular_product = find_most_popular_product(driver)
    if most_popular_product:
        print(f"The most popular product is: {most_popular_product['Product']} with {most_popular_product['Purchases']} purchases.")
    else:
        print("No popular product found.")
finally:
    # Close the driver connection when done, even if a query failed
    driver.close()

In [None]:
# Function to find frequent pairs of products
def find_frequent_product_pairs(driver):
    """Return the ten product pairs most often purchased by the same user.

    Args:
        driver: Object providing ``session()`` as a context manager.

    Returns:
        A pandas DataFrame built from the query records, with one row per
        pair (``Product1``, ``Product2``, ``Frequency``).
    """
    pair_query = """
    MATCH (u:User)-[:PURCHASED]->(p1:Product)
    WITH u, p1
    MATCH (u)-[:PURCHASED]->(p2:Product)
    WHERE p1 <> p2 AND id(p1) < id(p2)
    RETURN p1.title AS Product1, p2.title AS Product2, COUNT(*) AS Frequency
    ORDER BY Frequency DESC
    LIMIT 10
    """
    rows = []
    with driver.session() as session:
        # Records are converted to plain dicts while the session is open.
        for record in session.run(pair_query):
            rows.append(record.data())
    return pd.DataFrame(rows)


# Connect to the Neo4j database
# Public endpoint of the ngrok TCP tunnel. The previous value was the raw
# ngrok status line ("<public host:port> -> localhost:7687"), which is not a
# valid URI.
# NOTE(review): `bolt://` (direct connection) is assumed to be the right
# scheme for a tunnelled single server — confirm.
uri = "bolt://8.tcp.ngrok.io:17322"
username = "neo4j"
password = "apan5400"  # NOTE(review): hardcoded credential
driver = GraphDatabase.driver(uri, auth=(username, password))

try:
    # Find frequent product pairs
    frequent_pairs_df = find_frequent_product_pairs(driver)
    print(frequent_pairs_df)
finally:
    # Close the driver connection when done, even if the query failed
    driver.close()

In [None]:
#The plan:

#Product Nodes: Each product can be a node, with properties like title, average_rating, price, etc.
#User Nodes: Each user can also be a node, with a user_id property.
#Category Nodes: Categories can be nodes as well, with the category name as a property.
#Store Nodes: Stores are also nodes, with the store name as a property.
#PURCHASED Relationships: Connect User nodes to Product nodes to represent a purchase. The rating could be a property of this relationship.
#BELONGS_TO Relationships: Connect Product nodes to Category nodes to represent the categories each product belongs to.
#SOLD_BY Relationships: Connect Product nodes to Store nodes to represent the store selling the product.