# Data Analysis

In [11]:
import json
import math
import os
import warnings
from typing import Literal

import psycopg2
from neo4j import GraphDatabase
from pymongo import MongoClient

# Suppress every warning category for the rest of the kernel session.
# NOTE(review): a blanket "ignore" also hides deprecation warnings emitted by
# the database drivers; consider narrowing it to the specific categories that
# are known to be noisy.
warnings.filterwarnings("ignore")

In [3]:
# Directory holding the query files that load_query() reads. The path is
# relative, so it is resolved against the kernel's current working directory.
QUERY_PATH = "../scripts/queries"

## Query loading

In [None]:
def load_query(name: str) -> str:
    """Return the text of the query file ``name`` located under ``QUERY_PATH``.

    Args:
        name: File name including its extension (e.g. ``"q1.sql"``).

    Returns:
        The full contents of the file as a string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # Decode explicitly as UTF-8 so the query text does not depend on the
    # platform's default locale encoding.
    with open(os.path.join(QUERY_PATH, name), "r", encoding="utf-8") as file:
        return file.read()

## Execute queries

In [18]:
def execute_postgres(query_name: str):
    """Run the query file ``<query_name>.sql`` on the local ``ecommerce`` database.

    Args:
        query_name: Base name (without extension) of the SQL file that
            ``load_query`` reads.

    Returns:
        Every row produced by the query, as returned by ``cursor.fetchall()``.
    """
    # Read the query first so a missing file never opens a connection.
    query = load_query(f"{query_name}.sql")

    conn = psycopg2.connect(
        dbname="ecommerce",
        user="postgres",
    )
    try:
        # psycopg2's connection context manager only commits or rolls back the
        # transaction; it does NOT close the connection, hence the finally.
        with conn:
            with conn.cursor() as cursor:
                cursor.execute(query)
                return cursor.fetchall()
    finally:
        conn.close()


def execute_mongo(
    query_name: str,
    collection_name: str,
    action: Literal["find", "aggregate"] = "aggregate",
):
    """Run the query file ``<query_name>.json`` on a collection of the ``bd-a2`` database.

    Args:
        query_name: Base name (without extension) of the JSON file that
            ``load_query`` reads.
        collection_name: Name of the collection inside ``bd-a2`` to query.
        action: ``"aggregate"`` passes the parsed JSON to
            ``collection.aggregate``; ``"find"`` passes it to
            ``collection.find``.

    Returns:
        A list containing every document produced by the query.

    Raises:
        ValueError: If ``action`` is neither ``"find"`` nor ``"aggregate"``.
    """
    # Fail loudly instead of silently returning None for an unknown action.
    if action not in ("find", "aggregate"):
        raise ValueError(
            f"Unsupported action {action!r}; expected 'find' or 'aggregate'"
        )

    query = json.loads(load_query(f"{query_name}.json"))

    with MongoClient("mongodb://localhost:27017/") as client:
        collection = client["bd-a2"][collection_name]
        if action == "aggregate":
            cursor = collection.aggregate(query)
        else:
            cursor = collection.find(query)
        # Drain the cursor while the client is still open: a cursor returned
        # out of this block would be unusable once the client is closed.
        return list(cursor)


def execute_neo4j(query_name: str):
    """Run the query file ``<query_name>.cypher`` on the Neo4j instance.

    Connection settings can be overridden through the ``NEO4J_URI``,
    ``NEO4J_USER`` and ``NEO4J_PASSWORD`` environment variables; when they are
    unset, the local development defaults below are used.

    Args:
        query_name: Base name (without extension) of the Cypher file that
            ``load_query`` reads.

    Returns:
        A list with every record produced by the query.
    """
    query = load_query(f"{query_name}.cypher")

    # NOTE(review): the fallback password is kept only for backward
    # compatibility with the local setup; prefer setting NEO4J_PASSWORD so the
    # credential does not have to live in the notebook.
    uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687/neo4j")
    user = os.environ.get("NEO4J_USER", "neo4j")
    password = os.environ.get("NEO4J_PASSWORD", "11111111")

    with GraphDatabase.driver(uri, auth=(user, password)) as driver:
        with driver.session() as session:
            results = session.run(query)  # type: ignore
            # Materialize the records while the session is still open.
            return list(results)

## Query execution

### Q1

Analyze the data to find out whether campaigns attracted customers to purchase products. The business aims to engage more customers in upcoming campaigns by leveraging social network information.

In [79]:
def execute_q1() -> float:
    """Compute the Q1 ratio on all three databases and cross-check the results.

    Returns:
        The ratio taken from the first column of the first PostgreSQL row.

    Raises:
        AssertionError: If the three databases disagree by more than
            floating-point rounding error.
    """
    postgres_res = execute_postgres("q1")
    mongo_res = execute_mongo("q1", "campaigns")
    neo4j_res = execute_neo4j("q1")

    # Normalize to float so the comparison and the return value do not depend
    # on each driver's numeric type.
    postgres_res_float = float(postgres_res[0][0])
    mongo_res_float = float(mongo_res[0]["purchase_ratio"])
    neo4j_res_float = float(neo4j_res[0]["purchase_ratio"])

    # Each engine computes the ratio independently, so compare with a relative
    # tolerance rather than exact float equality.
    results_agree = math.isclose(
        postgres_res_float, mongo_res_float, rel_tol=1e-9
    ) and math.isclose(postgres_res_float, neo4j_res_float, rel_tol=1e-9)
    assert results_agree, (
        "Q1 results differ: "
        f"postgres={postgres_res_float!r}, "
        f"mongo={mongo_res_float!r}, "
        f"neo4j={neo4j_res_float!r}"
    )

    return postgres_res_float


# Run Q1 on all three databases; the bare name on the last line makes the
# cross-checked ratio the cell's displayed output.
purchase_personalization_ratio = execute_q1()
purchase_personalization_ratio

2.175478391655022

### Q2

The business has logged customer behavior on the platform. Use this information to determine the top personalized product recommendations that can be displayed on the user's home page.

In [119]:
# Results are for user 563016948.
def execute_q2() -> list[int]:
    """Fetch the Q2 product ids from all three databases and verify they match.

    Returns:
        The product ids taken from the first column of each PostgreSQL row,
        in the order the query returned them.

    Raises:
        AssertionError: If the three databases do not return the same ids in
            the same order.
    """
    pg_rows = execute_postgres("q2")
    mongo_docs = execute_mongo("q2", "users")
    neo4j_records = execute_neo4j("q2")

    # Reduce each backend's native result shape to a plain list of ids.
    ids_from_postgres = [row[0] for row in pg_rows]
    ids_from_mongo = [int(doc["_id"]) for doc in mongo_docs]
    ids_from_neo4j = [record["p.product_id"] for record in neo4j_records]

    # Order matters: the lists must be identical element by element.
    assert ids_from_postgres == ids_from_mongo and ids_from_mongo == ids_from_neo4j

    return ids_from_postgres


# Run Q2 on all three databases; the bare name on the last line makes the
# cross-checked list of product ids the cell's displayed output.
popular_products = execute_q2()
popular_products

[1005135,
 28717064,
 7004807,
 1003317,
 7005751,
 1005105,
 1004258,
 1004839,
 1005112,
 7004492]

### Q3

Based on the top products obtained above, find out which products can be retrieved by a full-text search using keywords taken from those products' `category_code`.

In [123]:
# In this cell Q3 is executed against PostgreSQL only.
# NOTE(review): the bare name below renders every returned row as cell output;
# consider displaying a slice (e.g. the first few rows) plus len(postgres_res)
# to keep the saved notebook small.
postgres_res = execute_postgres("q3")
postgres_res

[(1000894, 'electronics.smartphone'),
 (1000978, 'electronics.smartphone'),
 (1001588, 'electronics.smartphone'),
 (1001605, 'electronics.smartphone'),
 (1001606, 'electronics.smartphone'),
 (1002042, 'electronics.smartphone'),
 (1002062, 'electronics.smartphone'),
 (1002098, 'electronics.smartphone'),
 (1002099, 'electronics.smartphone'),
 (1002100, 'electronics.smartphone'),
 (1002101, 'electronics.smartphone'),
 (1002102, 'electronics.smartphone'),
 (1002103, 'electronics.smartphone'),
 (1002225, 'electronics.smartphone'),
 (1002266, 'electronics.smartphone'),
 (1002308, 'electronics.smartphone'),
 (1002310, 'electronics.smartphone'),
 (1002367, 'electronics.smartphone'),
 (1002396, 'electronics.smartphone'),
 (1002398, 'electronics.smartphone'),
 (1002415, 'electronics.smartphone'),
 (1002460, 'electronics.smartphone'),
 (1002482, 'electronics.smartphone'),
 (1002484, 'electronics.smartphone'),
 (1002494, 'electronics.smartphone'),
 (1002497, 'electronics.smartphone'),
 (1002522, '