In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to project code are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [36]:
from utils.DBUpdater import DBUpdater
# NOTE(review): presumably refreshes the database contents before the cells
# below read from it — confirm against utils/DBUpdater.
DBUpdater().update()


# Data Formatting

In [None]:
from urllib.parse import urljoin
from dotenv import load_dotenv
from bson import ObjectId
import pandas as pd
import numpy as np
import requests
import json
import os
load_dotenv()

In [3]:
from modules.database_tables import Database
from modules.database_tables.Orders import Orders
from modules.database_tables.Products import Products
from modules.database_tables.TaskCards import TaskCards
from modules.database_tables.ProductOperations import Operations

In [4]:
# Credentials come from the process environment (load_dotenv() is called in the
# import cell). os.getenv returns None for a missing variable, so an unset
# credential is handed on as None rather than raising here.
username = os.getenv("mongodb_user")
password = os.getenv("mongodb_password")

In [5]:
class GSHubAPI:
    """Builds absolute URLs for the "dinand" endpoints under a fixed base domain."""

    def __init__(self) -> None:
        # Base address that every endpoint path is resolved against.
        self.domain: str = "https://premo.gshub.nl/"

    def _resolve(self, relative_path: str) -> str:
        """Join ``relative_path`` onto the configured domain."""
        return urljoin(self.domain, relative_path)

    def learn_url(self, days_in_past: int) -> str:
        """Return the learn endpoint URL.

        Args:
            days_in_past: Value interpolated as the last path segment.

        Returns:
            The absolute URL as a string.
        """
        endpoint = f"api/dinand/learn/{days_in_past}"
        return self._resolve(endpoint)

    @property
    def play_url(self) -> str:
        """Absolute URL of the play endpoint (trailing slash included)."""
        endpoint = "api/dinand/play/"
        return self._resolve(endpoint)

In [9]:
# Absolute URL of the play endpoint, taken from a throwaway GSHubAPI instance.
play_url = GSHubAPI().play_url

In [14]:
# Accessor objects from modules.database_tables, each constructed with the same
# credentials read from the environment above.
db = Database(username, password)
orders = Orders(username, password)
products = Products(username, password)
taskcards = TaskCards(username, password)
operations = Operations(username, password)

In [59]:
# Flatten orders -> products -> task cards into one flat record per task card.
# Each record holds the order's fields, the fields of the product the task card
# belongs to, and the task card's own fields prefixed with "taskcard.".
input_dict: list[dict] = []
output_dict: list[dict] = []

for order in orders.get_all():
    order_id = order["_id"]
    order_fields = {k: v for k, v in order.items() if k != "_id"}

    # Materialise query results: they are traversed more than once below, and a
    # one-shot iterator would silently be empty on the second pass.
    # NOTE(review): find_many's return type is not visible here — confirm.
    products_arr = list(products.find_many({"order_id": order_id}))
    product_ids = [prod["_id"] for prod in products_arr]
    products_by_id = {prod["_id"]: prod for prod in products_arr}

    taskcards_arr = list(taskcards.find_many({"product_id": {"$in": product_ids}}))
    # NOTE(review): operations_arr is not used when building the records below;
    # the query is kept in case a later cell relies on it.
    operations_arr = operations.find_many({"taskcard_id": {"$in": [tc["_id"] for tc in taskcards_arr]}})

    for tc in taskcards_arr:
        # Use the product this task card points at. Previously the first
        # product of the order was merged into every task card, which is wrong
        # as soon as an order has more than one product.
        product = products_by_id[tc["product_id"]]

        data = dict(order_fields)
        # Product fields are unprefixed, so they override order fields that
        # share a key.
        data.update({k: v for k, v in product.items() if k not in ("_id", "order_id")})
        data.update({f"taskcard.{k}": v for k, v in tc.items() if k not in ("_id", "product_id")})
        output_dict.append(data)

In [60]:
def safe_to_datetime(x):
    try:
        return pd.to_datetime(x, format='%Y-%m-%dT%H:%M:%S.%fZ')
    except (ValueError, TypeError):
        return x

In [None]:
def _parse_string_cell(cell):
    """Send strings through safe_to_datetime; return any other value as-is."""
    if isinstance(cell, str):
        return safe_to_datetime(cell)
    return cell


# Build the frame from the flattened records, then convert element-wise so
# timestamp-formatted strings become datetimes and everything else is kept.
outputDF = pd.DataFrame(output_dict)
outputDF = outputDF.map(_parse_string_cell)
outputDF