In [1]:
from __future__ import annotations

import cProfile
import pstats
from asyncio import sleep

In [2]:
from py_research.db import (
    DataSource,
    TableMap,
    SubTableMap,
    SubMap,
    SelIndex,
    DataBase
)

In [3]:
from schema import (
    Search,
    Project,
    Task,
    User,
    Membership,
    Organization,
    SearchResult,
    TestSchema,
)

In [4]:
from typing import Any


async def dummy_loader(obj: dict[str, Any]) -> dict:
    """Simulate a slow asynchronous loader for a single member entry.

    Waits 0.2 seconds, then returns a new dict holding the entry's ``name``
    and ``role`` unchanged and its ``age`` increased by one. Any other keys
    of ``obj`` are dropped.

    Args:
        obj: Mapping that must contain the keys ``"name"``, ``"role"`` and
            ``"age"``.

    Returns:
        A new dict with exactly the keys ``name``, ``role`` and ``age``.

    Raises:
        KeyError: If one of the three keys is missing from ``obj``.
    """
    await sleep(0.2)
    # Copy the pass-through fields first, then add the bumped age.
    loaded = {field: obj[field] for field in ("name", "role")}
    loaded["age"] = obj["age"] + 1
    return loaded

In [5]:
# Declarative mapping from one nested input dict (shaped like the sample
# `data` dict defined in this notebook) onto the schema's record types,
# rooted at `Search`. In every `push` dict the string keys are keys of the
# input dict and the values are the schema attributes they are mapped to.
# NOTE(review): the exact semantics of `push`, `pull`, `rel_map`, `match_by`
# and `loader` are defined by py_research.db and are not visible here; the
# notes below marked "presumably" are assumptions to confirm against its docs.
data_source = DataSource(
    target=Search,
    push={
        "resultCount": Search.result_count,
        "search": Search.term,
        # "results" is a list of nested dicts, mapped via Search.results.
        "results": SubTableMap(
            target=Search.results,
            # Presumably stores "score" on the SearchResult record that links
            # the search to each project — TODO confirm.
            rel_map=TableMap(push={SearchResult.score}),
            push={
                "project_name": Project.name,
                "project_start": Project.start,
                "project_end": Project.end,
                "project_status": Project.status,
                # Each project carries its own nested list of task dicts.
                "tasks": SubTableMap(
                    target=Project.tasks,
                    push={
                        "task_name": Task.name,
                        # In the sample data "task_assignees" is a list of bare
                        # name strings, so a single attribute is given as `push`.
                        "task_assignees": SubTableMap(
                            target=Task.assignees,
                            push=User.name,
                            # Presumably identifies users by name so repeated
                            # names resolve to one User — TODO confirm.
                            match_by=[User.name],
                        ),
                        "task_status": Task.status,
                    },
                ),
                # Member dicts are handed to `dummy_loader` (presumably before
                # their fields are pushed — TODO confirm call order).
                "members": SubTableMap(
                    target=Project.members,
                    loader=dummy_loader,
                    # A set of attributes instead of a key->attribute dict;
                    # presumably matched to input keys by attribute name.
                    push={User.name, User.age},
                    match_by=[User.name],
                    # Presumably "role" is stored on the Membership link record
                    # rather than on the User — TODO confirm.
                    rel_map=TableMap(
                        push={
                            Membership.role,
                        },
                    ),
                ),
            },
            pull={
                # The organization_* keys sit flat inside each result dict of
                # the sample data; they are mapped onto the Organization
                # referenced by Project.org.
                Project.org: SubMap(
                    push={
                        "organization_name": Organization.name,
                        "organization_address": Organization.address,
                        "organization_city": Organization.city,
                        "organization_countries": Organization.ccs,
                    },
                    match_by=[Organization.name, Organization.city],
                ),
                # Presumably fills Project.number with the result's position in
                # the "results" list — TODO confirm.
                Project.number: SelIndex()
            },
        ),
    },
)

In [6]:
from datetime import date


# Sample payload fed to `data_source`: one search ("test") with three project
# results. Every result carries flat project_* / organization_* fields plus
# nested "tasks" and "members" lists.
data = dict(
    resultCount=3,
    search="test",
    results=[
        dict(
            score=0.9,
            project_name="baking cake",
            project_start=date(2020, 1, 1),
            project_end=date(2020, 1, 4),
            project_status="done",
            organization_name="Bakery",
            organization_address="Main Street 1",
            organization_city="Bakerville",
            organization_countries=["USA", "Mexico", "Canada"],
            tasks=[
                dict(task_name="task1", task_assignees=["John"], task_status="todo"),
                dict(task_name="task2", task_assignees=["John"], task_status="todo"),
                dict(task_name="task3", task_assignees=["Jane"], task_status="done"),
            ],
            members=[
                # Same name twice on purpose: two roles/ages for "John".
                dict(name="John", role="baker", age=30),
                dict(name="John", role="manager", age=40),
            ],
        ),
        dict(
            score=0.8,
            project_name="cleaning shoes",
            project_start=date(2020, 1, 2),
            project_end=date(2020, 1, 5),
            project_status="done",
            organization_name="Shoe Shop",
            organization_address="Main Street 2",
            organization_city="Shoetown",
            organization_countries=["USA", "Mexico"],
            tasks=[
                dict(task_name="task4", task_assignees=["John"], task_status="todo"),
                dict(task_name="task5", task_assignees=["Jane"], task_status="todo"),
            ],
            members=[
                dict(name="John", role="cleaner", age=25),
                dict(name="Jane", role="manager", age=35),
            ],
        ),
        dict(
            score=0.7,
            project_name="fixing cars",
            project_start=date(2020, 1, 3),
            project_end=date(2020, 1, 6),
            project_status="started",
            organization_name="Car Shop",
            organization_address="Main Street 3",
            organization_city="Cartown",
            organization_countries=["USA", "Canada"],
            tasks=[
                dict(task_name="task6", task_assignees=["John"], task_status="todo"),
            ],
            members=[
                dict(name="John", role="mechanic", age=45),
                dict(name="Jane", role="manager", age=55),
                dict(name="Jack", role="manager", age=65),
            ],
        ),
    ],
)

In [7]:
from pathlib import Path
import warnings

with warnings.catch_warnings():
  warnings.simplefilter("error")
  with cProfile.Profile() as pr:
    db = DataBase(schema=TestSchema, url=Path("./test.xlsx"))
    rec = await data_source.load([data], db=db)

    stats = pstats.Stats(pr)

                                                                           

In [8]:
# Inspect the User records held by `db` after the load.
db[User].values()


[1m[[0m
    [1;35mUser[0m[1m([0m[1m{[0m[32m'name'[0m: [32m'John'[0m, [32m'age'[0m: [1;36m31[0m, [32m'_id'[0m: [32m'1fe150f8-c59b-4c57-b37f-7da370fc9344'[0m[1m}[0m[1m)[0m,
    [1;35mUser[0m[1m([0m[1m{[0m[32m'name'[0m: [32m'Jane'[0m, [32m'age'[0m: [1;36m56[0m, [32m'_id'[0m: [32m'c2cf358d-c55c-4df6-9cda-eb9a5d835759'[0m[1m}[0m[1m)[0m,
    [1;35mUser[0m[1m([0m[1m{[0m[32m'name'[0m: [32m'Jack'[0m, [32m'age'[0m: [1;36m66[0m, [32m'_id'[0m: [32m'a3ac4b15-d79d-4cb7-9f56-d689b3aa55f3'[0m[1m}[0m[1m)[0m
[1m][0m

In [9]:
# Inspect the values stored for the Search.term attribute.
db[Search.term].values()

[1m[[0m[32m'test'[0m[1m][0m

In [10]:
# Materialise the items of Search.result_count as a list so the pairs are displayed.
list(db[Search.result_count].items())

[1m[[0m[1m([0m[32m'test'[0m, [1;36m3[0m[1m)[0m[1m][0m

In [11]:
# List the items reached by going from Search.results on to each result's `org`.
# NOTE(review): `.x` presumably steps into the record type targeted by the
# relation — confirm against the py_research.db docs.
list(db[Search.results.x.org].items())


[1m[[0m
    [1m([0m
        [1m([0m[32m'test'[0m, [1;36m0[0m[1m)[0m,
        [1;35mOrganization[0m[1m([0m[1m{[0m[32m'name'[0m: [32m'Shoe Shop'[0m, [32m'address'[0m: [32m'Main Street 2'[0m, [32m'city'[0m: [32m'Shoetown'[0m, [32m'_id'[0m: [32m'bb4e82dd-9ac9-4bed-9242-844fb75b5d37'[0m[1m}[0m[1m)[0m
    [1m)[0m,
    [1m([0m
        [1m([0m[32m'test'[0m, [1;36m1[0m[1m)[0m,
        [1;35mOrganization[0m[1m([0m[1m{[0m[32m'name'[0m: [32m'Bakery'[0m, [32m'address'[0m: [32m'Main Street 1'[0m, [32m'city'[0m: [32m'Bakerville'[0m, [32m'_id'[0m: [32m'78907f17-67bf-4ba5-8e2f-34e8301faef0'[0m[1m}[0m[1m)[0m
    [1m)[0m,
    [1m([0m
        [1m([0m[32m'test'[0m, [1;36m2[0m[1m)[0m,
        [1;35mOrganization[0m[1m([0m[1m{[0m[32m'name'[0m: [32m'Car Shop'[0m, [32m'address'[0m: [32m'Main Street 3'[0m, [32m'city'[0m: [32m'Cartown'[0m, [32m'_id'[0m: [32m'b38087ce-7b8a-4c12-a954-27bc0a5bebe0'[0m[1m}

In [12]:
# Take the first item of Search.results, pick its second element (index 1) and
# read the `year` of that element's `start` attribute.
# NOTE(review): no output is recorded for this cell, and the df() view of
# Search.results lists `start` as a string column — confirm that `.start`
# yields a date-like object here.
list(db[Search.results].items())[0][1].start.year

In [13]:
# Tabular view of the records selected via Search.results.
db[Search.results].df()

schema_Search_term,start,name,status,org__id,number,end
str,str,str,str,str,i64,str
"""test""","""2020-01-01""","""baking cake""","""done""","""bb4e82dd-9ac9-4bed-9242-844fb7…",0,"""2020-01-04"""
"""test""","""2020-01-02""","""cleaning shoes""","""done""","""78907f17-67bf-4ba5-8e2f-34e830…",1,"""2020-01-05"""
"""test""","""2020-01-03""","""fixing cars""","""started""","""b38087ce-7b8a-4c12-a954-27bc0a…",2,"""2020-01-06"""


In [14]:
# Tabular view of the Organization records held by `db`.
db[Organization].df()

schema_Organization__id,city,address,_id,name
str,str,str,str,str
"""d46d70dd-a3a3-4933-bd15-8f3db6…","""Bakerville""","""Main Street 1""","""d46d70dd-a3a3-4933-bd15-8f3db6…","""Bakery"""
"""e2c659cd-df92-42b7-a828-163821…","""Shoetown""","""Main Street 2""","""e2c659cd-df92-42b7-a828-163821…","""Shoe Shop"""
"""4de83e34-12c2-4835-a552-7514c4…","""Cartown""","""Main Street 3""","""4de83e34-12c2-4835-a552-7514c4…","""Car Shop"""


In [15]:
# Show the database's self-description (the recorded output contains
# 'schema' and 'contents' sections).
db.describe()


[1m{[0m
    [32m'schema'[0m: [1m{[0m
        [32m'package'[0m: [32m'py-research'[0m,
        [32m'module'[0m: [32m'schema'[0m,
        [32m'object'[0m: [32m'TestSchema'[0m,
        [32m'package_version'[0m: [32m'^5.1.1.dev148+g44d4d92.d20250123'[0m,
        [32m'repo'[0m: [32m'https://github.com/cloudlane-one/py-research.git'[0m,
        [32m'repo_revision'[0m: [32m'44d4d92865c3ff6427ce606c03da9c732e82548f'[0m,
        [32m'module_path'[0m: [1;35mWindowsPath[0m[1m([0m[32m'exp/schema.py'[0m[1m)[0m,
        [32m'module_dirty'[0m: [3;91mFalse[0m,
        [32m'docs_url'[0m: [32m'https://cloudlane-one.github.io/py-research/'[0m
    [1m}[0m,
    [32m'contents'[0m: [1m{[0m
        [32m'records'[0m: [1m{[0m
            [32m'schema.Organization'[0m: [1;36m3[0m,
            [32m'schema.Task'[0m: [1;36m6[0m,
            [32m'schema.Search'[0m: [1;36m1[0m,
            [32m'schema.Project'[0m: [1;36m3[0m,
            [32m's

In [None]:
# Select SearchResult records whose score lies in the slice 0.81..0.9 and show
# them as a table.
# NOTE(review): the recorded output includes the row with score 0.9, so the
# upper bound looks inclusive — confirm. `_table` is an underscore-prefixed
# attribute; check whether a public accessor exists.
db[SearchResult._table[SearchResult.score.isin(slice(0.81, 0.9))]].df()

schema_SearchResult__id,_to_number,_from_term,_id,score
str,i64,str,str,f64
"""5714306633""",1,"""test""","""5714306633""",0.9


In [None]:
# Tabular view of Organization.ccs, the attribute that the data source fills
# from each result's "organization_countries" list.
db[Organization.ccs].df()

schema_Organization__id,schema_Organization_183742__id,_value
str,i64,str
"""d46d70dd-a3a3-4933-bd15-8f3db6…",0,"""USA"""
"""d46d70dd-a3a3-4933-bd15-8f3db6…",1,"""Mexico"""
"""d46d70dd-a3a3-4933-bd15-8f3db6…",2,"""Canada"""
"""e2c659cd-df92-42b7-a828-163821…",0,"""USA"""
"""e2c659cd-df92-42b7-a828-163821…",1,"""Mexico"""
"""4de83e34-12c2-4835-a552-7514c4…",0,"""USA"""
"""4de83e34-12c2-4835-a552-7514c4…",1,"""Canada"""


In [18]:
# Rank the collected profile by cumulative time and print only the entries
# matching the "py_research" restriction. pstats.Stats methods return the
# Stats object itself, so the calls chain and the cell still evaluates to it.
stats.sort_stats("cumulative").print_stats("py_research")

         483559 function calls (449490 primitive calls) in 0.670 seconds

   Ordered by: cumulative time
   List reduced from 2280 to 157 due to restriction <'py_research'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       56    0.000    0.000    0.217    0.004 C:\Users\lworm\Repos\py-research\src\py_research\db\datasources.py:838(load)
       52    0.000    0.000    0.212    0.004 C:\Users\lworm\Repos\py-research\src\py_research\db\datasources.py:697(_load_rec_from_item)
       52    0.001    0.000    0.209    0.004 C:\Users\lworm\Repos\py-research\src\py_research\db\datasources.py:572(_load_record)
   168/57    0.001    0.000    0.207    0.004 C:\Users\lworm\Repos\py-research\src\py_research\db\datasources.py:665(_load_records)
       19    0.000    0.000    0.191    0.010 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:1467(__ior__)
       19    0.001    0.000    0.191    0.010 C:\Users\lworm\Repos\py-research\src\py_research\db\databas

   0.152    0.009 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:3045(_mutate_from_records)
16352/7843    0.014    0.000    0.145    0.000 C:\Users\lworm\Repos\py-research\src\py_research\caching.py:382(__get__)
       45    0.000    0.000    0.131    0.003 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:722(keys)
       45    0.000    0.000    0.125    0.003 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:881(df)
       16    0.000    0.000    0.105    0.007 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:2882(_df_to_table)
       47    0.000    0.000    0.086    0.002 C:\Users\lworm\Repos\py-research\src\py_research\caching.py:364(__call__)
       47    0.000    0.000    0.086    0.002 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:637(select)
      101    0.000    0.000    0.050    0.000 C:\Users\lworm\Repos\py-research\src\py_research\db\databases.py:2444(_sql_col)
 6906/265    0.006    0.000    

[1m<[0m[1;95mpstats.Stats[0m[39m object at [0m[1;36m0x00000231CF3264E0[0m[1m>[0m