In [1]:
# imports
import os
import sys
# import time
# from typing import Callable
import distogram
from datetime import datetime
import pickle
import jsonpickle
# from dataclasses import dataclass
import base64
import time
from dotenv import load_dotenv

from dateutil.relativedelta import relativedelta

from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

from common import (Base,
    LabelledDistogram, make_distribution, make_distogram, AggregationType)

try:
    import zoneinfo
except ImportError:
    from backports import zoneinfo


In [2]:
# methods
def return_test_engine(string):
    """Create a SQLAlchemy engine for one of the supported test backends.

    Args:
        string: Backend selector. Recognized values are "bigquery",
            "sqlite-memory", "sqlite-disk" and "postgres".

    Returns:
        The engine produced by ``create_engine`` for the selected backend.

    Raises:
        SystemExit: Raised through ``sys.exit()`` after an error message is
            printed, when the selector is not recognized, or when
            "postgres" is selected and POSTGRES_USER is unset or empty.
    """
    # Bind the argument locally. The body previously read a notebook-level
    # global named `database`, so the function ignored its parameter and
    # raised NameError whenever that global had not been defined yet.
    database = string

    project_id = os.environ.get('DEVSHELL_PROJECT_ID')
    postgres_user = os.environ.get('POSTGRES_USER')
    postgres_password = os.environ.get('POSTGRES_PASSWORD')
    dataset = 'default_dataset'

    if database == "bigquery":
        engine = create_engine(f'bigquery://{project_id}/{dataset}')
    elif database == "sqlite-memory":
        # echo=True makes SQLAlchemy log the SQL it emits.
        engine = create_engine('sqlite:///:memory:', echo=True)
    elif database == "sqlite-disk":
        engine = create_engine('sqlite:///./localdb', echo=True)
    elif database == "postgres":
        if not postgres_user:
            print(f"ERROR: postgres_user {postgres_user} is not defined.")
            sys.exit()
        else:
            # NOTE(review): an unset POSTGRES_PASSWORD is interpolated as the
            # literal text "None", and the credentials are not URL-escaped.
            engine = create_engine(
                f'postgresql://{postgres_user}:{postgres_password}'
                '@localhost:5432/google_cloud_dev')
    else:
        print(f"ERROR: database {database} is not recognized")
        sys.exit()
    return engine



In [3]:
# develop/debug
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Backend selectors accepted by return_test_engine.
database_list = [
    "bigquery",
    "sqlite-memory",
    "sqlite-disk",
    "postgres",
]
# Index 3 picks "postgres"; change the index to target a different backend.
database = database_list[3]

# Build the engine for the chosen backend and show its (masked) URL.
engine = return_test_engine(database)
print(engine)

Engine(postgresql://crkrenn:***@localhost:5432/google_cloud_dev)


In [4]:
# Display the table definition attached to the LabelledDistogram model.
LabelledDistogram.__table__

Table('distograms', MetaData(), Column('primary_key', String(), table=<distograms>, primary_key=True, nullable=False), Column('data_source', String(), table=<distograms>), Column('variable_name', String(), table=<distograms>), Column('datetime', DateTime(timezone=True), table=<distograms>), Column('distogram_string', String(), table=<distograms>), schema=None)

In [5]:
# Create the tables registered on Base.metadata in the target database
# (by default SQLAlchemy skips tables that already exist).
Base.metadata.create_all(engine)

In [6]:

# Session factory bound to the engine, and one working session from it.
Session = sessionmaker(bind=engine)

session = Session()

# Sample data and its distogram come from the helpers in `common`.
data = make_distribution()
h = make_distogram(data)
print(f"min/max {h.min} {h.max}")

# Timezone-aware current time in UTC. datetime.now(tz=...) yields the same
# value as the deprecated naive datetime.utcnow() followed by
# .replace(tzinfo=...), without the intermediate naive object.
now = datetime.now(tz=zoneinfo.ZoneInfo('Etc/UTC'))
print(now)

# Two records built from identical fields; the recorded output shows each
# one still receives its own primary_key.
d = LabelledDistogram(
    data_source="dev",
    variable_name="x",
    datetime=now,
    aggregation_type="every",
    distogram=h)
d2 = LabelledDistogram(
    data_source="dev",
    variable_name="x",
    datetime=now,
    aggregation_type="every",
    distogram=h)

print(f"d: {d}")
print(f"d2: {d2}")
print(f"h_pickle: {len(d.distogram_string)}")
print(f"d.__table__\n{d.__table__}")

# Stage both records, then commit; session.new is printed before and after
# the commit so the pending set can be compared.
session.add_all([d, d2])

print(f"session.new: {session.new}")
session.commit()
print(f"session.new: {session.new}")
print(f"d: {d}")
print(f"d2: {d2}")

min/max -3.8945120289647206 3.741983862207812
2021-11-13 14:54:28.952159+00:00
d: <LabelledDistogram(data_source='dev', primary_key='35abaa50-760c-4469-9371-9284d20d3d6a', variable_name='x', datetime='2021-11-13 14:54:28.952159+00:00', aggregation_type='every', min/max='-3.8945120289647206/3.741983862207812
d2: <LabelledDistogram(data_source='dev', primary_key='76994efb-0d83-42ae-b4bc-6fc0bfdd07cb', variable_name='x', datetime='2021-11-13 14:54:28.952159+00:00', aggregation_type='every', min/max='-3.8945120289647206/3.741983862207812
h_pickle: 6319
d.__table__
distograms
session.new: IdentitySet([<LabelledDistogram(data_source='dev', primary_key='35abaa50-760c-4469-9371-9284d20d3d6a', variable_name='x', datetime='2021-11-13 14:54:28.952159+00:00', aggregation_type='every', min/max='-3.8945120289647206/3.741983862207812, <LabelledDistogram(data_source='dev', primary_key='76994efb-0d83-42ae-b4bc-6fc0bfdd07cb', variable_name='x', datetime='2021-11-13 14:54:28.952159+00:00', aggregation_ty

In [7]:
# Commit again; the previous cell already committed, so no new pending
# changes are expected here.
session.commit()

In [8]:
# Show session objects with uncommitted modifications (the recorded output
# is an empty set).
session.dirty

IdentitySet([])

In [9]:
# Print the key and variable name of every stored row, ordered by primary key.
ordered_distograms = session.query(LabelledDistogram).order_by(
    LabelledDistogram.primary_key)
for instance in ordered_distograms:
    print(instance.primary_key, instance.variable_name)

35abaa50-760c-4469-9371-9284d20d3d6a x
58fc5026-70d7-44bb-b7f8-b5e616015815 x
6ee5242c-889f-4f32-86f9-303af5eac6e7 x
76994efb-0d83-42ae-b4bc-6fc0bfdd07cb x
