# Price data quality dashboard

## Imports and set up django environment

In [35]:
from datetime import timedelta
import math
import os
import pandas as pd
import django
from django.db import connection
from django.utils import timezone
import logging
import numpy as np
from IPython.core.display import display, HTML
# Change the working directory to the parent of the current one.
# NOTE(review): this is relative and not idempotent - every re-run of this cell moves up
# one more directory. Presumably the notebook sits one level below the Django project
# root, so that `rest.settings` and the `pricedata` app become importable - confirm.
os.chdir('..')

# Allows async calls to django ORM in Jupyter. Required.
# The settings module is only set if it is not already present in the environment
# (setdefault); the async-unsafe flag is always overwritten. Both are set before
# django.setup() is called.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rest.settings')
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()

from pricedata import models





## Time ORM vs  Raw SQL
Retrieve the data into a dataframe. We need:
* Symbol Name;
* Instrument Type Name;
* DataSource Name; and
* Time.

Where Retrieve Price Data is True and period is '1S'

## Django ORM QuerySet then convert to dataframe

In [32]:
%%timeit
price_data =  models.Candle.objects.filter(datasource_symbol__retrieve_price_data=True, period='1S')


df = pd.DataFrame(list(price_data.values('datasource_symbol__symbol__name',
                                                 'datasource_symbol__symbol__instrument_type',
                                                 'datasource_symbol__datasource__name', 'time')))

print(len(df.index))

2147012
2147012
2147012
2147012
2147012
2147012
2147012
2147012
25.4 s ± 1.45 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Raw sql straight into dataframe


In [61]:
%%timeit
sql = """SELECT	s.name AS symbol,
                s.instrument_type AS instrument_type,
                ds.name AS datasource,
                cdl.time AS time
        FROM public.pricedata_datasourcesymbol dss
            INNER JOIN pricedata_datasource ds ON dss.datasource_id = ds.id
            INNER JOIN pricedata_symbol s ON dss.symbol_id = s.id
            INNER JOIN pricedata_candle cdl ON cdl.datasource_symbol_id = dss.id  
        WHERE dss.retrieve_price_data = true
            AND cdl.period = %(period)s"""
    
df = pd.read_sql_query(sql=sql, con=connection, params={'period': '1S'})

print(len(df.index))

2240566


## % difference between results

In [55]:
# Percentage reduction in run time: raw SQL (22.2 s) relative to the Django ORM (25.4 s).
100 - (22.2 / 25.4) * 100

12.5984251968504

## Results
### Django
Time: 25.4 s ± 1.45 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
Rowcount: 2147012
    
### Raw
Time: 22.2 s ± 221 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Rowcount: 2240566 (as printed in the saved output of the raw SQL cell above)

#### We have a winner. Raw SQL is 12.6% faster than Django
    


## Get the data using raw SQL, this time without %%timeit

In [2]:
# Load symbol, instrument type, datasource and candle time for every candle of the
# requested period whose datasource symbol is flagged for price retrieval.
sql = """SELECT	s.name AS symbol,
                s.instrument_type AS instrument_type,
                ds.name AS datasource,
                cdl.time AS time
        FROM public.pricedata_datasourcesymbol dss
            INNER JOIN pricedata_datasource ds ON dss.datasource_id = ds.id
            INNER JOIN pricedata_symbol s ON dss.symbol_id = s.id
            INNER JOIN pricedata_candle cdl ON cdl.datasource_symbol_id = dss.id  
        WHERE dss.retrieve_price_data = true
            AND cdl.period = %(period)s"""

# The period is passed as a bound parameter, not interpolated into the SQL text.
df = pd.read_sql_query(sql, connection, params={'period': '1S'})

df

Unnamed: 0,symbol,instrument_type,datasource,time
0,CADCHF,FOREX,MT5,2021-07-16 00:00:00+00:00
1,CADCHF,FOREX,MT5,2021-07-16 00:05:01+00:00
2,CADCHF,FOREX,MT5,2021-07-16 00:05:27+00:00
3,CADCHF,FOREX,MT5,2021-07-16 00:05:43+00:00
4,CADCHF,FOREX,MT5,2021-07-16 00:05:51+00:00
...,...,...,...,...
4285989,USDCNH,FOREX,MT5,2021-07-20 10:25:13+00:00
4285990,USDCNH,FOREX,MT5,2021-07-20 10:25:14+00:00
4285991,USDCNH,FOREX,MT5,2021-07-20 10:25:15+00:00
4285992,USDCNH,FOREX,MT5,2021-07-20 10:25:16+00:00


## Add another datasource for testing

In [136]:
# Synthesise a second datasource ('NDS') by cloning the existing rows and relabelling
# them, so there is more than one datasource to aggregate over.
# Rows already labelled 'NDS' are excluded first, so re-running this cell does not
# double the data again; on a first run the result is the same as copying everything.
real_rows = df[df['datasource'] != 'NDS']
nds = real_rows.assign(datasource='NDS')

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement and, like
# append, keeps each frame's original index labels by default.
df = pd.concat([real_rows, nds])

print(f"NDS has {len(df[df['datasource'] == 'NDS'].index)} rows. MT5 has {len(df[df['datasource'] == 'MT5'].index)} rows.")

df

NDS has 2473058 rows. MT5 has 2473058 rows.


Unnamed: 0,symbol,instrument_type,datasource,time
0,CADCHF,FOREX,MT5,2021-07-16 00:00:00+00:00
1,CADCHF,FOREX,MT5,2021-07-16 00:05:01+00:00
2,CADCHF,FOREX,MT5,2021-07-16 00:05:27+00:00
3,CADCHF,FOREX,MT5,2021-07-16 00:05:43+00:00
4,CADCHF,FOREX,MT5,2021-07-16 00:05:51+00:00
...,...,...,...,...
2473053,USDCNH,FOREX,NDS,2021-07-19 12:25:07+00:00
2473054,USDCNH,FOREX,NDS,2021-07-19 12:25:08+00:00
2473055,USDCNH,FOREX,NDS,2021-07-19 12:25:10+00:00
2473056,USDCNH,FOREX,NDS,2021-07-19 12:25:11+00:00


## Table showing num prices by symbol, by DS for each aggregation period

In [82]:
# First group by to get the first / last candle time and the total candle count
# for every (symbol, instrument type, datasource) combination.
group_keys = ['symbol', 'instrument_type', 'datasource']
grouped = df.groupby(group_keys).agg(first=('time', 'min'), last=('time', 'max'), count=('time', 'count'))

# Aggregation periods, mapped to the pandas frequency alias used to bucket the candle times.
aggs = {'minutes': 'T', 'hours': 'H', 'days': 'D', 'weeks': 'W', 'months': 'M'}

for key, freq in aggs.items():
    # Count candles per time bucket, then summarise those bucket counts per
    # symbol / instrument type / datasource.
    # NOTE: the column named 'avg' is the median of the bucket counts, not the mean.
    agg_period_ungrouped = df.groupby(group_keys + [pd.Grouper(key='time', freq=freq)]).agg(count=('time', 'count'))
    agg_period_grouped = agg_period_ungrouped.groupby(group_keys).agg(
        min=('count', 'min'), max=('count', 'max'), avg=('count', 'median'))

    # Rename columns to include aggregation period key, then merge into original dataframe
    agg_period_grouped = agg_period_grouped.rename(
        columns={'min': f'{key}_min', 'max': f'{key}_max', 'avg': f'{key}_avg'})
    grouped = grouped.join(agg_period_grouped, on=group_keys)

# Now unstack, so we show columns for each datasource. Columns become (aggregate, datasource) pairs.
grouped = grouped.unstack()

# Create the same aggregations but across all datasources.
# The unique level values are taken in column order rather than via set(), so the order of
# the generated '*_all' columns is the same on every run instead of depending on hashing.
agg_cols = list(grouped.columns.get_level_values(0).unique())
ds_cols = list(grouped.columns.get_level_values(1).unique())

for agg_col in agg_cols:
    per_ds = grouped[[(agg_col, ds) for ds in ds_cols]]
    if '_min' in agg_col or agg_col == 'first':
        grouped[(agg_col, 'all')] = per_ds.min(axis=1)
    elif '_max' in agg_col or agg_col == 'last':
        grouped[(agg_col, 'all')] = per_ds.max(axis=1)
    elif '_avg' in agg_col:
        # Mean of the per-datasource medians.
        grouped[(agg_col, 'all')] = per_ds.mean(axis=1)
    # 'count' matches none of the branches, so no cross-datasource column is created for it.

# Flatten the columns and reset the index
grouped.columns = ['_'.join(col).strip() for col in grouped.columns]
grouped = grouped.reset_index()

grouped


Unnamed: 0,symbol,instrument_type,first_MT5,last_MT5,count_MT5,minutes_min_MT5,minutes_max_MT5,minutes_avg_MT5,hours_min_MT5,hours_max_MT5,...,weeks_max_all,months_min_all,hours_avg_all,days_max_all,months_max_all,minutes_min_all,minutes_avg_all,days_min_all,weeks_avg_all,months_avg_all
0,AUDCAD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:47+00:00,75431,1,60,27.0,126,3341,...,39258,75431,1499.0,39258,75431,1,27.0,36173,37715.5,75431.0
1,AUDCHF,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:51+00:00,71641,1,60,26.0,99,3310,...,37219,71641,1432.5,37219,71641,1,26.0,34422,35820.5,71641.0
2,AUDJPY,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:52+00:00,76895,1,60,27.0,134,3319,...,40185,76895,1575.5,40185,76895,1,27.0,36710,38447.5,76895.0
3,AUDNZD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:47+00:00,72478,1,60,25.0,117,3338,...,38308,72478,1492.5,38308,72478,1,25.0,34170,36239.0,72478.0
4,AUDUSD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:47+00:00,70547,1,60,24.0,133,3308,...,38767,70547,1351.0,38767,70547,1,24.0,31780,35273.5,70547.0
5,CADCHF,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:23+00:00,54384,1,59,18.0,80,3040,...,29427,54384,958.5,29427,54384,1,18.0,24957,27192.0,54384.0
6,CADJPY,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:26+00:00,64055,1,59,22.0,137,3081,...,33468,64055,1339.0,33468,64055,1,22.0,30587,32027.5,64055.0
7,CHFJPY,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:23+00:00,58093,1,59,20.0,134,2684,...,30346,58093,1178.5,30346,58093,1,20.0,27747,29046.5,58093.0
8,CHFSGD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:58+00:00,41969,1,56,14.0,9,2384,...,22202,41969,743.5,22202,41969,1,14.0,19767,20984.5,41969.0
9,EURAUD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:55+00:00,95460,1,60,36.0,198,3508,...,49112,95460,2128.0,49112,95460,1,36.0,46348,47730.0,95460.0


## We will keep the full data table intact, and create a new one for display purposes. This will:
* Remove the DS agg columns; 
* Move the agg functions into the cells; 
* Rename the columns; and
* Reorder columns.

In [83]:
# Work on a copy. A plain `display = grouped` only binds a second name to the same frame,
# so the text columns built below would also be added to `grouped`, which is meant to stay intact.
# NOTE(review): the name `display` shadows the `display` helper imported from IPython at the
# top of the notebook; it is kept so that anything referring to this table by name still works.
display = grouped.copy()

# Merge the agg functions for _all into cells for each aggregation period
for agg in ['minutes', 'hours', 'days', 'weeks', 'months']:
    # Consolidate the cross-datasource min, max and avg columns into one text column
    # named after the aggregation period.
    min_col = f'{agg}_min_all'
    max_col = f'{agg}_max_all'
    avg_col = f'{agg}_avg_all'
    display[agg] = ('min:' + display[min_col].astype(str)
                    + ' max:' + display[max_col].astype(str)
                    + ' avg:' + display[avg_col].astype(str))

# Rename the columns that we will be keeping
display = display.rename(
    columns={'symbol': 'Symbol', 'instrument_type': 'Instrument Type', 'first_all': 'First Price',
             'last_all': 'Last Price', 'minutes': 'Minutes', 'hours': 'Hours', 'days': 'Days', 'weeks': 'Weeks',
             'months': 'Months'})

# Reorder the columns, excluding any that we dont want to keep (this is what drops the
# per-datasource and raw *_all columns from the display table)
display = display[
    ['Symbol', 'Instrument Type', 'First Price', 'Last Price', 'Minutes', 'Hours', 'Days', 'Weeks', 'Months']]

display

Unnamed: 0,Symbol,Instrument Type,First Price,Last Price,Minutes,Hours,Days,Weeks,Months
0,AUDCAD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:47+00:00,min:1 max:60 avg:27.0,min:126 max:3341 avg:1499.0,min:36173 max:39258 avg:37715.5,min:36173 max:39258 avg:37715.5,min:75431 max:75431 avg:75431.0
1,AUDCHF,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:51+00:00,min:1 max:60 avg:26.0,min:99 max:3310 avg:1432.5,min:34422 max:37219 avg:35820.5,min:34422 max:37219 avg:35820.5,min:71641 max:71641 avg:71641.0
2,AUDJPY,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:52+00:00,min:1 max:60 avg:27.0,min:134 max:3319 avg:1575.5,min:36710 max:40185 avg:38447.5,min:36710 max:40185 avg:38447.5,min:76895 max:76895 avg:76895.0
3,AUDNZD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:47+00:00,min:1 max:60 avg:25.0,min:117 max:3338 avg:1492.5,min:34170 max:38308 avg:36239.0,min:34170 max:38308 avg:36239.0,min:72478 max:72478 avg:72478.0
4,AUDUSD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:47+00:00,min:1 max:60 avg:24.0,min:133 max:3308 avg:1351.0,min:31780 max:38767 avg:35273.5,min:31780 max:38767 avg:35273.5,min:70547 max:70547 avg:70547.0
5,CADCHF,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:23+00:00,min:1 max:59 avg:18.0,min:80 max:3040 avg:958.5,min:24957 max:29427 avg:27192.0,min:24957 max:29427 avg:27192.0,min:54384 max:54384 avg:54384.0
6,CADJPY,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:26+00:00,min:1 max:59 avg:22.0,min:137 max:3081 avg:1339.0,min:30587 max:33468 avg:32027.5,min:30587 max:33468 avg:32027.5,min:64055 max:64055 avg:64055.0
7,CHFJPY,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:23+00:00,min:1 max:59 avg:20.0,min:134 max:2684 avg:1178.5,min:27747 max:30346 avg:29046.5,min:27747 max:30346 avg:29046.5,min:58093 max:58093 avg:58093.0
8,CHFSGD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:58+00:00,min:1 max:56 avg:14.0,min:9 max:2384 avg:743.5,min:19767 max:22202 avg:20984.5,min:19767 max:22202 avg:20984.5,min:41969 max:41969 avg:41969.0
9,EURAUD,FOREX,2021-07-16 00:00:00+00:00,2021-07-19 21:24:55+00:00,min:1 max:60 avg:36.0,min:198 max:3508 avg:2128.0,min:46348 max:49112 avg:47730.0,min:46348 max:49112 avg:47730.0,min:95460 max:95460 avg:95460.0


### This is slow. Timeit shows: 21.9 s ± 60.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

## Tying it all together
## We will need a bulk upload method and will run this as a task

In [50]:
def bulk_insert_or_update(data: pd.DataFrame, table: str, unique_fields=None, batch_size=None):
    """
    Bulk insert or update (upsert) of price data. If unique fields already exists, then update else insert

    One multi-row INSERT statement is built and executed per batch. Values are escaped with the cursor's
    mogrify; the table and column names are placed into the SQL text as-is, so they must come from trusted
    code, never from user input.

    :param data: The pandas dataframe to insert / update to db. The columns in the dataframe must match the table
        columns. If None or empty, nothing is written and a debug message is logged.
    :param table: The name of the table to update
    :param unique_fields: Fields that will raise the unique key constraint on insert. If none are provided (None
        or empty), then we will just do a straight insert rather than upsert. If they cover every column of the
        dataframe there is nothing left to update, so conflicting rows are skipped (ON CONFLICT ... DO NOTHING).
    :param batch_size: Maximum number of rows to update in one go. If None, then no batching
    :return: None
    """

    # Logger
    log = logging.getLogger(__name__)

    # Do we have any data. (Previously the matching 'else' was attached to the batch loop rather than to this
    # check, so the message was logged after every successful save and never when there was no data.)
    if data is None or len(data.index) == 0:
        log.debug("No data to save.")
        return

    # Get create fields from dataframe and the update fields as the create fields - unique fields.
    # Lists rather than sets, so the generated SQL is the same on every run.
    create_fields = list(data.columns)
    unique_fields = list(unique_fields) if unique_fields else []
    update_fields = [field for field in create_fields if field not in unique_fields]

    # The parts of the statement that do not change between batches
    columns_sql = ','.join(create_fields)
    placeholders = ','.join(['%s'] * len(create_fields))
    conflict_sql = ''
    if unique_fields:
        conflict_target = ','.join(unique_fields)
        if update_fields:
            # x = excluded.x for every non-unique column
            assignments = ','.join(f'{field}=excluded.{field}' for field in update_fields)
            conflict_sql = f" ON CONFLICT ({conflict_target}) DO UPDATE SET {assignments}"
        else:
            # An empty SET clause would be invalid SQL
            conflict_sql = f" ON CONFLICT ({conflict_target}) DO NOTHING"
    mode = 'UPSERT' if unique_fields else 'INSERT'

    # Get batches if we are batching. If not create a single batch with all the data
    if batch_size:
        batches = [data.iloc[start:start + batch_size] for start in range(0, len(data.index), batch_size)]
    else:
        batches = [data]
    log.info(f'Bulk update to {table}. Rowcount: {len(data.index)}. Update split into {len(batches)} batches of maximum {batch_size} updates.')  # TODO Debug

    for number, batch in enumerate(batches, start=1):
        log.info(f'Bulk update to {table}. Batch {number} of {len(batches)}.')  # TODO Debug

        # The context manager closes the cursor even if the statement fails
        with connection.cursor() as cursor:
            # Mogrify values to bind into sql. itertuples yields one plain tuple per row.
            mogvals = [cursor.mogrify(f"({placeholders})", row).decode('utf8')
                       for row in batch.itertuples(index=False, name=None)]

            sql = f"INSERT INTO {table} ({columns_sql}) VALUES {','.join(mogvals)}{conflict_sql}"

            # Update
            log.debug(f"Saving {len(batch.index)} rows to {table}. Mode: {mode}.")
            cursor.execute(sql)

## The task code to create a batch and populate the summary tables with aggregated data

### Part 1 - Create the batch, get the data and define the aggregation periods

In [39]:
# Create the batch
# A SummaryBatch row stamped with the current time. The summary rows written later in this
# notebook carry batch.id in their summary_batch_id column, so the batch is saved first.
batch = models.SummaryBatch(time=timezone.now())
batch.save()

In [40]:
# Get all price data
# One row per candle of every datasource symbol flagged for price retrieval, together with the
# ids needed to group it later: datasource candle period, datasource symbol and symbol.
# Candles are matched to a datasource candle period on both datasource and period.
sql = """SELECT	dscp.id AS datasource_candleperiod_id,
                dss.id AS datasource_symbol_id,
                dss.symbol_id AS symbol_id,
                dscp.period AS period,
                cdl.time AS time
        FROM pricedata_datasourcesymbol dss
                INNER JOIN pricedata_candle cdl
                    ON cdl.datasource_symbol_id = dss.id
                INNER JOIN pricedata_datasourcecandleperiod dscp
                    ON dss.datasource_id = dscp.datasource_id  AND 
                        cdl.period = dscp.period
        WHERE dss.retrieve_price_data = true"""

price_data = pd.read_sql_query(sql, connection)

### Part 1b - Define the aggregation periods

In [41]:
# Aggregation periods for the summary views, keyed by name and mapped to the pandas
# frequency alias used to bucket candle times.
aggs = dict(minutes='T', hours='H', days='D', weeks='W', months='M')

### Part 2 - Create the 2 summaries, by datasource and cross datasource

In [42]:
# Two summaries are produced: one per datasource, and one across all datasources.
# Each grouping is paired with the database table its result is written to.
summary_by_ds_groupby = ['datasource_symbol_id', 'datasource_candleperiod_id']
symmary_across_ds_groupby = ['symbol_id', 'period']
group_bys = [summary_by_ds_groupby, symmary_across_ds_groupby]
tables = [models.SummaryMetric.objects.model._meta.db_table,
          models.SummaryMetricAllDatasources.objects.model._meta.db_table]

for group_by, table in zip(group_bys, tables):
    # Overall first / last candle time and candle count per group
    grouped = price_data.groupby(group_by).agg(
        first_candle_time=('time', 'min'),
        last_candle_time=('time', 'max'),
        num_candles=('time', 'count'))

    for key, freq in aggs.items():
        # Count candles per time bucket of this aggregation period ...
        agg_period_ungrouped = price_data.groupby(
            group_by + [pd.Grouper(key='time', freq=freq),]).agg(count=('time', 'count'))

        # ... then summarise the bucket counts per group. 'avg' is the median of the counts.
        agg_period_grouped = agg_period_ungrouped.groupby(group_by).agg(
            min=('count', 'min'), max=('count', 'max'), avg=('count', 'median'))

        # Prefix the column names with the aggregation period and attach them to the summary
        agg_period_grouped = agg_period_grouped.rename(
            columns={'min': f'{key}_min', 'max': f'{key}_max', 'avg': f'{key}_avg'})
        grouped = grouped.join(agg_period_grouped, on=group_by)

    # Turn the group keys back into ordinary columns so the frame is flat
    grouped = grouped.reset_index()

    # Stamp every row with the id of the summary batch
    grouped['summary_batch_id'] = batch.id

    # Insert into db
    bulk_insert_or_update(data=grouped, table=table)

22:13:11 - __main__ - INFO - Bulk update to pricedata_summarymetric. Rowcount: 228. Update split into 1 batches of 10000 updates.
22:13:11 - __main__ - INFO - Bulk update to pricedata_summarymetric. Batch 1 of 1.
22:15:58 - __main__ - INFO - Bulk update to pricedata_summarymetricalldatasources. Rowcount: 228. Update split into 1 batches of 10000 updates.
22:15:58 - __main__ - INFO - Bulk update to pricedata_summarymetricalldatasources. Batch 1 of 1.


### Part 3 - Create the aggregation / time / symbol view for the dashboard charts

In [51]:
# We will also aggregate the times across each aggregation period and symbol for all datasources and periods.
# We will only aggregate enough data for each aggregation period for 100 plots.
group_by = ['datasource_symbol_id', 'datasource_candleperiod_id']

# How far back from the latest candle to look for each aggregation period.
# NOTE(review): 420 weeks is roughly 96.6 months, slightly short of 100 monthly buckets - confirm intended.
timedeltas_for_aggs = {'minutes': timedelta(minutes=100), 'hours': timedelta(hours=100),
                       'days': timedelta(days=100), 'weeks': timedelta(weeks=100),
                       'months': timedelta(weeks=420)}

# To date is the last date available. It does not depend on the aggregation period, so compute it once.
to_date = price_data['time'].max()

agg_frames = []
for key, freq in aggs.items():
    # From date, depending on aggregation period.
    from_date = to_date - timedeltas_for_aggs[key]

    # Filter the data
    filtered = price_data[(price_data['time'] >= from_date) & (price_data['time'] <= to_date)]

    # Number of candles per group and time bucket for this aggregation period
    agg_grouped = filtered.groupby(
        group_by + [pd.Grouper(key='time', freq=freq),]).size().reset_index(name='num_candles')

    # Add aggregation period and batch
    agg_grouped['aggregation_period'] = key
    agg_grouped['summary_batch_id'] = batch.id

    agg_frames.append(agg_grouped)

# Combine once at the end. DataFrame.append was removed in pandas 2.0, and growing a frame inside
# the loop re-copies the accumulated rows on every iteration. None when there were no periods.
grouped = pd.concat(agg_frames) if agg_frames else None

# Insert into db
bulk_insert_or_update(data=grouped, table=models.SummaryAggregation.objects.model._meta.db_table, batch_size=10000)

22:28:55 - __main__ - INFO - Bulk update to pricedata_summaryaggregation. Rowcount: 29604. Update split into 3 batches of maximum 10000 updates.
22:28:55 - __main__ - INFO - Bulk update to pricedata_summaryaggregation. Batch 1 of 3.
22:28:57 - __main__ - INFO - Bulk update to pricedata_summaryaggregation. Batch 2 of 3.
22:28:58 - __main__ - INFO - Bulk update to pricedata_summaryaggregation. Batch 3 of 3.


## Run queries to display output

In [52]:
# Summary batch: read back every row of the batch table for inspection.
data = pd.read_sql_query("SELECT * FROM pricedata_summarybatch", connection)
data

Unnamed: 0,id,time
0,23,2021-07-29 21:05:11.024469+00:00


In [53]:
# Summary metric: read back every row of the per-datasource summary table for inspection.
data = pd.read_sql_query("SELECT * FROM pricedata_summarymetric", connection)
data

Unnamed: 0,id,first_candle_time,last_candle_time,num_candles,minutes_min,minutes_max,minutes_avg,hours_min,hours_max,hours_avg,...,days_avg,weeks_min,weeks_max,weeks_avg,months_min,months_max,months_avg,datasource_candleperiod_id,summary_batch_id,datasource_symbol_id
0,1825,2021-07-16 00:00:00+00:00,2021-07-29 21:05:03+00:00,265768,1,60,17,78,3169,985,...,25872,24957,135516,105295,265768,265768,265768,1,23,1
1,1826,2021-07-23 00:06:00+00:00,2021-07-29 21:05:00+00:00,6997,1,1,1,6,60,60,...,1434,1432,5565,3499,6997,6997,6997,2,23,1
2,1827,2021-07-23 00:00:00+00:00,2021-07-29 21:00:00+00:00,118,1,1,1,1,1,1,...,24,24,94,59,118,118,118,3,23,1
3,1828,2021-07-16 00:00:00+00:00,2021-07-27 00:00:00+00:00,8,1,1,1,1,1,1,...,1,1,5,2,8,8,8,4,23,1
4,1829,2021-07-16 00:00:00+00:00,2021-07-29 21:05:05+00:00,310220,1,60,21,114,3125,1241,...,30246,30587,154667,124966,310220,310220,310220,1,23,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,2048,2021-07-23 00:00:00+00:00,2021-07-27 00:00:00+00:00,3,1,1,1,1,1,1,...,1,1,2,2,3,3,3,4,23,58
224,2049,2021-07-16 00:00:00+00:00,2021-07-29 21:05:01+00:00,244494,1,60,16,57,2705,1042,...,24089,22331,122435,99728,244494,244494,244494,1,23,60
225,2050,2021-07-16 00:00:00+00:00,2021-07-29 21:05:00+00:00,14159,1,1,1,6,60,60,...,1433,1431,7167,5561,14159,14159,14159,2,23,60
226,2051,2021-07-16 00:00:00+00:00,2021-07-29 21:00:00+00:00,238,1,1,1,1,1,1,...,24,24,120,94,238,238,238,3,23,60


In [54]:
# Summary metric all datasources: read back every row of the cross-datasource summary table.
data = pd.read_sql_query("SELECT * FROM pricedata_summarymetricalldatasources", connection)
data

Unnamed: 0,id,period,minutes_min,minutes_max,minutes_avg,hours_min,hours_max,hours_avg,days_min,days_max,...,weeks_max,weeks_avg,months_min,months_max,months_avg,summary_batch_id,symbol_id,first_candle_time,last_candle_time,num_candles
0,1369,1D,1,1,1,1,1,1,1,1,...,5,2,8,8,8,23,1,2021-07-16 00:00:00+00:00,2021-07-27 00:00:00+00:00,8
1,1370,1H,1,1,1,1,1,1,22,24,...,94,59,118,118,118,23,1,2021-07-23 00:00:00+00:00,2021-07-29 21:00:00+00:00,118
2,1371,1M,1,1,1,6,60,60,1261,1435,...,5565,3499,6997,6997,6997,23,1,2021-07-23 00:06:00+00:00,2021-07-29 21:05:00+00:00,6997
3,1372,1S,1,60,17,78,3169,985,20941,32372,...,135516,105295,265768,265768,265768,23,1,2021-07-16 00:00:00+00:00,2021-07-29 21:05:03+00:00,265768
4,1373,1D,1,1,1,1,1,1,1,1,...,2,2,3,3,3,23,2,2021-07-23 00:00:00+00:00,2021-07-27 00:00:00+00:00,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,1592,1S,19,60,42,1914,3225,2527,18222,22197,...,104368,76555,200305,200305,200305,23,58,2021-07-16 10:00:00+00:00,2021-07-29 17:59:59+00:00,200305
224,1593,1D,1,1,1,1,1,1,1,1,...,2,2,3,3,3,23,60,2021-07-23 00:00:00+00:00,2021-07-27 00:00:00+00:00,3
225,1594,1H,1,1,1,1,1,1,22,24,...,120,94,238,238,238,23,60,2021-07-16 00:00:00+00:00,2021-07-29 21:00:00+00:00,238
226,1595,1M,1,1,1,6,60,60,1261,1435,...,7167,5561,14159,14159,14159,23,60,2021-07-16 00:00:00+00:00,2021-07-29 21:05:00+00:00,14159


In [55]:
# Summary aggregations: read back every row of the per-bucket candle count table.
data = pd.read_sql_query("SELECT * FROM pricedata_summaryaggregation", connection)
data

Unnamed: 0,id,time,aggregation_period,num_candles,datasource_candleperiod_id,datasource_symbol_id,summary_batch_id
0,29508320,2021-07-29 19:25:00+00:00,minutes,11,1,1,23
1,29508321,2021-07-29 19:26:00+00:00,minutes,11,1,1,23
2,29508322,2021-07-29 19:27:00+00:00,minutes,25,1,1,23
3,29508323,2021-07-29 19:28:00+00:00,minutes,22,1,1,23
4,29508324,2021-07-29 19:29:00+00:00,minutes,28,1,1,23
...,...,...,...,...,...,...,...
29599,29537919,2021-07-31 00:00:00+00:00,months,3,4,58,23
29600,29537920,2021-07-31 00:00:00+00:00,months,244494,1,60,23
29601,29537921,2021-07-31 00:00:00+00:00,months,14159,2,60,23
29602,29537922,2021-07-31 00:00:00+00:00,months,238,3,60,23
