In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
from citros_data_analysis import data_access as da
from prettytable import PrettyTable, ALL

# Report the library version and the execution context taken from the environment.
print(f"CITROS version {da.get_version()}")

# The batch id and the simulation id are read from the 'bid' and 'sid' environment variables.
batch = os.getenv('bid')
print(f"batch id: {batch}")
print(f"sid: {os.getenv('sid')}")
if not batch:
    # Best-effort warning only: the connection below is still attempted.
    print("please add context batch...")

# Connection object used by all the following cells.
citros = da.CitrosDB(batch = batch)

In [None]:
# Display the version reported by the data access module.
version = da.get_version()
print(version)

In [None]:
# Fetch the information object for the whole batch and print it.
batch_info = citros.info()
batch_info.print()

In [None]:
# Fetch the information object restricted to a single topic and print it.
state_info = citros.topic('/lunar_starship/state').info()
state_info.print()

In [None]:
# Plot 'data.data[0]' of the '/lunar_starship/state' topic versus 'rid',
# separately for each sid.
state_column = 'data.data[0]'
state_data = citros.topic('/lunar_starship/state').data([state_column])

# Index by (rid, sid) and unstack, so that every sid becomes its own column
# and is drawn as its own line.
state_data.set_index(['rid', 'sid']).unstack()[state_column].plot()

In [None]:
# If the amount of data is too large, the sampling functions skip(), avg()
# and move_avg() may be applied.
# skip(n) selects every n-th message.
# Limits on sid, rid and time are applied before the selection.
# The selection is performed separately for each sid.

# Restrict the query first ...
dynamics_query = (
    citros.topic('/dynamics')
          .sid([1, 2])
          .rid(start = 5, end = 80)
          .time(start = 5)
)

# ... then keep only every 10th message:
skipped = dynamics_query.skip(10).data(
    ["data.dyn_inertial.f_b[0]", "data.dyn_local_level.lla[1]"]
)
print(skipped)

In [None]:
# Instead of just skipping messages, every n messages may be averaged by avg(n).
# Limits on sid, rid and time are applied before averaging.
# Averaging is performed separately for each sid.
# The 'rid' of each averaged range is set to the minimum of the averaged rid values.
# Only numeric values may be averaged, and the labels of the json-data columns
# with numeric content must be explicitly listed in data([]).

# Restrict the query first ...
dynamics_query = (
    citros.topic('/dynamics')
          .sid([1, 2])
          .rid(start = 5, end = 80)
          .time(start = 5)
)

# ... then average every 10 messages:
averaged = dynamics_query.avg(10).data(
    ["data.dyn_inertial.f_b[0]", "data.dyn_local_level.lla[1]"]
)
print(averaged)

In [None]:
# move_avg(n, m) applies a moving average over n messages and selects
# every m-th row of the result.
# Limits on sid, rid and time are applied before averaging and selection.
# Averaging and selection are performed separately for each sid.
# The 'rid' of each averaged range is set to the minimum of the averaged rid values.
# Only numeric values may be averaged, and the labels of the json-data columns
# with numeric content must be explicitly listed in data([]).

# Restrict the query first ...
dynamics_query = (
    citros.topic('/dynamics')
          .sid([1, 2])
          .rid(start = 5, end = 80)
          .time(start = 5)
)

# ... then average over 10 messages and select every second row of the result:
smoothed = dynamics_query.move_avg(10, 2).data(
    ["data.dyn_inertial.f_b[0]", "data.dyn_local_level.lla[1]"]
)
print(smoothed)

In [None]:
# citros.get_batch_size() returns the sizes of the tables in the current schema.
# Each resulting row contains the name of the batch, the batch size
# and the total size with indexes.
batch_sizes = citros.get_batch_size()

# Print the rows as a left-aligned PrettyTable.
size_table = PrettyTable(field_names=['batch', 'size', 'total_size'], align='l')
size_table.add_rows(batch_sizes)
print(size_table)

In [None]:
# Print unique values or unique combinations of values.

# For example, to get all possible topics:
column_names = ['topic']
unique_topics = citros.get_unique_values(column_names)

# Print the result.
print(unique_topics)

In [None]:
# To get unique combinations of values, list the columns in "column_names".
# For example, the unique topic-type combinations:
column_names = ['topic', 'type']

# Constraints may be passed through filter_by, for example to get
# all possible types for the topics '/config' and '/dynamics':
filter_by = {'topic' : ['/config', '/dynamics']}
topic_types = citros.get_unique_values(column_names, filter_by = filter_by)

# Print the result as a right-aligned table.
types_table = PrettyTable(field_names=column_names, align='r')
types_table.add_rows(topic_types)
print(types_table)

In [None]:
# Print the maximum value of the column "column_name".

# Constraints on the rows that are considered:
filter_by = {'topic' : ['/dynamics']}

# Name of the column whose maximum value is requested:
column_name = 'sid'

max_value = citros.get_max_value(column_name, filter_by)
print("max value of the column '{}' : {}".format(column_name, max_value))

In [None]:
# Print the minimum value of the column "column_name".

# Constraints on the rows that are considered:
filter_by = {'topic' : ['/dynamics']}

# Name of the column whose minimum value is requested:
column_name = 'sid'

min_value = citros.get_min_value(column_name, filter_by)
print("min value of the column '{}' : {}".format(column_name, min_value))

In [None]:
# The data structure can also be shown by citros.get_data_structure().
# It returns the topic, the type and the structure of the json-data.

# List only the topics of interest, or set topic = [] to see information for all topics:
topic = ['/config']

structure = citros.get_data_structure(topic)

# Print the result with PrettyTable: right-aligned columns, except for the
# left-aligned 'data' column, and horizontal rules set to ALL.
header = ['topic', 'type', 'data']
structure_table = PrettyTable(field_names=header, align='r')
structure_table.hrules = ALL
structure_table.align['data'] = 'l'
structure_table.add_rows(structure)
print(structure_table)

In [None]:
# Print the number of rows in the column "column_name".

# Name of the column of interest:
column_name = 'type'

# Constraints:
filter_by = {'topic' : ['/dynamics'], 'sid' : [1]}

# Group the result, for example by topics;
# use group_by = [] to show the total count instead:
group_by = ['topic']

counts = citros.get_counts(column_name, filter_by = filter_by, group_by = group_by)

# Print the result: a borderless table when grouped, a single number otherwise.
if group_by:
    print("number of rows in column '{}':".format(column_name))
    counts_table = PrettyTable(field_names = group_by + ['counts'], align='r')
    counts_table.add_rows(counts)
    counts_table.border = False
    print(counts_table)
else:
    print("number of rows in column '{}' : {}".format(column_name, counts[0][0]))

In [None]:
# Print the number of unique values in the column "column_name".

# Name of the column of interest:
column_name = 'type'

# Constraints:
filter_by = {'topic' : ['/dynamics'], 'sid' : [1]}

# Group the result, for example by topics;
# use group_by = [] to show the total count instead:
group_by = ['topic']

counts = citros.get_unique_counts(column_name, filter_by = filter_by, group_by = group_by)

# Print the result: a borderless table when grouped, a single number otherwise.
if group_by:
    print("number of unique values in column '{}':".format(column_name))
    unique_table = PrettyTable(field_names = group_by + ['unique_counts'], align='r')
    unique_table.add_rows(counts)
    unique_table.border = False
    print(unique_table)
else:
    print("number of unique values in column '{}' : {}".format(column_name, counts[0][0]))

In [None]:
# citros.get_data(...) allows to specify more precisely what to download.
# It may be used instead of citros.topic(topic_name).data() in the following cases:
# - when constraints must be applied to json-data columns;
# - when the order of the output must be specified;
# - when some of the columns outside the json-data column (time, sid, rid, topic, type)
#   are not desired in the output.

# Topic to query:
topic = '/dynamics'

# Content of the json column "data" to download:
# - data_query = [] downloads all data; the json objects are separated into columns;
# - data_query = ["data"] returns the "data" column as a json object;
# - to get only some of the json objects, list them, for example
#   data_query = ["data.dyn_inertial.f_b", "data.dyn_local_level.lla"];
# - if there are json arrays in the data, specify the index of the value to collect:
#   data_query = ["data.dyn_local_level.lla[0]"]
data_query = [
    "data.dyn_local_level.lla[0]",
    "data.dyn_inertial.f_b[0]",
]

# Additional columns are all columns but the "data" column;
# additional_columns = [] returns all of them.
additional_columns = ['rid', 'sid']

# Constraints:
# - exact values are listed, for example sid equal to 1 or 2: filter_by = {'sid' : [1,2]};
# - lower and upper limits are given in a dictionary with the key words
#   'gt' & 'gte' for '>' & '>=' and 'lt' & 'lte' for '<' & '<=',
#   for example, to set 1 < sid <= 6: filter_by = {'sid': {'gt' : 1, 'lte' : 6}};
# - all these constraints may be applied to json-data values as well, if they are numeric;
# - for no filter, leave filter_by = {}.
filter_by = {
    'sid': [1, 2],
    "rid": {'gt': 10, 'lte': 1000},
    "data.dyn_inertial.f_b[0]": {'gt': 0},
}

# Order the result by some columns in descending ('desc') or ascending ('asc') order;
# leave order_by = {} for the default order.
order_by = {'sid': 'desc', 'rid': 'asc'}

# Save the result in "df".
df = citros.get_data(
    topic,
    data_query,
    additional_columns = additional_columns,
    filter_by = filter_by,
    order_by = order_by,
)
print('length of the output table: ', len(df))
print(df)

In [None]:
# get_data(...) may be combined with the way topic, rid, sid and time were set previously:
df = (
    citros.topic('/dynamics')
          .sid([1, 2])
          .rid(start = 11, end = 1000)
          .get_data(
              data_query = ["data.dyn_local_level.lla[0]", "data.dyn_inertial.f_b[0]"],
              additional_columns = ['rid', 'sid'],
              filter_by = {"data.dyn_inertial.f_b[0]": {'gt': 0}},
              order_by = {'sid': 'desc', 'rid': 'asc'},
          )
)
print(df)

In [None]:
# Plot a simple graph of 'data.dyn_local_level.lla[0]' versus 'rid' for different sid.
x_label, y_label = 'rid', 'data.dyn_local_level.lla[0]'
citros.plot_graph(df, x_label, y_label, '-')

In [None]:
# Sampling functions such as skipping, averaging and moving average may be applied too.
# They are selected by 'method': 'skip', 'avg' or 'move_avg'.

# Method 'skip' selects every n_skip-th message.
# The selection is performed independently for each 'sid'.
# Constraints on additional columns are applied BEFORE the n_skip-th selection, while
# constraints on columns from json-data are applied AFTER the n_skip-th selection.

# Apply 'skip' to the previous query:
sampling = {'method': 'skip', 'n_skip': 5}
df = citros.get_data(
    topic,
    data_query,
    additional_columns = additional_columns,
    filter_by = filter_by,
    order_by = order_by,
    **sampling,
)
print('length of the output table: ', len(df))
print(df)

In [None]:
# Method 'avg' averages every n_avg messages.
# Averaging is performed independently for each 'sid'.
# The output 'rid' of each averaged range is set to the minimum of the averaged rid values.
# Constraints on additional columns are applied BEFORE averaging, while
# constraints on columns from json-data are applied AFTER averaging.
sampling = {'method': 'avg', 'n_avg': 5}
df = citros.get_data(
    topic,
    data_query,
    additional_columns = additional_columns,
    filter_by = filter_by,
    order_by = order_by,
    **sampling,
)
print('length of the output table: ', len(df))
print(df)

In [None]:
# Method 'move_avg' performs moving averaging over n_avg messages and selects
# every n_skip-th row of the result.
# Averaging and selection are performed independently for each 'sid'.
# The output 'rid' of each averaged range is set to the minimum of the averaged rid values.
# Constraints on additional columns are applied BEFORE averaging and selection, while
# constraints on columns from json-data are applied AFTER averaging and selection.
sampling = {'method': 'move_avg', 'n_avg': 10, 'n_skip': 2}
df = citros.get_data(
    topic,
    data_query,
    additional_columns = additional_columns,
    filter_by = filter_by,
    order_by = order_by,
    **sampling,
)
print('length of the output table: ', len(df))
print(df)

In [None]:
# citros.get_sid_tables() returns the tables for different sid separately.
# The returned dictionary has the sid values as keys and the tables as values.
# Sampling methods such as skipping, averaging and moving average may be applied too.

# Topic:
topic = '/dynamics'

# Columns from "data":
data_query = [
    "data.dyn_inertial.f_b[0]",
    "data.dyn_local_level.lla[1]",
]

# Columns outside "data":
additional_columns = ['sid', 'rid', 'time']

# Constraints.
# Limits for non-data columns are applied BEFORE the sampling step,
# while constraints for json-data columns are applied AFTER it.
filter_by = {
    'sid': [1, 2],
    "rid": {'gt': 10, 'lte': 1000},
    "data.dyn_inertial.f_b[0]": {'gt': 0},
}

# Order of the result:
order_by = {'sid': 'desc', 'rid': 'asc'}

# Average over 10 rows:
n_avg = 10

sid_tables = citros.get_sid_tables(
    topic,
    data_query,
    additional_columns = additional_columns,
    filter_by = filter_by,
    order_by = order_by,
    method = 'avg',
    n_avg = n_avg,
)
print('sid values are: {}\n'.format(list(sid_tables.keys())))
print('table with sid = 1:')
print(sid_tables[1])

In [None]:
#The end!