In [1]:
import os
from google.cloud import firestore
from google.oauth2 import service_account
import pandas as pd
import altair as alt


# Path to the service account JSON key file
key_path = "../vimo-server-firestore-credentials.json"

# Load the service account credentials
creds = service_account.Credentials.from_service_account_file(key_path)

# Initialize the Firestore client with the credentials
db = firestore.Client(credentials=creds)

In [2]:
def graph2matrix(edges, vertices):
    # Create the adjacency matrix
    adj_matrix = [[0 for i in range(len(vertices))] for j in range(len(vertices))]
    vertex_dict = {vertex: index for index, vertex in enumerate(vertices)}
    
    for edge in edges:
        edge_tuple = edge.split(' -> ')
        start_vertex = vertex_dict[edge_tuple[0]]
        end_vertex = vertex_dict[edge_tuple[1]]
        adj_matrix[start_vertex][end_vertex] = 1
        
    return adj_matrix

In [3]:
# Get a reference to the collection
collection_ref = db.collection("sketches")

# Get all documents in the collection
docs = collection_ref.get()

data_dict = {'nodes':[], 'nodes_num': [], 'nodes_constraints': [], 'edges':[], 'edges_num': [], 'edges_constraints':[],  'sketch_mat': [], 'num_results': [], 'timestamp':[]}
for doc in docs:
    print(f"{doc.id} => {doc.to_dict()}")
    data = doc.to_dict()
    
    nodes = sorted([node['label'] for node in data['sketch']['nodes']])
    data_dict['nodes'].append(", ".join(nodes))
    data_dict['nodes_num'].append(len(nodes))
    nodes_constraints = set()
    for node in data['sketch']['nodes']:
        try:
            for prop in node['properties'].keys():
                nodes_constraints.add(prop)
        except AttributeError as e:
            print(e)
    data_dict['nodes_constraints'].append(", ".join(nodes_constraints))
    
    edges = [edge['label'] for edge in data['sketch']['edges']]
    data_dict['edges'].append(", ".join(edges))
    data_dict['edges_num'].append(len(edges))
    edges_constraints = set()
    for edge in data['sketch']['edges']:
        try:
            for prop in edge['properties'].keys():
                edges_constraints.add(prop)
        except AttributeError as e:
            print(e)
    data_dict['edges_constraints'].append(", ".join(edges_constraints))

    data_dict['sketch_mat'].append(graph2matrix(edges, nodes))
    data_dict['num_results'].append(data['numberOfResults'])
    data_dict['timestamp'].append(data['timestamp'])

1qSvHmLnYHAjr4bMsFp3 => {'numberOfResults': 1, 'timestamp': 1682365924.272949, 'sketch': {'dimension': {'height': 238.796875, 'width': 544.328125}, 'nodes': [{'label': 'A', 'properties': {'EB': 'hidden'}, 'position': ['Point', 85, 185], 'tree': None, 'index': 0}, {'index': 1, 'label': 'B', 'position': ['Point', 476, 191], 'tree': None, 'properties': None}, {'properties': None, 'label': 'C', 'tree': None, 'position': ['Point', 265, 31], 'index': 2}], 'edges': [{'label': 'A -> B', 'properties': {'CX': 'hidden'}, 'tree': None, 'indices': [0, 1], 'index': 0}, {'label': 'B -> C', 'properties': {'weight': 'hidden'}, 'tree': None, 'index': 1, 'indices': [1, 2]}, {'index': 2, 'indices': [2, 0], 'label': 'C -> A', 'properties': None, 'tree': None}]}}
'NoneType' object has no attribute 'keys'
'NoneType' object has no attribute 'keys'
'NoneType' object has no attribute 'keys'
532QGyBggCgG4l5HbMzd => {'timestamp': 1682365985.721128, 'numberOfResults': 1, 'sketch': {'edges': [{'properties': None, '

In [4]:
data_dict

{'nodes': ['A, B, C',
  'A, B, C',
  'A, B, C',
  'A, B, C',
  'A, B, C',
  'A, B, C, D',
  'A, B, C',
  'A, B, C',
  'A, B, C',
  'A, B, C',
  'A, B, C, D'],
 'nodes_num': [3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4],
 'nodes_constraints': ['EB',
  'bodyId',
  'CRE(L)',
  'CX',
  'EB, type',
  'MB(R), aL(R)',
  'LAL(R), type',
  'type',
  'bodyId',
  'type',
  'CX'],
 'edges': ['A -> B, B -> C, C -> A',
  'B -> A, B -> C, A -> B, C -> A',
  'A -> C, A -> B, B -> C',
  'A -> B, B -> C, C -> A',
  'B -> A, B -> C, A -> B, C -> A',
  'B -> A, C -> A, A -> D, A -> C',
  'A -> C, B -> C, C -> B, C -> A, A -> B',
  'A -> B, A -> C, B -> C',
  'A -> B, B -> C, C -> A',
  'A -> C, C -> B, B -> A',
  'B -> C, A -> C, C -> D, D -> C'],
 'edges_num': [3, 4, 3, 3, 4, 4, 5, 3, 3, 3, 4],
 'edges_constraints': ['weight, CX',
  '',
  'weight',
  'weight, CX',
  'EB, LX(R), CX',
  'MB(R), weight',
  '',
  'EB',
  '',
  'weight',
  ''],
 'sketch_mat': [[[0, 1, 0], [0, 0, 1], [1, 0, 0]],
  [[0, 1, 0], [1, 0, 1], [

In [5]:
df = pd.DataFrame.from_dict(data_dict)

In [6]:
df

Unnamed: 0,nodes,nodes_num,nodes_constraints,edges,edges_num,edges_constraints,sketch_mat,num_results,timestamp
0,"A, B, C",3,EB,"A -> B, B -> C, C -> A",3,"weight, CX","[[0, 1, 0], [0, 0, 1], [1, 0, 0]]",1,1682366000.0
1,"A, B, C",3,bodyId,"B -> A, B -> C, A -> B, C -> A",4,,"[[0, 1, 0], [1, 0, 1], [1, 0, 0]]",1,1682366000.0
2,"A, B, C",3,CRE(L),"A -> C, A -> B, B -> C",3,weight,"[[0, 1, 1], [0, 0, 1], [0, 0, 0]]",1,1682366000.0
3,"A, B, C",3,CX,"A -> B, B -> C, C -> A",3,"weight, CX","[[0, 1, 0], [0, 0, 1], [1, 0, 0]]",1,1682366000.0
4,"A, B, C",3,"EB, type","B -> A, B -> C, A -> B, C -> A",4,"EB, LX(R), CX","[[0, 1, 0], [1, 0, 1], [1, 0, 0]]",1,1681220000.0
5,"A, B, C, D",4,"MB(R), aL(R)","B -> A, C -> A, A -> D, A -> C",4,"MB(R), weight","[[0, 0, 1, 1], [1, 0, 0, 0], [1, 0, 0, 0], [0,...",1,1682366000.0
6,"A, B, C",3,"LAL(R), type","A -> C, B -> C, C -> B, C -> A, A -> B",5,,"[[0, 1, 1], [0, 0, 1], [1, 1, 0]]",1,1682366000.0
7,"A, B, C",3,type,"A -> B, A -> C, B -> C",3,EB,"[[0, 1, 1], [0, 0, 1], [0, 0, 0]]",1,1682366000.0
8,"A, B, C",3,bodyId,"A -> B, B -> C, C -> A",3,,"[[0, 1, 0], [0, 0, 1], [1, 0, 0]]",1,1682366000.0
9,"A, B, C",3,type,"A -> C, C -> B, B -> A",3,weight,"[[0, 0, 1], [1, 0, 0], [0, 1, 0]]",1,1682366000.0


In [7]:
bars = alt.Chart(df).mark_bar().encode(
    x='nodes_num:O', 
    y=alt.Y('count(nodes_num):Q'), 
)
bars

In [8]:
bars = alt.Chart(df).mark_bar().encode(
    x='edges_num:O', 
    y=alt.Y('count(edges_num):Q'), 
)
bars

In [9]:
bars = alt.Chart(df).mark_bar().encode(
    x='num_results:O', 
    y=alt.Y('count(num_results):Q'), 
)
bars

In [10]:
d = df.copy()
stacked = d['nodes_constraints'].str.split(',', expand=True).stack().map(lambda x: x.strip()).reset_index(drop=True)
counts = stacked.value_counts()
counts_df = pd.DataFrame({'category': counts.index, 'count': counts.values}).reset_index(drop=True)

bars = alt.Chart(counts_df).mark_bar().encode(
    x='category:O',
    y='count:Q'
)
bars

In [14]:
d = df.copy()
stacked = d['edges_constraints'].str.split(',', expand=True).stack().map(lambda x: x.strip()).reset_index(drop=True)
counts = stacked.value_counts()
counts_df = pd.DataFrame({'category': counts.index, 'count': counts.values}).reset_index(drop=True)

bars = alt.Chart(counts_df).mark_bar().encode(
    x='category:O',
    y='count:Q'
)
bars