In [None]:
from google.cloud import secretmanager

from faunadb import query as q
from faunadb.objects import Ref
from faunadb.client import FaunaClient

def get_client(secret_name="fauna_deepcite_db", project_id="deepcite-306405",
               domain='db.us.fauna.com', version="latest"):
    """Build a FaunaClient authenticated with a key read from Google Secret Manager.

    Args:
        secret_name: Name of the Secret Manager secret that holds the Fauna key.
        project_id: GCP project id that owns the secret.
        domain: Fauna endpoint host handed to ``FaunaClient``.
        version: Secret version to read; defaults to ``"latest"``.

    Returns:
        A ``FaunaClient`` built from the UTF-8 decoded secret payload and ``domain``.
    """
    print('grabbing secret')

    # Named secret_client so it does not shadow the notebook-level Fauna `client`.
    secret_client = secretmanager.SecretManagerServiceClient()
    secret_path = f"projects/{project_id}/secrets/{secret_name}/versions/{version}"

    response = secret_client.access_secret_version(request={"name": secret_path})
    secret_string = response.payload.data.decode("UTF-8")

    return FaunaClient(secret=secret_string, domain=domain)

# Shared Fauna client, created once here and used by the queries in later cells.
client = get_client()

In [None]:
from datetime import datetime
import pandas as pd
pd.options.display.max_colwidth = 100
import json

def fauna_to_df(fauna):
  """Turn a Fauna query result into a DataFrame with one row per document.

  Reads the list under ``fauna['data']`` and uses each item's own ``'data'``
  mapping as a record.
  """
  records = []
  for document in fauna['data']:
    records.append(document['data'])

  return pd.DataFrame.from_records(records)

def grab_fauna(collection_name, size = 100000):
  """Fetch every document of a Fauna collection, following pagination cursors.

  Args:
    collection_name: Name of the Fauna collection to read.
    size: Page size requested from Fauna for each query.

  Returns:
    A dict ``{'data': [...]}`` holding the documents from all pages, in the
    order the pages were returned.
  """
  documents = []
  cursor = None

  while True:
    page_options = {'size': size}
    if cursor is not None:
      page_options['after'] = cursor

    page = client.query(
      q.map_(
        lambda x: q.get(x),
        q.paginate(q.documents(q.collection(collection_name)), **page_options)
      )
    )
    documents.extend(page['data'])

    # A page only carries an 'after' cursor when more results remain; without
    # this loop anything past the first page would be silently dropped.
    cursor = page.get('after')
    if cursor is None:
      break

  return {'data': documents}



In [None]:
# Pull the 'deepcite_call' documents from Fauna and keep the first row per id.
call_fauna = grab_fauna('deepcite_call')
call_df_fauna = (
    fauna_to_df(call_fauna)
    .drop_duplicates(subset='id', keep="first")
)
call_df_fauna.head(2)

In [None]:
import psycopg2
import pandas as pd
pd.options.display.max_colwidth = 100
import json

# Connection settings come from the 'gcp' entry of the local config file.
with open('database_config.json') as json_file:
    db_config = json.load(json_file)['gcp']

conn = psycopg2.connect(
    host=db_config['host'],
    user=db_config['user'],
    password=db_config['password'],
    database=db_config['database'],
    port=db_config['port'],
)

# Load the whole table, ordered by created_at descending.
sql = 'SELECT * FROM "deepcite_call" ORDER by "created_at" DESC;'
call_df_postgres = pd.read_sql_query(sql, conn)
call_df_postgres.head(2)

In [None]:
# Number of rows loaded from the Postgres "deepcite_call" table.
len(call_df_postgres)
# Alternative kept for reference: count the rows in the database instead of in pandas.
# pd.read_sql_query('select count(*) from "deepcite_call"', conn)

In [None]:
# Split the Postgres rows by whether their id also appears in the Fauna frame.
postgres_id_in_fauna = call_df_postgres['id'].isin(call_df_fauna['id'])

missing_call_df = call_df_postgres[~postgres_id_in_fauna]
print(len(missing_call_df))

same_call_df = call_df_postgres[postgres_id_in_fauna]
print(len(same_call_df))

# Keep only the Fauna rows whose id is shared with Postgres (drops rows present only in Fauna).
fauna_id_in_postgres = call_df_fauna['id'].isin(same_call_df['id'])
call_df_fauna_which_match_postgres = call_df_fauna[fauna_id_in_postgres]
lengths_match = len(call_df_fauna_which_match_postgres) == len(same_call_df)
print(f'Check if length of entries in postges, and also in fauna are entirely contained in fauna: {lengths_match}')


## Prepare missing json data for fauna

In [None]:
def _canonical_sort_key(value):
    """Return a sort key that gives JSON-like values of mixed types a total order."""
    if value is None:
        return (0, 0)
    if isinstance(value, (bool, int, float)):
        return (1, value)
    if isinstance(value, str):
        return (2, value)
    if isinstance(value, (list, tuple)):
        return (3, [_canonical_sort_key(item) for item in value])
    # Any other type: order by type name, then by repr.
    return (4, type(value).__name__, repr(value))


def _sorted_mixed(items):
    """Sort natively when possible, else fall back to the type-aware key."""
    items = list(items)
    try:
        return sorted(items)
    except TypeError:
        # e.g. None vs str under the same key in two records.
        return sorted(items, key=_canonical_sort_key)


def ordered(obj):
    """Recursively convert a JSON-like object into an order-independent form.

    Dicts become sorted lists of ``(key, ordered(value))`` tuples, lists become
    sorted lists of their ordered elements, and every other value is returned
    unchanged. Elements that cannot be compared natively (for example ``None``
    next to a string) are ordered with a type-aware key instead of raising
    ``TypeError``.
    """
    if isinstance(obj, dict):
        return _sorted_mixed((k, ordered(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return _sorted_mixed(ordered(x) for x in obj)
    return obj

def check_json_matches(a,b):
    """Return True when ``a`` and ``b`` are equal after both are passed through ``ordered``."""
    canonical_a = ordered(a)
    canonical_b = ordered(b)
    return canonical_a == canonical_b

same_json = json.loads(same_call_df.to_json(orient='records'))
# Build fresh dicts instead of popping from a list.copy(): a shallow copy shares
# the record dicts, so popping would also strip 'created_at' from same_json.
same_json_created_at_removed = [
    {key: value for key, value in obj.items() if key != 'created_at'}
    for obj in same_json
]

fauna_json = [doc['data'] for doc in call_fauna['data']]
# Collect the shared ids once so the membership test is not recomputed per entry.
postgres_ids = set(same_call_df['id'])
fauna_json_which_is_in_postgres = [entry for entry in fauna_json if entry['id'] in postgres_ids]
# Difference in record counts between the two sources; not displayed, because
# only the last expression of a cell is shown.
len(same_json) - len(fauna_json_which_is_in_postgres)
# NOTE(review): this compares the Postgres records with their own copy minus
# 'created_at'; presumably the intended right-hand side is
# fauna_json_which_is_in_postgres — confirm.
check_json_matches(same_json, same_json_created_at_removed)


In [None]:
# Earlier debugging output, kept for reference:
# print(ordered(same_json)[20])
# print()
# print(ordered(fauna_json_which_is_in_postgres[20]))

# Spot-check a single call id in both sources: created_at on the Postgres side,
# available keys on the Fauna side.
target_call_id = '22433693-e66a-4083-a3ae-627acddbfa15'

print([record['created_at'] for record in same_json if record['id'] == target_call_id])
print([entry.keys() for entry in fauna_json_which_is_in_postgres if entry['id'] == target_call_id])

In [None]:
# Preview the first five Postgres-only rows as plain JSON records.
json_missing = json.loads(
    missing_call_df.iloc[:5].to_json(orient='records')
)
json_missing

In [None]:
# Insert one placeholder document into the "deepcite_call" collection.
# Create() is given the collection itself so Fauna assigns the document id;
# a ref built from a collection without an id is not a valid document target.
# NOTE(review): this writes a dummy record ({"name": "Orwen"}) into the same
# collection the cells above read from — confirm that is intended.
result = client.query(
  q.create(
    q.collection("deepcite_call"),
    {"data": {"name": "Orwen"}}
  )
)
print(result)