In [1]:
import logging
import asyncio
import os
import sys
import json

import pandas as pd
import matplotlib.pyplot as plt

# XXX temporary hack to import from src
try:
    path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "src")
except:
    path = "~/gith/domschl/indrajala/python_indrajala/indralib/src"
    # expand ~
    path = os.path.expanduser(path)
print(path)
sys.path.append(path)

from indra_event import IndraEvent
from indra_client import IndraClient
from indra_downloader import IndraDownloader

/Users/dsc/gith/domschl/indrajala/python_indrajala/indralib/src


In [2]:
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s %(message)s", level=logging.INFO
)


In [3]:

dl = IndraDownloader(cache_dir="geodata/cache")
dfs = dl.get_datasets(data_sources_dir="geodata/data_sources")
for df_name in dfs:
    print("-----------------------------------------------")
    print(df_name)
    print(dfs[df_name]["metadata"])
    print(dfs[df_name]["data"].head())
print(f"Number of datasets: {len(dfs)}")


2023-06-22 12:57:08,416 INFO Downloader processing: geodata/data_sources/MaunaLoaCO2MonthlyMean.toml
2023-06-22 12:57:08,417 INFO Downloader indra_imports: {'manualoaco2monthlymean': ["'time_jd_start'=fracyear2julia(imp['decimal date'])", "'mean/co' = imp['average']", "'deseasonalized/co2' = imp['deseasonalized']"]}
2023-06-22 12:57:09,224 INFO Downloader Read https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.csv from cache at geodata/cache/co2_mm_mlo.csv
2023-06-22 12:57:09,225 INFO Downloader Creating dataset manualoaco2monthlymean
2023-06-22 12:57:09,234 INFO Downloader processing: geodata/data_sources/EuropeanMeanTemperatureSinceRomanTime_EuroMed2k.toml
2023-06-22 12:57:10,026 INFO Downloader Read https://www.ncei.noaa.gov/pub/data/paleo/pages2k/EuroMed2k/eujja_2krecon_nested_cps.txt from cache at geodata/cache/eujja_2krecon_nested_cps.txt
2023-06-22 12:57:10,027 INFO Downloader Creating dataset euromed2k
2023-06-22 12:57:10,038 INFO Downloader processing: geodata/data_source

-----------------------------------------------
manualoaco2monthlymean
{'title': 'Mauna Loa CO2 monthly mean data', 'description': 'Data from March 1958 through April 1974 have been obtained by C. David Keeling of the Scripps Institution of Oceanography (SIO) and were obtained from the Scripps website (scrippsco2.ucsd.edu). Monthly mean CO2 constructed from daily mean values Scripps data downloaded from http://scrippsco2.ucsd.edu/data/atmospheric_co2 Monthly values are corrected to center of month based on average seasonal cycle. Missing days can be asymmetric which would produce a high or low bias. Missing months have been interpolated, for NOAA data indicated by negative stdev and uncertainty. We have no information for SIO data about Ndays, stdv, unc so that they are also indicated by negative numbers', 'authors': ['Dr. Pieter Tans', 'Dr. Ralph Keeling'], 'publication_date': '2022-06', 'last_update': '2022-06', 'publisher': 'Dr. Pieter Tans, NOAA/GML (gml.noaa.gov/ccgg/trends/) and 

: 

In [4]:
from datetime import timedelta, datetime, timezone

def convert_partial_year(number):
    year = int(number)
    d = timedelta(days=(number - year) * 365.25)
    day_one = datetime(year, 1, 1, tzinfo=timezone.utc)
    date = d + day_one
    return date

In [5]:
cl = IndraClient(config_file="ws_indra.toml", verbose = True)
await cl.init_connection(verbose=True)

<websockets.legacy.client.WebSocketClientProtocol at 0x161381710>

In [6]:
async def async_entire_history(domain):
    # data = await asyncio.create_task(cl.get_wait_history(domain, -1000000000))
    return await cl.get_wait_history(domain, -1000000000)

In [40]:
dataset = 'manualoaco2monthlymean'
df = dfs[dataset]['data']
print(df.head())
md = dfs[dataset]['metadata']
print(md['indra_id'], md['indra_domain'])
 
domain = md['indra_domain'].replace("{indra_id}", md['indra_id']).replace(f"{dataset}", dataset)
print(domain)

# existing_data = await async_entire_history(domain)
# print(f"Start, existing data: {len(existing_data)} records")

# Enum rows:
dups=0
for i, row in df.iterrows():
    # get column 'decimal year':
    # print(f"{i}: {row['year']}-{row['month']}  z{row['decimal date']}, {row['average']}")
    # convert fractional year into datetime:
    dt = convert_partial_year(row['decimal date'])
    jd = IndraEvent.datetime2julian(dt)
    found = False
    for (j, e) in enumerate(existing_data):
        # print(jd,e[0])
        if abs(jd-e[0])<0.0001:
            # print(f"Found existing data for {jd}")
            found = True
            dups += 1
            break
    if found:
        continue
    ie = IndraEvent()
    ie.domain = domain
    ie.from_id = "py/MaunaLoaMonthlyCO2"
    ie.to_scope = "public"
    ie.time_jd_start = jd
    ie.data_type = "number/float/co2"
    ie.data = json.dumps(row['average'])
    print("Send new: ", ie.to_json())
    # await cl.send_event(ie)
print(f"Already existing data: {dups} records, new: {len(df)-dups} records")

2023-06-22 09:53:28,973 INFO IndraClient Sending: {"domain": "$trx/db/req/history", "from_id": "ws/python", "uuid4": "5aa12381-c081-4826-89df-ce8ab620e3a9", "to_scope": "", "time_jd_start": 2460117.828807565, "data_type": "historyrequest", "data": "{\"domain\": \"$event/geodata/historical/{dataset}/{column}\", \"time_jd_start\": -1000000000, \"time_jd_end\": null, \"limit\": null, \"data_type\": \"number/float%\", \"mode\": \"Sample\"}", "auth_hash": "", "time_jd_end": null}
2023-06-22 09:53:28,976 INFO IndraClient Received message: {"domain":"Ws.1/127.0.0.1:50848","from_id":"SQLx.1","uuid4":"5aa12381-c081-4826-89df-ce8ab620e3a9","to_scope":"$event/geodata/historical/{dataset}/{column}","time_jd_start":2460117.82880758,"data_type":"vector/tuple/jd/float","data":"[]","auth_hash":null,"time_jd_end":2460117.828807587}
2023-06-22 09:53:28,976 INFO IndraClient ---------------------------------------------------------------
2023-06-22 09:53:28,976 INFO IndraClient Future: trx event $event/ge

   year  month  decimal date  average  deseasonalized  ndays  sdev  unc
0  1958      3     1958.2027   315.70          314.43    NaN   NaN  NaN
1  1958      4     1958.2877   317.45          315.16    NaN   NaN  NaN
2  1958      5     1958.3699   317.51          314.71    NaN   NaN  NaN
3  1958      6     1958.4548   317.24          315.14    NaN   NaN  NaN
4  1958      7     1958.5370   315.86          315.18    NaN   NaN  NaN
manualoaco2monthlymean $event/geodata/historical/{dataset}/{column}
$event/geodata/historical/{dataset}/{column}
Start, existing data: 0 records
Send new:  {"domain": "$event/geodata/historical/{dataset}/{column}", "from_id": "py/MaunaLoaMonthlyCO2", "uuid4": "6efe0c57-8577-44ec-b7a1-6c9fa42d0622", "to_scope": "public", "time_jd_start": 2436278.536175, "data_type": "number/float/co2", "data": "315.7", "auth_hash": "", "time_jd_end": null}
Send new:  {"domain": "$event/geodata/historical/{dataset}/{column}", "from_id": "py/MaunaLoaMonthlyCO2", "uuid4": "55f7ac49-

In [44]:
domain

'$event/geodata/historical/{dataset}/{column}'

In [45]:
data = await async_entire_history(domain)


2023-06-22 09:54:15,707 INFO IndraClient Sending: {"domain": "$trx/db/req/history", "from_id": "ws/python", "uuid4": "1d3b74ec-6dca-41bb-ae57-35c5c5ef9a48", "to_scope": "", "time_jd_start": 2460117.82934847, "data_type": "historyrequest", "data": "{\"domain\": \"$event/geodata/historical/{dataset}/{column}\", \"time_jd_start\": -1000000000, \"time_jd_end\": null, \"limit\": null, \"data_type\": \"number/float%\", \"mode\": \"Sample\"}", "auth_hash": "", "time_jd_end": null}
2023-06-22 09:54:15,711 INFO IndraClient Received message: {"domain":"Ws.1/127.0.0.1:50848","from_id":"SQLx.1","uuid4":"1d3b74ec-6dca-41bb-ae57-35c5c5ef9a48","to_scope":"$event/geodata/historical/{dataset}/{column}","time_jd_start":2460117.829348492,"data_type":"vector/tuple/jd/float","data":"[]","auth_hash":null,"time_jd_end":2460117.829348503}
2023-06-22 09:54:15,712 INFO IndraClient ---------------------------------------------------------------
2023-06-22 09:54:15,712 INFO IndraClient Future: trx event $event/ge

In [46]:
data

[]

2023-06-22 09:57:08,713 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,715 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,715 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,716 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,716 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,717 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,717 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,718 ERROR IndraClient Could not receive message: no close frame received or sent
