In [1]:
import logging
import asyncio
import os
import sys
import json

import pandas as pd
import matplotlib.pyplot as plt

# XXX temporary hack to import from src
# When this code runs as a script, `__file__` exists and the sibling "src"
# directory is used. In a notebook kernel `__file__` is normally undefined,
# which raises NameError; only that case falls back to the developer checkout.
try:
    path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "src")
except NameError:
    # expand ~ to the current user's home directory
    path = os.path.expanduser("~/gith/domschl/indrajala/python_indrajala/indralib/src")
print(path)
# Guard against duplicate entries when the cell is executed more than once.
if path not in sys.path:
    sys.path.append(path)

from indra_event import IndraEvent  # type: ignore
from indra_client import IndraClient  # type: ignore
from indra_downloader import IndraDownloader  # type: ignore

/home/dsc/gith/domschl/indrajala/python_indrajala/indralib/src


In [2]:
# Root logger setup: INFO and above, each line prefixed with the
# timestamp, level and logger name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
)


In [3]:

# Build every dataset described in the data-source directory and print a
# short summary (name, metadata, first rows) for each of them.
dl = IndraDownloader(cache_dir="geodata/cache")
dfs = dl.get_datasets(data_sources_dir="geodata/data_sources")
for df_name in dfs:
    entry = dfs[df_name]  # mapping with at least "metadata" and "data" keys
    print("-----------------------------------------------")
    print(df_name)
    print(entry["metadata"])
    print(entry["data"].head())
print(f"Number of datasets: {len(dfs)}")


2023-07-27 16:07:52,111 INFO Downloader processing: geodata/data_sources/11000yearsunspotnumber.toml
2023-07-27 16:07:52,736 INFO Downloader Read https://www.ncei.noaa.gov/pub/data/paleo/climate_forcing/solar_variability/solanki2004-ssn.txt from cache at geodata/cache/solanki2004-ssn.txt
2023-07-27 16:07:52,736 INFO Downloader Creating dataset sunspots11000yrs
2023-07-27 16:07:52,740 INFO Downloader processing: geodata/data_sources/CO2_800k_composite.toml
2023-07-27 16:07:53,492 INFO Downloader Read https://www.ncei.noaa.gov/pub/data/paleo/icecore/antarctica/antarctica2015co2composite.txt from cache at geodata/cache/antarctica2015co2composite.txt
2023-07-27 16:07:53,492 INFO Downloader Creating dataset CO2_800k_composite
2023-07-27 16:07:53,495 INFO Downloader processing: geodata/data_sources/DWDGermanAugustMeanTemperature.toml
2023-07-27 16:07:53,574 INFO Downloader Read https://opendata.dwd.de/climate_environment/CDC/regional_averages_DE/monthly/air_temperature_mean/regional_averages

-----------------------------------------------
sunspots11000yrs
{'title': 'Solanki et al. 2004 11,000 Year Sunspot Number Reconstruction', 'description': 'Records of changes in solar irradiance, volcanic aerosols, atmospheric trace gases, and other properties thought to influence climate in the past. Parameter keywords describe what was measured in this data set. Additional summary information can be found in the abstracts of papers listed in the data set citations.', 'authors': ['Solanki, S.K.', 'Usoskin, I.G.', 'Kromer, B.', 'Schuessler, M.', 'Beer, J.'], 'publication_date': '2004-10-28', 'last_update': '2005-02', 'publisher': 'Environmental Research Letters, 11, 2, 024001', 'id': 'https://doi.org/10.25921/y6td-ne29', 'indra_id': 'solanki2004', 'indra_domain': '$event/geodata/paleo/{indra_id}/sunspots', 'version': '', 'data_source': 'https://www.ncei.noaa.gov/pub/data/paleo/climate_forcing/solar_variability/solanki2004-ssn.txt', 'related_publications': ['https://www.ncei.noaa.gov/ac

In [4]:
from datetime import timedelta, datetime, timezone

def convert_partial_year(number):
    """Convert a fractional (decimal) year into a timezone-aware UTC datetime.

    The integer part of ``number`` selects the calendar year; the fractional
    part is interpreted as the elapsed share of *that* year. The share is
    scaled by the real length of the year (365 or 366 days) so that a
    fraction close to 1.0 can never spill over into the following year and
    mid-year dates are not shifted by the 365.25-day approximation.

    Args:
        number: Decimal year, e.g. ``1958.2027``. Plain floats and numpy
            floats are both accepted.

    Returns:
        ``datetime`` with ``tzinfo=timezone.utc``.

    Raises:
        ValueError: If the year is outside the range supported by
            ``datetime`` (1..9999), e.g. for BCE / negative years.
    """
    year = int(number)
    year_start = datetime(year, 1, 1, tzinfo=timezone.utc)
    # Number of days in this specific year: Jan 1 .. Dec 31 inclusive.
    days_in_year = (datetime(year, 12, 31, tzinfo=timezone.utc) - year_start).days + 1
    # float() keeps timedelta happy regardless of the numeric type passed in.
    elapsed = timedelta(days=float(number - year) * days_in_year)
    return year_start + elapsed

In [11]:
cl = IndraClient( verbose = True)
await cl.init_connection(verbose=True)

2023-07-27 16:10:21,722 INFO IndraClient Initialized IndraClient with uri=wss://pergamon:8082, ca_authority=/home/dsc/gith/domschl/indrajala/data/certs/ca-root.pem, auth_token=None


Profiles: ['pergamon_Ws.1']


<websockets.legacy.client.WebSocketClientProtocol at 0x7fd53129a110>

In [12]:
async def async_entire_history(domain):
    """Return everything the notebook-level client `cl` has stored for `domain`.

    The request starts at a start value far in the past (-1000000000) so that
    no record is excluded by the lower time bound.
    """
    earliest_start = -1000000000
    history = await cl.get_wait_history(domain, earliest_start)
    return history

In [13]:
dataset = 'manualoaco2monthlymean'
column = 'co2'
df = dfs[dataset]['data']
print(df.head())
md = dfs[dataset]['metadata']
print(md['indra_id'], md['indra_domain'])
 
domain = md['indra_domain'].replace("{indra_id}", md['indra_id']).replace("{dataset}", dataset).replace('{column}', column)
print(domain)

existing_data = await async_entire_history(domain)
print(f"Start, existing data: {len(existing_data)} records")

# Enum rows:
dups=0
for i, row in df.iterrows():
    # get column 'decimal year':
    # print(f"{i}: {row['year']}-{row['month']}  z{row['decimal date']}, {row['average']}")
    # convert fractional year into datetime:
    dt = convert_partial_year(row['decimal date'])
    jd = IndraEvent.datetime2julian(dt)
    found = False
    for (j, e) in enumerate(existing_data):
        # print(jd,e[0])
        if abs(jd-e[0])<0.0001:
            # print(f"Found existing data for {jd}")
            found = True
            dups += 1
            break
    if found:
        continue
    ie = IndraEvent()
    ie.domain = domain
    ie.from_id = "py/MaunaLoaMonthlyCO2"
    ie.to_scope = "public"
    ie.time_jd_start = jd
    ie.data_type = "number/float/co2"
    ie.data = json.dumps(row['average'])
    print("Send new: ", ie.to_json())
    # await cl.send_event(ie)
print(f"Already existing data: {dups} records, new: {len(df)-dups} records")

2023-07-27 16:10:24,434 INFO IndraClient Sending: {"domain": "$trx/db/req/history", "from_id": "ws/python", "uuid4": "da66de86-3443-4e41-971f-7b409611fe45", "parent_uuid4": "", "seq_no": 0, "to_scope": "", "time_jd_start": 2460153.09056058, "data_type": "historyrequest", "data": "{\"domain\": \"$event/geodata/historical/manualoaco2monthlymean/co2\", \"time_jd_start\": -1000000000, \"time_jd_end\": null, \"limit\": null, \"data_type\": \"number/float%\", \"mode\": \"Sample\"}", "auth_hash": "", "time_jd_end": null}
2023-07-27 16:10:24,435 INFO IndraClient Received message: {"domain":"Ws.1/192.168.178.126:50524","from_id":"Storage.1","uuid4":"da66de86-3443-4e41-971f-7b409611fe45","parent_uuid4":null,"seq_no":null,"to_scope":"$event/geodata/historical/manualoaco2monthlymean/co2","time_jd_start":2460153.090560591,"data_type":"vector/tuple/jd/float","data":"[]","auth_hash":null,"time_jd_end":2460153.090560594}
2023-07-27 16:10:24,435 INFO IndraClient ----------------------------------------

   year  month  decimal date  average  deseasonalized  ndays  sdev  unc  \
0  1958      3     1958.2027   315.70          314.43    NaN   NaN  NaN   
1  1958      4     1958.2877   317.45          315.16    NaN   NaN  NaN   
2  1958      5     1958.3699   317.51          314.71    NaN   NaN  NaN   
3  1958      6     1958.4548   317.24          315.14    NaN   NaN  NaN   
4  1958      7     1958.5370   315.86          315.18    NaN   NaN  NaN   

   time_jd_start  mean/co  deseasonalized/co2  
0      1958.2027   315.70              314.43  
1      1958.2877   317.45              315.16  
2      1958.3699   317.51              314.71  
3      1958.4548   317.24              315.14  
4      1958.5370   315.86              315.18  
manualoaco2monthlymean $event/geodata/historical/{dataset}/{column}
$event/geodata/historical/manualoaco2monthlymean/co2
Start, existing data: 0 records
Send new:  {"domain": "$event/geodata/historical/manualoaco2monthlymean/co2", "from_id": "py/MaunaLoaMonthly

2023-07-28 07:59:49,299 ERROR IndraClient Could not receive message: no close frame received or sent
2023-07-28 07:59:49,301 ERROR IndraClient Could not receive message: no close frame received or sent


In [18]:
# Display the currently resolved event domain string.
domain

'$event/geodata/historical/manualoaco2monthlymean/co2'

In [None]:
# Fetch the full stored history for `domain`; async_entire_history relies on
# the notebook-level client `cl`, so the connection cell must have run first.
data = await async_entire_history(domain)


2023-06-22 09:54:15,707 INFO IndraClient Sending: {"domain": "$trx/db/req/history", "from_id": "ws/python", "uuid4": "1d3b74ec-6dca-41bb-ae57-35c5c5ef9a48", "to_scope": "", "time_jd_start": 2460117.82934847, "data_type": "historyrequest", "data": "{\"domain\": \"$event/geodata/historical/{dataset}/{column}\", \"time_jd_start\": -1000000000, \"time_jd_end\": null, \"limit\": null, \"data_type\": \"number/float%\", \"mode\": \"Sample\"}", "auth_hash": "", "time_jd_end": null}
2023-06-22 09:54:15,711 INFO IndraClient Received message: {"domain":"Ws.1/127.0.0.1:50848","from_id":"SQLx.1","uuid4":"1d3b74ec-6dca-41bb-ae57-35c5c5ef9a48","to_scope":"$event/geodata/historical/{dataset}/{column}","time_jd_start":2460117.829348492,"data_type":"vector/tuple/jd/float","data":"[]","auth_hash":null,"time_jd_end":2460117.829348503}
2023-06-22 09:54:15,712 INFO IndraClient ---------------------------------------------------------------
2023-06-22 09:54:15,712 INFO IndraClient Future: trx event $event/ge

In [None]:
# Display the history records fetched for `domain`.
data

[]

2023-06-22 09:57:08,713 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,715 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,715 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,716 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,716 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,717 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,717 ERROR IndraClient Could not receive message: no close frame received or sent
2023-06-22 09:57:08,718 ERROR IndraClient Could not receive message: no close frame received or sent


In [None]:
# Scratch frame: one column of values 1..3, indexed by 3..5.
# NOTE: this rebinds `df`, which earlier cells use for the dataset frame.
df = pd.DataFrame(data=[1, 2, 3], index=[3, 4, 5])