In [1]:
import os, sys, json, re
import itertools, importlib
import asyncpg, asyncio, threading

from edcompanion.eddbreader import edc_dbfilereader, edc_dbfile_process
from edcompanion.edsm_api import get_edsm_info, distance_between_systems

In [2]:
pgsql_params = dict(
    dsn=os.getenv("PGSQL_URL"),
    server_settings={'search_path': "eddb"}
)
pgpool = await asyncpg.create_pool(**pgsql_params)

In [3]:
# File names of the gzipped JSON dumps; later cells join them with the 'data' directory.
dbname_full = 'systems.json.gz'
dbname_1day = 'systems_1day.json.gz'
dbname_1week = 'systems_1week.json.gz'
# NOTE: the variable says "2week" while the file name says "2weeks".
dbname_2week = 'systems_2weeks.json.gz'
dbname_1month = 'systems_1month.json.gz'
dbname_6months = 'systems_6months.json.gz'

# Dump whose records also carry nested per-system details such as 'bodies'
# (see the sample record shown in the "Galaxy" section).
dbname_galaxy_1day = 'galaxy_1day.json.gz'

In [4]:
# Materialise the whole 1-day dump in memory and display its first record as a sample.
dataset = list(edc_dbfilereader(os.path.join('data', dbname_1day)))
dataset[0]

Reading data/systems_1day.json.gz, 2.5 Mb in approx 1 chunks
1/1	100.00%, 260796 /s,     76758 systems,   0.0 seconds remaining
Empty chunk -> Done! Imported 76758 systems in 0.3 seconds

0.29647252999999996 seconds 76758 systems, per system 3.86 us

{'id64': 2326687,
 'name': 'HD 192281',
 'mainStar': 'O (Blue-White) Star',
 'coords': {'x': -4023.53125, 'y': 230.875, 'z': 896.46875},
 'updateTime': '2024-04-13 21:57:30+00'}

In [5]:
# Maps each letter of 'OBAFGKMN' to its position in that string (0-7);
# presumably a spectral-class ordering.
classifications = {letter: rank for rank, letter in enumerate('OBAFGKMN')}
#print(json.dumps(classifications, indent=2))

# Cached mapping of 'mainStar' description -> numeric type.
# The build cell below saves this cache as 'main_star_types.json', whereas this
# cell used to read only the (misspelled) 'clasifications.json', so a freshly
# built cache was never picked up again. Both names are loaded now; the file
# written by the build cell is read last so its entries take precedence.
main_star_types = {}
for cache_name in ('clasifications.json', 'main_star_types.json'):
    try:
        with open(os.path.join('data', cache_name), 'rt') as jsonfile:
            main_star_types.update(json.load(jsonfile))
    except (OSError, ValueError, TypeError):
        # Best effort: a missing or unreadable cache only means the build cell
        # has to rebuild it. Unlike a bare except, KeyboardInterrupt still propagates.
        pass

#print(json.dumps(main_star_types, indent=2))


In [7]:
# Rebuild the star-type cache only when the loaded one holds fewer than
# 40 distinct 'mainStar' descriptions.
if len(main_star_types) < 40:
    for item in edc_dbfilereader(os.path.join('data', dbname_2week)):
        main_star = item.get('mainStar')

        # Membership test rather than truthiness of the stored value: an entry
        # whose cached value is 0 counts as known and must not be overwritten.
        if main_star and main_star not in main_star_types:
            # NOTE(review): every new type receives the same value,
            # len(classifications); confirm that one shared code is intended.
            main_star_types[main_star] = len(classifications)

    try:
        with open(os.path.join('data', 'main_star_types.json'),'wt') as jsonfile:
            json.dump(main_star_types, jsonfile, indent=3)
    except (OSError, TypeError, ValueError) as error:
        # Saving the cache stays best effort, but a failure is reported instead of hidden.
        print(f"Could not save main_star_types.json: {error}")


## Create & Build

In [None]:
#await pgpool.execute(f"DROP TABLE IF EXISTS systems;")
# Create the bulk-load target table. No primary key or unique constraint is
# declared here; unique indexes on name and id64 are only added by later cells,
# after duplicates have been removed.
await pgpool.execute(f"""
    CREATE TABLE IF NOT EXISTS systems (
        id64 BIGINT NOT NULL,
        x DOUBLE PRECISION  NOT NULL,
        y DOUBLE PRECISION  NOT NULL,
        z DOUBLE PRECISION  NOT NULL,
        name TEXT NOT NULL
    );
""")


In [None]:

async def push_records(data):
    return await pgpool.copy_records_to_table("systems", records=data)

records = []
count=0
for item in edc_dbfilereader(os.path.join('data', dbname_full), verbose=True):
    
    if not item:
        continue

    coords = item.get('coords')
    coordinates = [coords[k] for k in ['x','y','z']]
    records.append(
        [item.get("id64")] + coordinates + [item.get('name')] 
    )
    count +=1
    if count > 10000:
        qr = await pgpool.copy_records_to_table("systems", records=records)
        records = []
        count = 0

if count > 0:
    qr = await pgpool.copy_records_to_table("systems", records=records)
    records = []
    count = 0


In [None]:
print(f"Adding indexes ...")
# Plain (non-unique) indexes on every column. The name and id64 indexes are
# replaced by unique ones in later cells once duplicates have been removed.
await pgpool.execute(f"""
    CREATE INDEX IF NOT EXISTS systems_x_idx ON systems (x);
    CREATE INDEX IF NOT EXISTS systems_y_idx ON systems (y);
    CREATE INDEX IF NOT EXISTS systems_z_idx ON systems (z); 
    CREATE INDEX IF NOT EXISTS systems_name_idx ON systems (name);
    CREATE INDEX IF NOT EXISTS systems_id64_idx ON systems (id64);
""")

In [None]:
print("Removing duplicates by system name")
# For each name keep only the row with the smallest ctid (PostgreSQL's physical
# row identifier) and delete every other row carrying that name.
await pgpool.execute("""
    DELETE FROM systems a
    WHERE   a.ctid <> (SELECT min(b.ctid)
                     FROM   systems b
                     WHERE  a.name = b.name );"""
)

In [None]:
print(f"Adding unique index on system name ...")
await pgpool.execute(f"""
    DROP INDEX systems_name_idx ;
    CREATE UNIQUE INDEX IF NOT EXISTS systems_name_unique ON eddb.systems (name)
""")

In [None]:
print("Removing duplicates by system id")
# For each id64 keep only the row with the smallest ctid (PostgreSQL's physical
# row identifier) and delete every other row carrying that id64.
await pgpool.execute("""
    DELETE FROM systems a
    WHERE   a.ctid <> (SELECT min(b.ctid)
                     FROM   systems b
                     WHERE  a.id64 = b.id64 );"""
)

In [None]:
print(f"Adding unique index on system id ...")
await pgpool.execute(f"""
    DROP INDEX systems_id64_idx ;
    CREATE UNIQUE INDEX IF NOT EXISTS systems_id64_unique ON eddb.systems (id64)
""")

## Update

In [5]:
def get_coordinates_from_item(item):
    """Return the position stored under ``item['coords']`` as an ``[x, y, z]`` list.

    Raises:
        TypeError: if ``item`` has no 'coords' entry (the lookup yields None).
        KeyError: if one of the 'x', 'y', 'z' keys is missing.
    """
    position = item.get('coords')
    x, y, z = (position[axis] for axis in 'xyz')
    return [x, y, z]

async def process_data(datachunk):
    """Insert one chunk of system records into ``systems``.

    Each item becomes a row ``[id64, x, y, z, name]``. Rows that would violate
    a unique index (name or id64, created in the "Create & Build" cells) are
    skipped through ``ON CONFLICT DO NOTHING``.

    Args:
        datachunk: iterable of system dicts with 'id64', 'coords' and 'name'.

    Returns:
        Whatever ``pgpool.executemany`` returns.
    """
    rows = []
    for system in datachunk:
        rows.append(
            [system.get("id64")] + get_coordinates_from_item(system) + [system.get('name')]
        )

    insert_sql = """INSERT INTO systems (id64, x, y, z, name) 
                VALUES ($1, $2, $3, $4, $5) 
                ON CONFLICT DO NOTHING
            """
    return await pgpool.executemany(insert_sql, rows)

# Feed the 1-month dump through process_data; presumably the callback is invoked
# once per chunk of records (the recorded output reports ~32 chunks) — confirm
# against edc_dbfile_process.
await edc_dbfile_process(
    os.path.join('data', dbname_1month),
    process_data,
    verbose=True
)

Reading data/systems_1month.json.gz, 63.8 Mb in approx 32 chunks
21/32	65.62%, 143900 /s,   1960413 systems,   7.1 seconds remaining
Empty chunk -> Done! Imported 1960413 systems in 13.6 seconds

13.626672066000001 seconds 1960413 systems, per system 6.95 us

## Galaxy



In [6]:
# Peek at the galaxy dump: keep only its first 11 records, then display the first one.
dataset = list(itertools.islice(edc_dbfilereader(os.path.join('data', dbname_galaxy_1day)), 11))
dataset[0]

Reading data/galaxy_1day.json.gz, 825.4 Mb in approx 104 chunks


{'id64': 2587943,
 'name': 'Great Annihilator',
 'coords': {'x': 354.84375, 'y': -42.4375, 'z': 22997.21875},
 'allegiance': None,
 'government': 'None',
 'primaryEconomy': 'None',
 'secondaryEconomy': 'None',
 'security': 'Anarchy',
 'population': 0,
 'bodyCount': 8,
 'date': '2024-10-02 14:49:00+00',
 'bodies': [{'id64': 36028797021551911,
   'bodyId': 1,
   'name': 'Great Annihilator A',
   'type': 'Star',
   'subType': 'Black Hole',
   'distanceToArrival': 0.0,
   'mainStar': True,
   'age': 2,
   'spectralClass': None,
   'luminosity': 'VII',
   'absoluteMagnitude': 20.0,
   'solarMasses': 198.097656,
   'solarRadius': 0.000840251168224299,
   'surfaceTemperature': 0.0,
   'rotationalPeriod': 1.03721064814815e-06,
   'axialTilt': 0.0,
   'parents': [{'Null': 0}],
   'orbitalPeriod': 227703.00231598,
   'semiMajorAxis': 123.178692106994,
   'orbitalEccentricity': 0.12315,
   'orbitalInclination': 86.230769,
   'argOfPeriapsis': 348.433399,
   'meanAnomaly': 30.921444,
   'ascending

In [7]:
# Fetch the same system through the EDSM helper so its record layout can be
# compared with the dump entry shown above.
edsm_item = get_edsm_info('Great Annihilator', verbose=True)
edsm_item

{'id': 65259,
 'id64': 2587943,
 'name': 'Great Annihilator',
 'url': 'https://www.edsm.net/en/system/bodies/id/65259/name/Great+Annihilator',
 'bodyCount': 8,
 'bodies': [{'id': 48564,
   'id64': 36028797021551911,
   'bodyId': 1,
   'name': 'Great Annihilator A',
   'discovery': {'commander': 'Martin Afonso', 'date': '2016-10-27 18:49:20'},
   'type': 'Star',
   'subType': 'Black Hole',
   'parents': [{'Null': 0}],
   'distanceToArrival': 0,
   'isMainStar': True,
   'isScoopable': False,
   'age': 2,
   'spectralClass': None,
   'luminosity': 'VII',
   'absoluteMagnitude': 20,
   'solarMasses': 198.097656,
   'solarRadius': 0.000840251168224299,
   'surfaceTemperature': 0,
   'orbitalPeriod': 227703.00231598382,
   'semiMajorAxis': 123.17869210699267,
   'orbitalEccentricity': 0.12315,
   'orbitalInclination': 86.230769,
   'argOfPeriapsis': 348.433399,
   'rotationalPeriod': 1.037210648148148e-06,
   'rotationalPeriodTidallyLocked': False,
   'axialTilt': None,
   'updateTime': '20

In [19]:
# Keys that are left out of the inferred structure entirely.
ignore_keys = {'thargoidWar'}

def update_structure(structure,from_item):
    """Merge the shape and value types of ``from_item`` into ``structure`` in place.

    ``structure`` mirrors the nesting of the data seen so far:

    * a dict maps each key to the sub-structure of its values;
    * a list is a two-slot ``[dict, set]`` pair: the dict collects the merged
      layout of dict elements, the set collects the types of other elements;
    * a set collects the Python types observed for a scalar value.

    Keys listed in ``ignore_keys`` are skipped.

    Args:
        structure: dict, list or set to update.
        from_item: the data item whose layout is merged in.

    Raises:
        AssertionError: if ``structure`` is a dict or list and ``from_item``
            is not of the same container kind.
    """
    both_dicts = isinstance(structure, dict) and isinstance(from_item, dict)
    both_lists = isinstance(structure, list) and isinstance(from_item, list)
    assert isinstance(structure, set) or both_dicts or both_lists, f"structure and from_item must both be dict or list but found {type(structure)} and {type(from_item)}\n  structure:{structure}\nfrom_item:{from_item}"

    if isinstance(structure, dict):
        for key, value in from_item.items():
            if key in ignore_keys:
                continue

            # First sighting of this key: create the matching empty sub-structure.
            if key not in structure:
                if isinstance(value, dict):
                    structure[key] = dict()
                elif isinstance(value, list):
                    structure[key] = list([dict(), set()])
                else:
                    structure[key] = set()

            update_structure(structure[key], value)
        return

    if isinstance(structure, list):
        first_slot = structure[0]
        for element in from_item:
            # Dict elements (or everything, when the first slot is itself a set)
            # go into the first slot; other elements go into the second slot.
            if isinstance(first_slot, set) or isinstance(element, dict):
                target = first_slot
            else:
                target = structure[1]
            update_structure(target, element)
        return

    if isinstance((structure), set):
        structure.add(type(from_item))
        return

    if isinstance(from_item, str):
        #pass
        print(from_item)
        #print(structure)


In [10]:
teststructure = dict()


In [11]:
update_structure(teststructure, edsm_item)


In [15]:
teststructure

{'id': {int},
 'id64': {int},
 'name': {str},
 'url': {str},
 'bodyCount': {int},
 'bodies': [{'id': {int},
   'id64': {int},
   'bodyId': {int},
   'name': {str},
   'discovery': {'commander': {str}, 'date': {str}},
   'type': {str},
   'subType': {str},
   'parents': [{'Null': {int}, 'Star': {int}, 'Planet': {int}}, set()],
   'distanceToArrival': {float, int},
   'isMainStar': {bool},
   'isScoopable': {bool},
   'age': {int},
   'spectralClass': {NoneType, str},
   'luminosity': {str},
   'absoluteMagnitude': {float, int},
   'solarMasses': {float},
   'solarRadius': {float},
   'surfaceTemperature': {float, int},
   'orbitalPeriod': {float},
   'semiMajorAxis': {float},
   'orbitalEccentricity': {NoneType, float},
   'orbitalInclination': {float},
   'argOfPeriapsis': {float},
   'rotationalPeriod': {float},
   'rotationalPeriodTidallyLocked': {bool},
   'axialTilt': {NoneType, float},
   'updateTime': {str},
   'isLandable': {bool},
   'gravity': {float},
   'earthMasses': {float

In [13]:
len(dataset)

11

In [14]:
# Merge the layout of every sampled record in `dataset` into teststructure.
for item in dataset:
    update_structure(teststructure, item)

In [None]:
teststructure

In [20]:

# Same merge over the complete galaxy dump instead of the sample
# (about 94 seconds in the recorded run below).
for item in edc_dbfilereader(os.path.join('data', dbname_galaxy_1day), verbose=True):
    update_structure(teststructure, item)


Reading data/galaxy_1day.json.gz, 825.4 Mb in approx 104 chunks
80/104	76.92%,    980 /s,     92601 systems,  28.3 seconds remaining
Empty chunk -> Done! Imported 92601 systems in 94.4 seconds

94.429778123 seconds 92601 systems, per system 1019.75 us

In [17]:
teststructure

{'id': {int},
 'id64': {int},
 'name': {str},
 'url': {str},
 'bodyCount': {int},
 'bodies': [{'id': {int},
   'id64': {int},
   'bodyId': {int},
   'name': {str},
   'discovery': {'commander': {str}, 'date': {str}},
   'type': {str},
   'subType': {str},
   'parents': [{'Null': {int}, 'Star': {int}, 'Planet': {int}}, set()],
   'distanceToArrival': {float, int},
   'isMainStar': {bool},
   'isScoopable': {bool},
   'age': {int},
   'spectralClass': {NoneType, str},
   'luminosity': {str},
   'absoluteMagnitude': {float, int},
   'solarMasses': {float},
   'solarRadius': {float},
   'surfaceTemperature': {float, int},
   'orbitalPeriod': {float},
   'semiMajorAxis': {float},
   'orbitalEccentricity': {NoneType, float},
   'orbitalInclination': {float},
   'argOfPeriapsis': {float},
   'rotationalPeriod': {float},
   'rotationalPeriodTidallyLocked': {bool},
   'axialTilt': {NoneType, float},
   'updateTime': {str},
   'isLandable': {bool},
   'gravity': {float},
   'earthMasses': {float

In [18]:
list(teststructure.keys())

['id',
 'id64',
 'name',
 'url',
 'bodyCount',
 'bodies',
 'coords',
 'allegiance',
 'government',
 'primaryEconomy',
 'secondaryEconomy',
 'security',
 'population',
 'date',
 'controllingFaction',
 'powers',
 'powerState']

In [50]:
def fish_for_lists(structure, all_lists):
    """Record, for every list-valued key in ``structure``, how many fields its elements have.

    Walks nested dicts and lists. For each dict key whose value is a list with
    a dict in its first slot, ``all_lists[key]`` is set to the number of keys
    of that dict. Results are keyed by the bare key name, so a name that occurs
    at several nesting levels keeps only the count met last.

    Args:
        structure: nested dict/list/set layout, e.g. as built by ``update_structure``.
        all_lists: dict that is updated in place with ``key -> field count``.

    Returns:
        None.
    """
    if isinstance(structure, list):
        for element in structure:
            fish_for_lists(element, all_lists)

    elif isinstance(structure, dict):

        for key, value in structure.items():
            # Empty lists and lists whose first slot is not a dict used to raise
            # (IndexError / AttributeError); they have no fields to count, so
            # they are skipped while the walk still descends into them.
            if isinstance(value, list) and value and isinstance(value[0], dict):
                all_lists[key] = len(value[0])
            fish_for_lists(value, all_lists)

In [51]:
# Every list-valued key found anywhere in the merged structure, with the number
# of distinct fields seen in that list's dict elements.
collected_lists = dict()
fish_for_lists(teststructure, collected_lists)
collected_lists

{'bodies': 47,
 'parents': 3,
 'genuses': 0,
 'rings': 6,
 'belts': 5,
 'stations': 19,
 'services': 0,
 'commodities': 8,
 'prohibitedCommodities': 0,
 'modules': 7,
 'ships': 3,
 'powers': 0,
 'factions': 5}

In [53]:
# Same collection, restricted to the merged layout of a single body.
body_lists = dict()
fish_for_lists(teststructure['bodies'][0], body_lists)
body_lists

{'parents': 3,
 'genuses': 0,
 'rings': 6,
 'belts': 5,
 'stations': 19,
 'services': 0,
 'commodities': 8,
 'prohibitedCommodities': 0,
 'modules': 7,
 'ships': 3}

In [67]:
# Non-recursive variant: only the lists that sit directly on a body, not those nested deeper.
{k:len(v[0])for k, v in teststructure['bodies'][0].items() if isinstance(v, list)}

{'parents': 3, 'rings': 6, 'belts': 5, 'stations': 19}

In [62]:
# Body fields whose name is not one of the list-valued keys collected in body_lists.
{field: types for field, types in teststructure['bodies'][0].items() if field not in body_lists}

{'id': {int},
 'id64': {int},
 'bodyId': {int},
 'name': {str},
 'discovery': {'commander': {str}, 'date': {str}},
 'type': {str},
 'subType': {str},
 'distanceToArrival': {float, int},
 'isMainStar': {bool},
 'isScoopable': {bool},
 'age': {int},
 'spectralClass': {NoneType, str},
 'luminosity': {str},
 'absoluteMagnitude': {float, int},
 'solarMasses': {float},
 'solarRadius': {float},
 'surfaceTemperature': {float, int},
 'orbitalPeriod': {float},
 'semiMajorAxis': {float},
 'orbitalEccentricity': {NoneType, float},
 'orbitalInclination': {float},
 'argOfPeriapsis': {float},
 'rotationalPeriod': {float},
 'rotationalPeriodTidallyLocked': {bool},
 'axialTilt': {NoneType, float},
 'updateTime': {str},
 'isLandable': {bool},
 'gravity': {float},
 'earthMasses': {float},
 'radius': {float},
 'surfacePressure': {float},
 'volcanismType': {str},
 'atmosphereType': {NoneType, str},
 'atmosphereComposition': {'Helium': {float},
  'Hydrogen': {float},
  'Carbon dioxide': {float},
  'Silicate

In [59]:
# Layout of every body field whose key contains 'signal'.
[v for k, v in teststructure['bodies'][0].items() if 'signal' in k]

[{'signals': {'$SAA_SignalType_Human;': {int},
   '$SAA_SignalType_Biological;': {int},
   '$SAA_SignalType_Geological;': {int},
   '$SAA_SignalType_Other;': {int},
   '$SAA_SignalType_Thargoid;': {int},
   '$SAA_SignalType_Guardian;': {int},
   'Painite': {int},
   'Platinum': {int},
   'Rhodplumsite': {int},
   'Serendibite': {int},
   '$SAA_SignalType_PlanetAnomaly;': {int},
   'Benitoite': {int},
   'Monazite': {int},
   'Alexandrite': {int},
   'Bromellite': {int},
   'Grandidierite': {int},
   'LowTemperatureDiamond': {int},
   'Opal': {int},
   'Tritium': {int}},
  'updateTime': {str},
  'genuses': [{}, {str}]}]

In [63]:
[v for k, v in teststructure['bodies'][0].items() if 'signal' in k]




[{'signals': {'$SAA_SignalType_Human;': {int},
   '$SAA_SignalType_Biological;': {int},
   '$SAA_SignalType_Geological;': {int},
   '$SAA_SignalType_Other;': {int},
   '$SAA_SignalType_Thargoid;': {int},
   '$SAA_SignalType_Guardian;': {int},
   'Painite': {int},
   'Platinum': {int},
   'Rhodplumsite': {int},
   'Serendibite': {int},
   '$SAA_SignalType_PlanetAnomaly;': {int},
   'Benitoite': {int},
   'Monazite': {int},
   'Alexandrite': {int},
   'Bromellite': {int},
   'Grandidierite': {int},
   'LowTemperatureDiamond': {int},
   'Opal': {int},
   'Tritium': {int}},
  'updateTime': {str},
  'genuses': [{}, {str}]}]

In [174]:
def go_through_items(iterable):
    """Print every element of ``iterable`` on its own line, consuming it in the process."""
    for entry in iterable:
        print(entry)
        

In [None]:
go_through_items(edc_dbfilereader(os.path.join('data', dbname_1day)))

In [33]:
import time

In [None]:
import importlib
import edcompanion.threadworker
importlib.reload(edcompanion.threadworker)

In [2]:
from edcompanion.threadworker import create_producer_from_iterable

In [8]:
# Wrap the 1-day dump reader in a producer object. The cells below call its
# start/get_item/get_items/stop/join methods; presumably it is thread-backed —
# TODO confirm in edcompanion.threadworker.
producer = create_producer_from_iterable(edc_dbfilereader(os.path.join('data', dbname_1day)))

In [None]:
producer.start()

In [None]:
producer.get_item()

In [18]:
for item in producer.get_items():
    print(item)


In [16]:
producer.stop()

In [17]:
producer.join()