In [None]:
import os, sys, json, re
import itertools, importlib
import asyncpg, asyncio, threading

from edcompanion.eddbreader import edc_dbfilereader, edc_dbfile_process
from edcompanion.edsm_api import get_edsm_info, distance_between_systems

In [None]:
pgsql_params = dict(
    dsn=os.getenv("PGSQL_URL"),
    server_settings={'search_path': "eddb"}
)
pgpool = await asyncpg.create_pool(**pgsql_params)

In [None]:
# File names of the system dumps; other cells join them onto the "data" directory.
dbname_full = "systems.json.gz"
dbname_1day = "systems_1day.json.gz"
dbname_1week = "systems_1week.json.gz"
dbname_2week = "systems_2weeks.json.gz"
dbname_1month = "systems_1month.json.gz"
dbname_6months = "systems_6months.json.gz"

# File name of the one-day galaxy dump.
dbname_galaxy_1day = "galaxy_1day.json.gz"

In [None]:
# Read every record of the one-day dump into memory and show the first one.
dataset = list(edc_dbfilereader(os.path.join('data', dbname_1day)))
dataset[0]

Reading data/systems_1day.json.gz, 2.5 Mb in approx 1 chunks
1/1	100.00%, 270915 /s,     76758 systems,   0.0 seconds remaining
Empty chunk -> Done! Imported 76758 systems in 0.3 seconds

0.28506417299999987 seconds 76758 systems, per system 3.71 us

{'id64': 2326687,
 'name': 'HD 192281',
 'mainStar': 'O (Blue-White) Star',
 'coords': {'x': -4023.53125, 'y': 230.875, 'z': 896.46875},
 'updateTime': '2024-04-13 21:57:30+00'}

In [None]:
# Rank of each spectral-class letter, following the order of the string below
# (8 letters -> ranks 0..7).
classifications = {letter: rank for rank, letter in enumerate('OBAFGKMN')}

# Star-type table loaded from a previously saved JSON file; stays empty when
# no usable file is found.
main_star_types = {}
# NOTE(review): the build cell saves to 'main_star_types.json', while this cell
# reads 'clasifications.json' -- confirm which file name is intended.
try:
    with open(os.path.join('data', 'clasifications.json'), 'rt') as jsonfile:
        main_star_types.update(json.load(jsonfile))
except FileNotFoundError:
    # No cache yet: expected on a first run, the table is built later.
    pass
except (OSError, ValueError, TypeError) as err:
    # Unreadable or malformed cache: report it instead of hiding the problem.
    print(f"Ignoring unusable star-type cache: {err}")


In [None]:
# Scan the two-week dump for star types only when fewer than 40 are known.
if len(main_star_types) < 40:
    for item in edc_dbfilereader(os.path.join('data', dbname_2week)):
        main_star = item.get('mainStar')

        # NOTE(review): len(classifications) is not changed anywhere in this
        # notebook, so every new star type gets the same value -- confirm
        # whether a per-type id or a spectral-class lookup was intended.
        if main_star and not main_star_types.get(main_star):
            main_star_types[main_star] = len(classifications)

    # Saving is best effort, but a failure is reported rather than swallowed.
    try:
        with open(os.path.join('data', 'main_star_types.json'), 'wt') as jsonfile:
            json.dump(main_star_types, jsonfile, indent=3)
    except (OSError, TypeError, ValueError) as err:
        print(f"Could not save main_star_types.json: {err}")


## Create & Build

In [None]:
#await pgpool.execute(f"DROP TABLE IF EXISTS systems;")
await pgpool.execute(f"""
    CREATE TABLE IF NOT EXISTS systems (
        id64 BIGINT NOT NULL,
        x DOUBLE PRECISION  NOT NULL,
        y DOUBLE PRECISION  NOT NULL,
        z DOUBLE PRECISION  NOT NULL,
        name TEXT NOT NULL
    );
""")


In [None]:

async def push_records(data):
    """Bulk-copy the rows in ``data`` into the ``systems`` table.

    Returns the value produced by ``pgpool.copy_records_to_table``.
    """
    copy_result = await pgpool.copy_records_to_table("systems", records=data)
    return copy_result

records = []
count=0
for item in edc_dbfilereader(os.path.join('data', dbname_full), verbose=True):
    
    if not item:
        continue

    coords = item.get('coords')
    coordinates = [coords[k] for k in ['x','y','z']]
    records.append(
        [item.get("id64")] + coordinates + [item.get('name')] 
    )
    count +=1
    if count > 10000:
        qr = await pgpool.copy_records_to_table("systems", records=records)
        records = []
        count = 0

if count > 0:
    qr = await pgpool.copy_records_to_table("systems", records=records)
    records = []
    count = 0


In [None]:
print(f"Adding indexes ...")
await pgpool.execute(f"""
    CREATE INDEX IF NOT EXISTS systems_x_idx ON systems (x);
    CREATE INDEX IF NOT EXISTS systems_y_idx ON systems (y);
    CREATE INDEX IF NOT EXISTS systems_z_idx ON systems (z); 
    CREATE INDEX IF NOT EXISTS systems_name_idx ON systems (name);
    CREATE INDEX IF NOT EXISTS systems_id64_idx ON systems (id64);
""")

In [None]:
print("Removing duplicates by system name")
await pgpool.execute("""
    DELETE FROM systems a
    WHERE   a.ctid <> (SELECT min(b.ctid)
                     FROM   systems b
                     WHERE  a.name = b.name );"""
)

print(f"Adding unique index on system name ...")
await pgpool.execute(f"""
    DROP INDEX systems_name_idx ;
    CREATE UNIQUE INDEX IF NOT EXISTS systems_name_unique ON eddb.systems (name)
""")

In [None]:
print("Removing duplicates by system id")
await pgpool.execute("""
    DELETE FROM systems a
    WHERE   a.ctid <> (SELECT min(b.ctid)
                     FROM   systems b
                     WHERE  a.id64 = b.id64 );"""
)

In [None]:
print(f"Adding unique index on system id ...")
await pgpool.execute(f"""
    DROP INDEX systems_id64_idx ;
    CREATE UNIQUE INDEX IF NOT EXISTS systems_id64_unique ON eddb.systems (id64)
""")

## Update

In [None]:
def get_coordinates_from_item(item):
    """Return the ``x``, ``y`` and ``z`` entries of ``item['coords']`` as a list."""
    position = item.get('coords')
    return [position['x'], position['y'], position['z']]

async def process_data(datachunk):
    """Insert one chunk of system records into ``systems``.

    Every item contributes its id64, its x/y/z coordinates and its name;
    rows that hit a conflict are left untouched (ON CONFLICT DO NOTHING).
    Returns the result of ``pgpool.executemany``.
    """
    insert_sql = """INSERT INTO systems (id64, x, y, z, name) 
                VALUES ($1, $2, $3, $4, $5) 
                ON CONFLICT DO NOTHING
            """
    rows = []
    for entry in datachunk:
        rows.append(
            [entry.get("id64")] + get_coordinates_from_item(entry) + [entry.get('name')]
        )
    return await pgpool.executemany(insert_sql, rows)

await edc_dbfile_process(
    os.path.join('data', dbname_6months),
    process_data,
    verbose=True
)

Reading data/systems_6months.json.gz, 569.6 Mb in approx 285 chunks
191/285	67.02%, 144015 /s,  17879879 systems,  61.1 seconds remaining
Empty chunk -> Done! Imported 17879879 systems in 124.2 seconds

124.153779107 seconds 17879879 systems, per system 6.94 us

## Galaxy



In [None]:
# Keep only the first 11 records of the galaxy dump as a sample and show the third.
galaxy_reader = edc_dbfilereader(os.path.join('data', dbname_galaxy_1day))
dataset = list(itertools.islice(galaxy_reader, 11))
dataset[2]

Reading data/galaxy_1day.json.gz, 825.4 Mb in approx 104 chunks


{'id64': 19528062,
 'name': 'Gria Drye CL-Y g0',
 'coords': {'x': -2554.5625, 'y': -64.375, 'z': 6210.8125},
 'allegiance': None,
 'government': 'None',
 'primaryEconomy': 'None',
 'secondaryEconomy': 'None',
 'security': 'Anarchy',
 'population': 0,
 'bodyCount': 45,
 'date': '2024-10-02 19:44:05+00',
 'bodies': [{'id64': 36028797038492030,
   'bodyId': 1,
   'name': 'Gria Drye CL-Y g0 A',
   'type': 'Star',
   'subType': 'B (Blue-White) Star',
   'distanceToArrival': 0.0,
   'mainStar': True,
   'age': 206,
   'spectralClass': 'B0',
   'luminosity': 'Vz',
   'absoluteMagnitude': -1.94809,
   'solarMasses': 5.582031,
   'solarRadius': 2.94024649892164,
   'surfaceTemperature': 16041.0,
   'rotationalPeriod': 1.12239243681713,
   'rotationalPeriodTidallyLocked': False,
   'axialTilt': 0.0,
   'parents': [{'Null': 0}],
   'orbitalPeriod': 104291.734320146,
   'semiMajorAxis': 32.4487312391419,
   'orbitalEccentricity': 0.001444,
   'orbitalInclination': -62.840223,
   'argOfPeriapsis': 

In [None]:
# Top-level keys of the second sampled galaxy record.
list(dataset[1])

['id64',
 'name',
 'coords',
 'allegiance',
 'government',
 'primaryEconomy',
 'secondaryEconomy',
 'security',
 'population',
 'bodyCount',
 'date',
 'bodies',
 'stations']

In [None]:
# Look up one system through get_edsm_info for comparison with the dump records.
# NOTE(review): presumably a network request to EDSM -- confirm in edcompanion.edsm_api.
edsm_item = get_edsm_info('Capricorni Sector GR-V b2-7', verbose=True)
edsm_item

{'id': 62860,
 'id64': 16063580153297,
 'name': 'Capricorni Sector GR-V b2-7',
 'url': 'https://www.edsm.net/en/system/bodies/id/62860/name/Capricorni+Sector+GR-V+b2-7',
 'bodyCount': 10,
 'bodies': [{'id': 3229484,
   'id64': 16063580153297,
   'bodyId': 0,
   'name': 'Capricorni Sector GR-V b2-7',
   'discovery': {'commander': 'Elferren', 'date': '2018-01-08 14:19:43'},
   'type': 'Star',
   'subType': 'M (Red dwarf) Star',
   'parents': None,
   'distanceToArrival': 0,
   'isMainStar': True,
   'isScoopable': True,
   'age': 414,
   'spectralClass': 'M3',
   'luminosity': 'Va',
   'absoluteMagnitude': 8.805634,
   'solarMasses': 0.398438,
   'solarRadius': 0.5570678245866283,
   'surfaceTemperature': 3100,
   'orbitalPeriod': None,
   'semiMajorAxis': None,
   'orbitalEccentricity': None,
   'orbitalInclination': None,
   'argOfPeriapsis': None,
   'rotationalPeriod': 1.8084178868287037,
   'rotationalPeriodTidallyLocked': False,
   'axialTilt': None,
   'updateTime': '2020-06-11 14

In [None]:
# Top-level keys of the EDSM record.
list(edsm_item)

['id', 'id64', 'name', 'url', 'bodyCount', 'bodies']

In [None]:
# Keys that update_structure skips entirely.
ignore_keys = {'thargoidWar'}

def update_structure(structure, from_item):
    """Merge the shape of ``from_item`` into ``structure`` in place.

    ``structure`` mirrors the nesting of the items merged so far:

    * a dict maps each key to the sub-structure of its values,
    * a list is a two-slot pair ``[dict-structure, set-of-types]``: dict
      elements are merged into slot 0, other elements into slot 1,
    * a set collects the Python types of the leaf values seen.

    Keys listed in the module-level ``ignore_keys`` are skipped.

    Raises:
        AssertionError: if ``structure`` is a dict (or list) and ``from_item``
            is not a dict (or list) as well.
    """
    kinds_match = (
        isinstance(structure, set)
        or (isinstance(structure, dict) and isinstance(from_item, dict))
        or (isinstance(structure, list) and isinstance(from_item, list))
    )
    assert kinds_match, f"structure and from_item must both be dict or list but found {type(structure)} and {type(from_item)}\n  structure:{structure}\nfrom_item:{from_item}"

    if isinstance(structure, dict):
        for key, value in from_item.items():
            if key in ignore_keys:
                continue

            # First sighting of a key: create the container matching the value's kind.
            if key not in structure:
                if isinstance(value, dict):
                    structure[key] = {}
                elif isinstance(value, list):
                    structure[key] = [{}, set()]
                else:
                    structure[key] = set()

            update_structure(structure[key], value)
        return

    if isinstance(structure, list):
        first_slot = structure[0]
        for element in from_item:
            if isinstance(first_slot, set) or isinstance(element, dict):
                update_structure(first_slot, element)
            else:
                update_structure(structure[1], element)
        return

    if isinstance(structure, set):
        structure.add(type(from_item))
        return

    if isinstance(from_item, str):
        print(from_item)


In [None]:
# Accumulator for the structure inferred by update_structure.
teststructure = {}


In [None]:
# Merge the shape of the EDSM record into the accumulated structure.
update_structure(teststructure, edsm_item)


In [None]:
# Display the structure inferred so far.
teststructure

{'id': {int},
 'id64': {int},
 'name': {str},
 'url': {str},
 'bodyCount': {int},
 'bodies': [{'id': {int},
   'id64': {int},
   'bodyId': {int},
   'name': {str},
   'discovery': {'commander': {str}, 'date': {str}},
   'type': {str},
   'subType': {str},
   'parents': [{'Null': {int}, 'Star': {int}, 'Planet': {int}}, set()],
   'distanceToArrival': {float, int},
   'isMainStar': {bool},
   'isScoopable': {bool},
   'age': {int},
   'spectralClass': {NoneType, str},
   'luminosity': {str},
   'absoluteMagnitude': {float, int},
   'solarMasses': {float},
   'solarRadius': {float},
   'surfaceTemperature': {float, int},
   'orbitalPeriod': {float},
   'semiMajorAxis': {float},
   'orbitalEccentricity': {NoneType, float},
   'orbitalInclination': {float},
   'argOfPeriapsis': {float},
   'rotationalPeriod': {float},
   'rotationalPeriodTidallyLocked': {bool},
   'axialTilt': {NoneType, float},
   'updateTime': {str},
   'isLandable': {bool},
   'gravity': {float},
   'earthMasses': {float

In [None]:
# Number of records currently held in the sample.
len(dataset)

11

In [None]:
# Merge the shape of every sampled record into the accumulated structure.
for item in dataset:
    update_structure(teststructure, item)

In [None]:
# Display the structure after merging the sampled records.
teststructure

In [None]:

# Merge the shape of every record of the one-day galaxy dump into the structure.
# The recorded run of this cell took about 94 seconds.
for item in edc_dbfilereader(os.path.join('data', dbname_galaxy_1day), verbose=True):
    update_structure(teststructure, item)


Reading data/galaxy_1day.json.gz, 825.4 Mb in approx 104 chunks
80/104	76.92%,    980 /s,     92601 systems,  28.3 seconds remaining
Empty chunk -> Done! Imported 92601 systems in 94.4 seconds

94.429778123 seconds 92601 systems, per system 1019.75 us

In [None]:
# Display the structure after merging the whole galaxy dump.
teststructure

{'id': {int},
 'id64': {int},
 'name': {str},
 'url': {str},
 'bodyCount': {int},
 'bodies': [{'id': {int},
   'id64': {int},
   'bodyId': {int},
   'name': {str},
   'discovery': {'commander': {str}, 'date': {str}},
   'type': {str},
   'subType': {str},
   'parents': [{'Null': {int}, 'Star': {int}, 'Planet': {int}}, set()],
   'distanceToArrival': {float, int},
   'isMainStar': {bool},
   'isScoopable': {bool},
   'age': {int},
   'spectralClass': {NoneType, str},
   'luminosity': {str},
   'absoluteMagnitude': {float, int},
   'solarMasses': {float},
   'solarRadius': {float},
   'surfaceTemperature': {float, int},
   'orbitalPeriod': {float},
   'semiMajorAxis': {float},
   'orbitalEccentricity': {NoneType, float},
   'orbitalInclination': {float},
   'argOfPeriapsis': {float},
   'rotationalPeriod': {float},
   'rotationalPeriodTidallyLocked': {bool},
   'axialTilt': {NoneType, float},
   'updateTime': {str},
   'isLandable': {bool},
   'gravity': {float},
   'earthMasses': {float

In [None]:
# Top-level keys of the inferred structure.
list(teststructure)

['id',
 'id64',
 'name',
 'url',
 'bodyCount',
 'bodies',
 'coords',
 'allegiance',
 'government',
 'primaryEconomy',
 'secondaryEconomy',
 'security',
 'population',
 'date',
 'controllingFaction',
 'powers',
 'powerState']

In [None]:
def fish_for_lists(structure, all_lists):
    """Record, for each list-valued key in ``structure``, the size of its first element.

    Dicts and lists are walked recursively. For every dict entry whose value
    is a list, ``all_lists[key]`` is set to the number of keys of the list's
    first element, or to 0 when the list is empty or its first element is
    not a dict.

    Args:
        structure: nested dict/list structure; any other value is ignored.
        all_lists: dict updated in place. A key that occurs at several
            nesting levels keeps the value recorded last.
    """
    if isinstance(structure, list):
        for item in structure:
            fish_for_lists(item, all_lists)

    elif isinstance(structure, dict):
        for key, value in structure.items():
            if isinstance(value, list):
                # Guard against empty lists and non-dict first elements,
                # which previously raised IndexError / AttributeError.
                first = value[0] if value else None
                all_lists[key] = len(first) if isinstance(first, dict) else 0
            fish_for_lists(value, all_lists)

In [None]:
# List-valued keys found anywhere in the inferred structure.
collected_lists = {}
fish_for_lists(teststructure, collected_lists)
collected_lists

{'bodies': 47,
 'parents': 3,
 'genuses': 0,
 'rings': 6,
 'belts': 5,
 'stations': 19,
 'services': 0,
 'commodities': 8,
 'prohibitedCommodities': 0,
 'modules': 7,
 'ships': 3,
 'powers': 0,
 'factions': 5}

In [None]:
# List-valued keys found inside the body structure only.
body_lists = {}
fish_for_lists(teststructure['bodies'][0], body_lists)
body_lists

{'parents': 3,
 'genuses': 0,
 'rings': 6,
 'belts': 5,
 'stations': 19,
 'services': 0,
 'commodities': 8,
 'prohibitedCommodities': 0,
 'modules': 7,
 'ships': 3}

In [None]:
# Direct (non-nested) list-valued keys of a body and the size of their first slot.
{
    key: len(value[0])
    for key, value in teststructure['bodies'][0].items()
    if isinstance(value, list)
}

{'parents': 3, 'rings': 6, 'belts': 5, 'stations': 19}

In [None]:
# Body fields whose key was not collected in body_lists. Membership is tested
# on the dict itself instead of rebuilding a set of its keys for every field.
{
    key: value
    for key, value in teststructure['bodies'][0].items()
    if key not in body_lists
}

{'id': {int},
 'id64': {int},
 'bodyId': {int},
 'name': {str},
 'discovery': {'commander': {str}, 'date': {str}},
 'type': {str},
 'subType': {str},
 'distanceToArrival': {float, int},
 'isMainStar': {bool},
 'isScoopable': {bool},
 'age': {int},
 'spectralClass': {NoneType, str},
 'luminosity': {str},
 'absoluteMagnitude': {float, int},
 'solarMasses': {float},
 'solarRadius': {float},
 'surfaceTemperature': {float, int},
 'orbitalPeriod': {float},
 'semiMajorAxis': {float},
 'orbitalEccentricity': {NoneType, float},
 'orbitalInclination': {float},
 'argOfPeriapsis': {float},
 'rotationalPeriod': {float},
 'rotationalPeriodTidallyLocked': {bool},
 'axialTilt': {NoneType, float},
 'updateTime': {str},
 'isLandable': {bool},
 'gravity': {float},
 'earthMasses': {float},
 'radius': {float},
 'surfacePressure': {float},
 'volcanismType': {str},
 'atmosphereType': {NoneType, str},
 'atmosphereComposition': {'Helium': {float},
  'Hydrogen': {float},
  'Carbon dioxide': {float},
  'Silicate

In [None]:
# Sub-structures of all body keys that contain the text 'signal'.
[
    value
    for key, value in teststructure['bodies'][0].items()
    if 'signal' in key
]

[{'signals': {'$SAA_SignalType_Human;': {int},
   '$SAA_SignalType_Biological;': {int},
   '$SAA_SignalType_Geological;': {int},
   '$SAA_SignalType_Other;': {int},
   '$SAA_SignalType_Thargoid;': {int},
   '$SAA_SignalType_Guardian;': {int},
   'Painite': {int},
   'Platinum': {int},
   'Rhodplumsite': {int},
   'Serendibite': {int},
   '$SAA_SignalType_PlanetAnomaly;': {int},
   'Benitoite': {int},
   'Monazite': {int},
   'Alexandrite': {int},
   'Bromellite': {int},
   'Grandidierite': {int},
   'LowTemperatureDiamond': {int},
   'Opal': {int},
   'Tritium': {int}},
  'updateTime': {str},
  'genuses': [{}, {str}]}]

In [None]:
# Sub-structures of all body keys that contain the text 'signal'.
# NOTE(review): this cell is an exact repeat of the preceding signals query.
[v for k, v in teststructure['bodies'][0].items() if 'signal' in k]




[{'signals': {'$SAA_SignalType_Human;': {int},
   '$SAA_SignalType_Biological;': {int},
   '$SAA_SignalType_Geological;': {int},
   '$SAA_SignalType_Other;': {int},
   '$SAA_SignalType_Thargoid;': {int},
   '$SAA_SignalType_Guardian;': {int},
   'Painite': {int},
   'Platinum': {int},
   'Rhodplumsite': {int},
   'Serendibite': {int},
   '$SAA_SignalType_PlanetAnomaly;': {int},
   'Benitoite': {int},
   'Monazite': {int},
   'Alexandrite': {int},
   'Bromellite': {int},
   'Grandidierite': {int},
   'LowTemperatureDiamond': {int},
   'Opal': {int},
   'Tritium': {int}},
  'updateTime': {str},
  'genuses': [{}, {str}]}]

In [None]:
def go_through_items(iterable):
    """Print every element of ``iterable``, one per line, in iteration order."""
    for entry in iterable:
        print(entry)
        

In [None]:
# Print every record of the one-day dump.
# NOTE(review): an earlier run of this dump reported 76758 systems, so this floods the cell output.
go_through_items(edc_dbfilereader(os.path.join('data', dbname_1day)))

In [None]:
import time

In [None]:
# Re-import edcompanion.threadworker so edits to the module take effect
# without restarting the kernel.
import importlib
import edcompanion.threadworker
importlib.reload(edcompanion.threadworker)

In [None]:
from edcompanion.threadworker import create_producer_from_iterable

In [None]:
# Wrap the one-day dump reader in a producer object.
# NOTE(review): producer semantics live in edcompanion.threadworker, which is not visible here.
producer = create_producer_from_iterable(edc_dbfilereader(os.path.join('data', dbname_1day)))

In [None]:
# Start the producer (presumably a background worker -- confirm in edcompanion.threadworker).
producer.start()

In [None]:
# Fetch one item from the producer and display it.
producer.get_item()

In [None]:
# Print everything the producer yields through get_items().
for item in producer.get_items():
    print(item)


In [None]:
# Ask the producer to stop (exact semantics defined in edcompanion.threadworker).
producer.stop()

In [None]:
# Wait for the producer to finish (presumably joins its worker thread -- confirm).
producer.join()