In [None]:
import json
import re
import os
import gzip
import numbers
import numpy as np
import pandas as pd
import platform
import multiprocessing as mp
from functools import partial
import statsmodels.api as sm
import seaborn as sns
import matplotlib as mpl
from IPython.display import display, HTML
from datetime import datetime as dt
import matplotlib.pyplot as plt
from datetime import datetime as dt, timedelta as td

# Numeric spectral-class code -> class letter.
spectra_map = dict(enumerate('MKGFABOX'))

# Bit flags packed into a planet's 'singularity' field; each name owns one bit.
singularity_flags = {
    flag: 1 << bit
    for bit, flag in enumerate([
        'TidalLocked',
        'TidalLocked2',
        'TidalLocked4',
        'LaySide',
        'ClockwiseRotate',
        'MultipleSatellites',
    ])
}

# Numeric star-type code -> readable name.
star_type_map = dict(enumerate([
    'Main Sequence',
    'Giant',
    'White Dwarf',
    'Neutron Star',
    'Black Hole',
]))

# Vein key as it appears in the export -> display name (long form).
_vein_names_verbose = {
    'Iron': 'Iron',
    'Copper': 'Copper',
    'Silicium': 'Silicon',
    'Titanium': 'Titanium',
    'Stone': 'Stone',
    'Coal': 'Coal',
    'Oil': 'Oil',
    'Fireice': 'Fire ice',
    'Diamond': 'Kimberlite',
    'Fractal': 'Fractal Silicon',
    'Crysrub': 'Organic Crystal',
    'Grat': 'Optical Grating',
    'Bamboo': 'Spiniform Stalagmite Crystal',
    'Mag': 'Unipolar Magnets',
}
# The short form only abbreviates the two longest names.
_vein_names_terse = {
    **_vein_names_verbose,
    'Bamboo': 'Spin. Stal. Crystal',
    'Mag': 'Uni. Magnets',
}
# Change the trailing key to 'verbose' to get the long display names.
vein_type_map = {'verbose': _vein_names_verbose, 'terse': _vein_names_terse}['terse']

vein_types = list(vein_type_map.values())
ocean_types = ['Water', 'Sulfuric acid']
base_types = ['Iron', 'Copper', 'Silicon', 'Titanium', 'Stone', 'Coal']
gas_types = ['Hydrogen gas', 'Deuterium gas', 'Fire ice gas']

# Columns formatted as a rate ("/s") versus columns formatted in millions.
S_scaled = gas_types + ['Oil']
M_scaled = [name for name in vein_types if name not in S_scaled]

# Seeds that get their own table when candidates are displayed.
previous_runs = [
    84459701, 16015752, 22778246, 26027001, 57786559, 51279269,
    94520636, 32680784, 88834638, 22161804, 87759803, 76768927,
    18238299, 43378616, 84459702, 91684768, 82451776, 31106147,
]

%load_ext autotime

In [None]:
# Known export directories, keyed by the machine's hostname (platform.node()).
export_paths_by_host = {
    # original Windows location
    'Chonyi': '/mnt/c/' + r'\Users\Drake\Documents\Dyson Sphere Program\dsp_seedexporter'.replace('\\', '/'),

    # NTFS mount in Linux
    'chonyi': '/media/drake/Windows/Users/Drake/Documents/Dyson Sphere Program/dsp_seedexporter/',

    # copied from Chonyi
    'blincoln': os.path.expanduser('~/data/dsp/seeds'),
    'megalith': os.path.expanduser('~/data/dsp/seeds'),
}
# Resolution order: the DSP_SEED_DIR environment variable, then the hostname
# table, then the shared ~/data/dsp/seeds location. Previously an unlisted
# hostname raised a bare KeyError with no way to point at another directory.
export_path = os.environ.get('DSP_SEED_DIR') or export_paths_by_host.get(
    platform.node(), os.path.expanduser('~/data/dsp/seeds'))
# Export files are named seed_<8 digits>.json.gz. The dots are escaped and the
# pattern is anchored at the end so look-alikes such as
# "seed_12345678.json.gz.bak" are no longer accepted.
seed_re = re.compile(r'seed_(?P<seed>\d{8})\.json\.gz$')

In [None]:
# Directory entries whose names match the seed-export pattern, sorted by name.
files = sorted(name for name in os.listdir(export_path) if seed_re.match(name))
# The seed digits (as strings) parsed out of each file name, in the same order.
seeds = [seed_re.match(name).group('seed') for name in files]
print('{:d} files detected'.format(len(files)))

In [None]:
def process_universe(func, filename, directory=None):
    """Load one gzipped JSON universe export and return ``func(universe)``.

    The decoded dict gets ``universe['meta']['mtime']`` set to the file's
    modification time (a ``datetime``) before ``func`` is called.

    Args:
        func: Callable taking the decoded universe dict.
        filename: Name of the export file inside ``directory``.
        directory: Directory holding the export. Defaults to the module-level
            ``export_path`` when omitted.

    Returns:
        Whatever ``func`` returns.

    Raises:
        ValueError: If the file is truncated or does not contain valid JSON.
            The message names the file and the original error is chained.
    """
    if directory is None:
        directory = export_path
    file = os.path.join(directory, filename)
    try:
        with gzip.GzipFile(file, mode='r') as fd:
            universe = json.load(fd)
    except (EOFError, json.JSONDecodeError) as e:
        # Chain the cause so the traceback shows what was wrong with the file.
        raise ValueError('Bad file: ' + file) from e
    universe['meta']['mtime'] = dt.fromtimestamp(os.stat(file).st_mtime)
    return func(universe)

def passthrough(x):
    """Identity function: hand back the argument unchanged."""
    result = x
    return result

# Hand-picked exports that are loaded in full for interactive exploration.
subset = ['seed_18238299.json.gz', 'seed_94520636.json.gz', 'seed_94520637.json.gz']
# Leave one core free but always keep at least one worker: on a single-core
# machine cpu_count() - 1 is 0 and mp.Pool(0) raises ValueError.
with mp.Pool(max(1, mp.cpu_count() - 1)) as pool:
    # chunksize=1 spreads the few files over the workers; a chunk of 100 would
    # hand all of them to a single process.
    universes = pool.map(partial(process_universe, passthrough), subset, chunksize=1)
# The first sample universe is the one the following cells explore.
universe = universes[0]

In [None]:
def distance(star):
    """Return the Euclidean length of the coordinate values in ``star['position']``."""
    coords = np.fromiter(star['position'].values(), dtype='float')
    squared = coords ** 2
    return sum(squared) ** 0.5

# One (name, distance) record per star of the sample universe.
star_distances = [(name, distance(body)) for name, body in universe['star'].items()]
distance_df = pd.DataFrame.from_records(data=star_distances, columns=['star_name', 'abs. dist.'])
# Smallest distance first.
distance_df = distance_df.sort_values(by='abs. dist.')
distance_df

In [None]:
# distance_df is sorted ascending, so its first row is the star with the smallest distance.
home_star = distance_df.iloc[0]
home_star_name, home_star_dist = home_star['star_name'], home_star['abs. dist.']
# Sanity check: that star is expected to sit exactly at distance zero.
assert home_star_dist == 0.
home_system = universe['star'][home_star_name]

In [None]:
def map_strings(universe):
    """Collect the distinct (code, label) pairs used by one universe.

    Walks every star and planet and records ``(type, typeString)`` for stars
    and planets and ``(singularity, singularityString)`` for planets. A pair is
    only recorded when both of its keys are present.

    Returns:
        Tuple ``(planet_types, singularities, star_types, planet_count)``: the
        first three are arrays of unique rows (``np.unique(..., axis=0)``), the
        last is the number of planets visited.
    """
    star_types, planet_types, singularities = [], [], []
    planet_count = 0

    def record(target, item, code_key, label_key):
        # Equivalent to indexing and ignoring KeyError, assuming plain dicts.
        if code_key in item and label_key in item:
            target.append((item[code_key], item[label_key]))

    for star in universe['star'].values():
        record(star_types, star, 'type', 'typeString')
        for planet in star['planet'].values():
            record(singularities, planet, 'singularity', 'singularityString')
            record(planet_types, planet, 'type', 'typeString')
            planet_count += 1

    unique_planet_types = np.unique(planet_types, axis=0)
    unique_singularities = np.unique(singularities, axis=0)
    unique_star_types = np.unique(star_types, axis=0)
    return unique_planet_types, unique_singularities, unique_star_types, planet_count


# Run map_strings over every export in parallel. Keep at least one worker:
# cpu_count() - 1 is 0 on a single-core machine and mp.Pool(0) raises ValueError.
with mp.Pool(max(1, mp.cpu_count() - 1)) as pool:
    planet_types, singularities, star_types, planet_count = zip(
        *pool.imap_unordered(partial(process_universe, map_strings), files, chunksize=100))
# Merge the per-universe tables, skipping empty ones, and de-duplicate the rows.
planet_types = np.unique(np.concatenate([x for x in planet_types if len(x) > 0]), axis=0)
singularities = np.unique(np.concatenate([x for x in singularities if len(x) > 0]), axis=0)
star_types = np.unique(np.concatenate([x for x in star_types if len(x) > 0]), axis=0)


In [None]:
# Show the planet and star (code, label) tables, in that order.
for code_table in (planet_types, star_types):
    display(pd.DataFrame.from_records(code_table, columns=['code', 'type']))

In [None]:
# Factor applied to every planet's 'Oil' value in scan_systems.
# NOTE(review): presumably converts the exported oil amount into an extraction
# rate per second (Oil is formatted with a "/s" suffix) — confirm.
OIL_SPEED_MULTIPLIER = 4e-5
# A planet counts as 'in_sphere' when its sunDistance is at most
# (2 * dysonRadius) / DYSON_SPHERE_BUFFER.
DYSON_SPHERE_BUFFER = 1.35

def reorder(df, resource_cols=None):
    """Return a copy of ``df`` with the resource columns last and NaN-free.

    Args:
        df: Frame to rearrange. It is not modified.
        resource_cols: Resource column names, in output order. Defaults to
            ``vein_types + gas_types``.

    Returns:
        A new frame holding all non-resource columns in their original order,
        followed by ``resource_cols``. Missing resource values are 0, and a
        resource column that is absent from ``df`` is created and filled with 0
        (indexing it directly used to raise KeyError).
    """
    if resource_cols is None:
        resource_cols = vein_types + gas_types
    resource_cols = list(resource_cols)
    first_cols = [x for x in df.columns if x not in resource_cols]
    # reindex returns a new frame, so the caller's data is left untouched.
    out = df.reindex(columns=first_cols + resource_cols)
    out[resource_cols] = out[resource_cols].fillna(0)
    return out

def scan_systems(universe):
    """Flatten one universe export into star, planet and seed tables.

    Args:
        universe: Decoded export dict. Reads ``universe['meta']``
            (``birthStarId``, ``birthPlanetId``, ``seed``, ``mtime``) and every
            star and planet under ``universe['star']``.

    Returns:
        Tuple ``(star_df, planet_df, seed_df)``:

        * ``star_df`` has one row per star: identity columns, the per-system
          sum of every resource column, the planet count and one boolean
          column per ocean type.
        * ``planet_df`` has one row per planet: flags plus its vein amounts or
          gas speeds. 'Oil' is scaled by ``OIL_SPEED_MULTIPLIER``.
        * ``seed_df`` is a single row with the seed and the export's mtime.

    Raises:
        ValueError: If a planet has neither a 'vein' nor a 'gas' entry.
    """
    meta = universe['meta']
    home_star_id = meta['birthStarId']
    home_planet_id = meta['birthPlanetId']
    seed = meta['seed']
    mtime = meta['mtime']
    resource_cols = vein_types + gas_types
    stars = []
    planets = []

    for star in universe['star'].values():
        max_dyson_rad = star['dysonRadius'] * 2
        in_home_system = star['id'] == home_star_id

        stars.append({
            'seed': seed,
            'star': star['name'],
            'home_system': in_home_system,
            'spectrum': spectra_map[star['spectr']],
            'type': star_type_map[star['type']],
            'luminosity': star['luminosity'],
        })

        for planet in star['planet'].values():
            planet_dict = {
                'seed': seed,
                'star': star['name'],
                'planet': planet['name'],
                'home_system': in_home_system,
                'homeworld': planet['id'] == home_planet_id,
                'type': planet.get('typeString', None),
                # Only the plain TidalLocked bit counts; TidalLocked2/4 are ignored.
                'tidal_lock': (planet['singularity'] & singularity_flags['TidalLocked']) > 0,
                'in_sphere': planet['sunDistance'] <= max_dyson_rad / DYSON_SPHERE_BUFFER,
                'ocean': planet.get('waterItem', None),
                'landPercent': planet.get('landPercent', 0.),
            }
            if 'vein' in planet:
                planet_veins = {vein_type_map[k]: v for k, v in planet['vein'].items()}
            elif 'gas' in planet:
                planet_veins = {(k + ' gas'): float(v['gasSpeed']) for k, v in planet['gas'].items()}
            else:
                raise ValueError('Bad planet: {}'.format(planet['name']))
            assert all(isinstance(x, (int, float)) for x in planet_veins.values()), planet_veins
            planets.append({**planet_dict, **planet_veins})

    planet_df = pd.DataFrame.from_records(planets)
    # A universe without any oil seep has no 'Oil' column at all.
    if 'Oil' in planet_df.columns:
        planet_df['Oil'] *= OIL_SPEED_MULTIPLIER

    star_df = pd.DataFrame.from_records(stars)
    systems = planet_df.groupby('star')
    # Sum only the resource columns that exist. Summing every column relies on
    # pandas silently dropping text columns, which pandas >= 2.0 no longer does.
    present_cols = [c for c in resource_cols if c in planet_df.columns]
    star_resources = systems[present_cols].sum().reindex(resource_cols, axis='columns')
    star_resources['planets'] = systems['planet'].count()
    # One boolean column per ocean type. reindex (rather than [[...]] indexing)
    # keeps this working when an ocean type is absent from the universe, and
    # gives systems without any ocean a row of False.
    oceans = (
        (systems['ocean'].value_counts().unstack() > 0)
        .reindex(columns=ocean_types, fill_value=False)
        .reindex(star_df['star'], fill_value=False)
    )
    star_df = (star_df
               .merge(star_resources, left_on='star', right_index=True, validate='1:1')
               .merge(oceans, left_on='star', right_index=True, validate='1:1'))

    seed_df = pd.DataFrame.from_records([{'seed': seed, 'ts': mtime}])

    return star_df, planet_df, seed_df

# Try the scanner on the sample universe and preview each resulting table.
star_df, planet_df, seed_df = scan_systems(universe)
for preview in (star_df, planet_df, seed_df):
    display(preview.head())

In [None]:
# Scan every export in parallel. Keep at least one worker: cpu_count() - 1 is 0
# on a single-core machine and mp.Pool(0) raises ValueError.
with mp.Pool(max(1, mp.cpu_count() - 1)) as pool:
    star_dfs, planet_dfs, seed_dfs = zip(
        *pool.imap_unordered(partial(process_universe, scan_systems), files, chunksize=100))
# Stack the per-universe tables; reorder() moves resource columns last and fills gaps with 0.
star_df = reorder(pd.concat(star_dfs))
planet_df = reorder(pd.concat(planet_dfs))
seed_df = pd.concat(seed_dfs)

In [None]:
# Independent copy of the rows describing each seed's starting system.
home_systems = star_df[star_df['home_system'] == True].copy()
home_systems.head()

In [None]:
# Independent copy of the planets that belong to a starting system.
home_system_planets = planet_df[planet_df['home_system'] == True].copy()
home_system_planets.head()

In [None]:
# Per-planet flags: tidally locked OR inside the sphere radius, and both at once.
tidal, inside = home_system_planets['tidal_lock'], home_system_planets['in_sphere']
home_system_planets['TL|IS'] = tidal | inside
home_system_planets['TL&IS'] = tidal & inside

# Number of flagged planets per (seed, home star).
flag_cols = ['TL|IS', 'TL&IS', 'tidal_lock', 'in_sphere']
tl_is = home_system_planets.groupby(['seed', 'star'])[flag_cols].sum()

# For each flag: how many home systems have 0, 1, 2, ... such planets.
tl_is_flat = tl_is.reset_index()
pd.DataFrame({
    col: tl_is_flat.groupby(col)['star'].count() for col in tl_is.columns
}).fillna(0).astype(int)

In [None]:
# The starting planet of each seed, and its land fraction under the shorter name 'land'.
homeworlds = home_system_planets[home_system_planets['homeworld'] == True]
landmass = homeworlds.loc[:, ['seed', 'landPercent']].rename(columns={'landPercent': 'land'})

In [None]:
# Turn resource amounts into presence flags, then average them per
# (spectrum, type) group: the share of such stars that have each resource.
presence_cols = vein_types + gas_types
res_df = star_df.copy()
res_df[presence_cols] = res_df[presence_cols] > 0
res_probs = res_df.groupby(['spectrum', 'type'])[ocean_types + presence_cols].mean()

# Resource of interest: list the groups where it occurs, most likely first.
res = 'Organic Crystal'
res_probs.loc[res_probs[res] > 0].sort_values(res, ascending=False).style.format(precision=2)

In [None]:
def S_scaler(f):
    """Format ``f`` with two decimals followed by a '/s' suffix."""
    return format(f, '.2f') + '/s'

def M_scaler(f):
    """Format ``f`` in millions: one decimal below 10 million, none from there up."""
    digits = 1 if f < 10e6 else 0
    return '{:.{}f}'.format(f / 1e6, digits)

def f2_scaler(f):
    """Format ``f`` with exactly two decimals."""
    return format(f, '.2f')
    
# Column name -> text formatter. Every column listed here is also colour-shaded by show().
shaders = {'limit': M_scaler, 'land': f2_scaler}
# Register each resource under its plain name first, then its ' (Start)' variant.
for suffix in ('', ' (Start)'):
    shaders.update({col + suffix: S_scaler for col in S_scaled})
    shaders.update({col + suffix: M_scaler for col in M_scaled})

# Formatters for display: everything in shaders plus columns that are formatted but not shaded.
formatters = dict(shaders)
formatters['ts'] = lambda x: x.strftime('%b-%d')
formatters['rank'] = f2_scaler

def shader(series, pct):
    """Return one CSS background declaration per cell of ``series``.

    Args:
        series: The column being styled.
        pct: Frame of values used to pick colours, looked up with ``series``'s
            index and name. Expected to hold values in [0, 1] (percentile
            ranks) — the colormap is sampled with them directly.

    Returns:
        List of ``'background: #rrggbb;'`` strings, one per cell.
    """
    # Raising to 4/5 shifts values below 1 upward before the colormap lookup.
    pct_subset = pct.loc[series.index, series.name] ** (4/5)
    # mpl.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap is the accessor that exists across versions.
    cmap = plt.get_cmap('coolwarm_r')
    # Pin lightness to 0.75 so the cell text stays readable on the background.
    rgb = [sns.set_hls_values(color=cmap(x), h=None, l=0.75, s=None) for x in pct_subset]
    return ['background: {};'.format(mpl.colors.rgb2hex(x)) for x in rgb]

def show(pct, df):
    """Render ``df`` as an HTML table, shading columns by the values in ``pct``.

    Only columns of ``df`` that are keys of ``shaders`` are shaded; text
    formatting comes from ``formatters``.
    """
    shade_cols = [col for col in df.columns if col in shaders]
    styled = df.style.apply(func=shader, pct=pct[shade_cols], axis='index', subset=shade_cols)
    styled = styled.format(formatter=formatters)
    display(HTML(styled.to_html()))

In [None]:
# Starting-system resources per seed; 'limit' is the scarcest of the base resources.
start_cols = base_types + ['Oil', 'Fire ice', 'Fire ice gas']
home_resources = home_systems.set_index('seed')[start_cols]
home_resources['limit'] = home_resources[base_types].min(axis='columns')

# Whole-universe totals per seed.
universe_totals = star_df.groupby('seed')[ocean_types + vein_types + gas_types].sum()

# Join everything on seed, one row per seed.
seed_res = tl_is.reset_index(level='star')
for right in (seed_df, landmass, home_resources):
    seed_res = seed_res.merge(right, on='seed', validate='1:1')
# Columns present on both sides: the starting-system version gets the
# ' (Start)' suffix, the universe total keeps the plain name.
seed_res = seed_res.merge(universe_totals, on='seed', validate='1:1', suffixes=[' (Start)', ''])
seed_res = seed_res.set_index('seed')

In [None]:
# Cutoff seven days before now; only referenced by the commented-out `ts` filter below.
recent = dt.now() - td(days=7)

# Candidate seeds. The commented-out lines are optional filters / sort orders
# that can be switched on by uncommenting them.
potential = (
    seed_res
    # Starting system has more than 1e6 fire ice, or has both fire ice veins and fire ice gas.
    .query('(`Fire ice (Start)` > 1e6) | ((`Fire ice (Start)` > 0) & (`Fire ice gas (Start)` > 0))')
    # .query('`ts` > @recent')
    # At least one starting-system planet is tidally locked or inside the sphere radius.
    .query('`TL|IS` >= 1')
    # .query('`tidal_lock` >= 1')
    # .query('limit > 6e6')
    # .query('`Organic Crystal` > 50e6')
    # .query('`land` > .6')
    
    # .sort_values('limit', ascending=False)
    # .sort_values('Organic Crystal', ascending=False)
    
    .drop(columns=['TL|IS', 'TL&IS'])
    .rename(columns={'tidal_lock': 'TL', 'in_sphere': 'IS'})
)

# Percentile ranks: all_pct ranks against every seed, potential_pct only among
# the candidates. Only potential_pct is used below (via rank_pct).
all_pct = seed_res.rank(pct=True, method='min').loc[potential.index]
potential_pct = potential.rank(pct=True, method='min')
rank_pct = potential_pct

# Column groups available for ranking (rsrc_cols is not referenced below).
rsrc_cols = vein_types + gas_types
start_rsrc_cols = [
    'Iron (Start)', 'Copper (Start)', 'Silicon (Start)', 'Titanium (Start)', 'Stone (Start)', 'Coal (Start)', 'Oil (Start)']
useful_rsrc_cols = [
    'Iron', 'Copper', 'Silicon', 'Titanium', 'Stone', 'Coal', 'Oil',
    'Organic Crystal', 'Hydrogen gas', 'Fire ice gas']


# Alternative choices of ranking columns; exactly one assignment should be active.
# rank_cols = ['land', 'limit'] + start_rsrc_cols + useful_rsrc_cols
# rank_cols = start_rsrc_cols + useful_rsrc_cols
# rank_cols = ['limit', 'Oil', 'Organic Crystal', 'Silicon (Start)', 'Oil (Start)']
rank_cols = useful_rsrc_cols

# A seed's score is its lowest percentile across rank_cols, i.e. its weakest resource.
sort_val = rank_pct[rank_cols].min(axis='columns')
# sort_val = rank_pct[rank_cols].quantile(0.1, axis='columns')
# sort_val = rank_pct[rank_cols].mean(axis='columns')

# Put the score in as the second column and order candidates best-first.
potential.insert(1, 'rank', sort_val)
sort_idx = sort_val.sort_values(ascending=False).index
potential = potential.loc[sort_idx]
# Seeds listed in previous_runs get their own table; then the top 15 of the remaining seeds.
pre_run = potential.index.isin(previous_runs)
show(rank_pct, potential.loc[pre_run])
show(rank_pct, potential.loc[~pre_run].head(15))


In [None]:
def _start_vein_cols(frame):
    """Names of the ' (Start)' columns of ``frame`` that are neither oil nor gas."""
    return [c for c in frame.columns if 'Start' in c and 'Oil' not in c and 'gas' not in c]

# Density of starting-system vein amounts (in millions): all seeds on top, candidates below.
fig, axs = plt.subplots(2, 1, figsize=[10,10], sharex=True, sharey=True)
for ax, frame in zip(axs, (seed_res, potential)):
    (frame[_start_vein_cols(frame)] / 1e6).plot.kde(ax=ax)
plt.xlim(0, 40)
plt.ylim(0, .4)


In [None]:
# Starting-system oil density: all seeds, then the candidates, on the same axes.
oil_col = 'Oil (Start)'
seed_res[oil_col].plot.kde()
potential[oil_col].plot.kde()


In [None]:
# Starting-system fire ice gas density, restricted to seeds that have any.
gas_col = 'Fire ice gas (Start)'
seed_res.loc[seed_res[gas_col] > 0, gas_col].plot.kde()
potential.loc[potential[gas_col] > 0, gas_col].plot.kde()

In [None]:
# mpl.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# pyplot.get_cmap is the accessor that exists across versions.
cmap = plt.get_cmap('coolwarm_r')

# Two identical rows running from 0 to 1, so imshow can draw them as a colour strip.
gradient = np.linspace(0, 1, 256)
gradient = np.vstack((gradient, gradient))


def plot_color_gradients(cmap_category, power_list):
    """Draw one colour strip per exponent in ``power_list``.

    Each strip shows the module-level ``gradient`` raised to that power and
    rendered with the module-level ``cmap``; the exponent is printed to the
    left of its strip.

    Args:
        cmap_category: Text placed before ' colormaps' in the figure title.
        power_list: Non-empty sequence of exponents, one strip each.
    """
    nrows = len(power_list)
    # Figure height: fixed top and bottom margins plus a fixed amount per strip.
    figh = 0.35 + 0.15 + (nrows + (nrows-1)*0.1)*0.22
    # squeeze=False always yields a 2-D array of axes, so a single-element
    # power_list works too (plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(nrows=nrows, figsize=(6.4, figh), squeeze=False)
    axs = axs[:, 0]
    fig.subplots_adjust(top=1-.35/figh, bottom=.15/figh, left=0.2, right=0.99)

    axs[0].set_title(cmap_category + ' colormaps', fontsize=14)

    for ax, power in zip(axs, power_list):
        ax.imshow(gradient ** power, aspect='auto', cmap=cmap)
        ax.text(-.01, .5, '{:.3f}'.format(power), va='center', ha='right', fontsize=10,
                transform=ax.transAxes)

    # Hide ticks and spines on every strip.
    for ax in axs:
        ax.set_axis_off()

# Exponents to compare; 4/5 is the one shader() applies.
power_list = [1, 4/5, 3/4, 2/3, 1/2]
plot_color_gradients(cmap_category='Powers', power_list=power_list)
