# Spaceships analyzer

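This notebook analyzes the previously downloaded spaceships: it computes per-ship metrics (occupied volume, functional-block proportion, filled proportion of the bounding volume, and axis-length ratios), then fits Gaussian kernel density estimators to those metrics and plots their distributions.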

## Imports

In [None]:
import numpy as np
import os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import scipy.stats as stats
import json
import pickle
from tqdm.notebook import tqdm
from pathlib import Path


from pcgsepy.setup_utils import setup_matplotlib
from pcgsepy.xml_conversion import convert_xml_to_structure

## Main variables

In [None]:
# Directory whose entries are parsed as spaceship blueprints.
SPACESHIPS_DIR = './content'
# Directory created by the next cell.
OUTPUT_DIR = '../lsystem/stats'
# When True, the spaceships are re-parsed even if 'spaceships_stats.json' already exists.
RUN_ANALYSIS = False

setup_matplotlib()

# Per-spaceship metric lists, filled by parse_stats() or loaded from the JSON cache:
#   fuots   - functional-block volume ratio
#   fiots   - occupied volume over the product of the structure array dimensions
#   volumes - occupied volume
#   lom     - largest axis / medium axis
#   los     - largest axis / smallest axis
fuots, fiots, volumes, lom, los = [], [], [], [], []

In [None]:
# Create the output directory (and any missing parents); do nothing if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

## Spaceships loading and metrics computing

In [None]:
# os.listdir() returns entries in arbitrary, platform-dependent order, so sort before
# dropping the first entry; otherwise the skipped file (and the order of the collected
# metrics) can differ between runs and machines.
# NOTE(review): the reason for skipping the first entry is not visible here - presumably a
# non-spaceship file that sorts first; confirm and replace with an explicit filter.
available_spaceships = sorted(os.listdir(SPACESHIPS_DIR))[1:]

def parse_stats():
    """Parse every available spaceship and record its metrics in the module-level lists.

    For each entry of ``available_spaceships`` the XML file is parsed, converted to a
    structure and the following values are appended to ``fuots``, ``fiots``, ``volumes``,
    ``lom`` and ``los`` respectively:

    * functional volume / (functional volume + filled volume)
    * (functional volume + filled volume) / product of the structure array dimensions
    * functional volume + filled volume
    * largest axis / medium axis
    * largest axis / smallest axis

    Spaceships that fail at any step are skipped as a whole, so the five lists always
    stay index-aligned; a summary of the skipped files is printed at the end.

    Returns:
        None. Results are stored in the module-level lists, which are emptied first so
        that calling this function again does not duplicate entries.
    """
    # Clear in place (same list objects) so a re-run starts from a clean state.
    for metric_list in (fuots, fiots, volumes, lom, los):
        metric_list.clear()

    skipped = []
    for s in tqdm(available_spaceships, desc='Analyzing spaceships...'):
        bp = os.path.join(SPACESHIPS_DIR, s)
        try:
            root = ET.parse(bp).getroot()
            structure = convert_xml_to_structure(root_node=root)
            structure.sanify()

            functional_blocks_volume, filled_volume = 0., 0.
            for b in structure.get_all_blocks():
                if not b.block_type.startswith('MyObjectBuilder_CubeBlock_'):
                    functional_blocks_volume += b.volume
                filled_volume += b.volume

            shape = structure.as_array.shape
            total_volume = shape[0] * shape[1] * shape[2]
            largest_axis, medium_axis, smallest_axis = sorted(shape, reverse=True)

            # NOTE(review): filled_volume already includes the functional blocks, so this
            # sum counts their volume twice. Kept as-is because the fitted estimators
            # depend on these exact values - confirm whether this is intended.
            occupied_volume = functional_blocks_volume + filled_volume

            # Compute every metric before storing any of them: a failure here (e.g. a
            # division by zero) must not leave the lists with different lengths.
            fuot = functional_blocks_volume / occupied_volume
            fiot = occupied_volume / total_volume
            largest_over_medium = largest_axis / medium_axis
            largest_over_smallest = largest_axis / smallest_axis
        except Exception as err:
            # Best effort: keep going, but remember what was skipped and why.
            skipped.append((s, err))
            continue

        fuots.append(fuot)
        fiots.append(fiot)
        volumes.append(occupied_volume)
        lom.append(largest_over_medium)
        los.append(largest_over_smallest)

    if skipped:
        print(f'Skipped {len(skipped)} of {len(available_spaceships)} spaceships:')
        for name, err in skipped:
            print(f'  {name}: {type(err).__name__}: {err}')

In [None]:
# Reuse the cached metrics when they exist and no re-analysis was requested;
# otherwise parse the spaceships and (re)write the cache.
if os.path.exists('spaceships_stats.json') and not RUN_ANALYSIS:
    with open('spaceships_stats.json', 'r') as f:
        spaceships_stats = json.load(f)
    fuots, fiots = spaceships_stats['fuots'], spaceships_stats['fiots']
    volumes = spaceships_stats['volumes']
    lom, los = spaceships_stats['lom'], spaceships_stats['los']
else:
    parse_stats()
    with open('spaceships_stats.json', 'w') as f:
        json.dump(
            {'volumes': volumes, 'fuots': fuots, 'fiots': fiots, 'lom': lom, 'los': los},
            f,
        )

## Estimators fitting and metrics plotting

In [None]:
# Histogram fill colour and density-curve colour.
COLOR_BINS, COLOR_LINE = '#4CD7D0', '#2D807B'
# Spaceships with a volume above this value are left out of the plots and estimators.
VOLUME_CUTOFF = 1e6
# One bin per three spaceships, rounded up (integer ceiling division).
N_BINS = (len(volumes) + 2) // 3

In [None]:
# Histogram of the spaceship volumes that do not exceed VOLUME_CUTOFF.
to_plot = [v for v in volumes if v <= VOLUME_CUTOFF]
# Percentage of spaceships kept after applying the cutoff.
diff = 100 * len(to_plot) / len(volumes)

# savefig() does not create missing directories, so make sure the target exists.
Path('results/stats').mkdir(parents=True, exist_ok=True)

plt.hist(to_plot, bins=N_BINS, fc=COLOR_BINS)
plt.title('Volume distribution')
plt.ylabel('# occurrences')
plt.xlabel('Volume')
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.autoscale(enable=True, axis='x', tight=True)
plt.savefig('results/stats/stats-volume.png', transparent=True)
plt.show()

In [None]:
from sklearn.neighbors import KernelDensity

# Fit a Gaussian KDE to the functional-block ratio of the spaceships whose volume does
# not exceed VOLUME_CUTOFF, pickle the estimator and plot it over the histogram.
to_analyze = [f for f, v in zip(fuots, volumes) if v <= VOLUME_CUTOFF]
# Summary statistics of the selected values (not used by the plot below).
mean, std = np.mean(to_analyze), np.std(to_analyze)
x = np.linspace(min(to_analyze), max(to_analyze), N_BINS)
gkde = stats.gaussian_kde(to_analyze)
estimated = gkde.evaluate(x)

# Neither open() nor savefig() creates missing directories, so make sure both exist.
Path('estimators').mkdir(parents=True, exist_ok=True)
Path('results/stats').mkdir(parents=True, exist_ok=True)

with open('estimators/futo.pkl', 'wb') as f:
    pickle.dump(gkde, f)

plt.hist(to_analyze, bins=N_BINS, fc=COLOR_BINS, density=True)
plt.plot(x, estimated, c=COLOR_LINE, lw=2)
plt.title('Functional blocks / total blocks')
plt.ylabel('Density')
plt.xlabel('Proportion')
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.savefig('results/stats/stats-futo.png', transparent=True, bbox_inches='tight')
plt.show()

In [None]:
# Fit a Gaussian KDE to the filled-volume ratio of the spaceships whose volume does
# not exceed VOLUME_CUTOFF, pickle the estimator and plot it over the histogram.
to_analyze = [f for f, v in zip(fiots, volumes) if v <= VOLUME_CUTOFF]
# Summary statistics of the selected values (not used by the plot below).
mean, std = np.mean(to_analyze), np.std(to_analyze)
x = np.linspace(min(to_analyze), max(to_analyze), N_BINS)
gkde = stats.gaussian_kde(to_analyze)
estimated = gkde.evaluate(x)

# Neither open() nor savefig() creates missing directories, so make sure both exist.
Path('estimators').mkdir(parents=True, exist_ok=True)
Path('results/stats').mkdir(parents=True, exist_ok=True)

with open('estimators/tovo.pkl', 'wb') as f:
    pickle.dump(gkde, f)

plt.hist(to_analyze, bins=N_BINS, fc=COLOR_BINS, density=True)
plt.plot(x, estimated, c=COLOR_LINE, lw=2)
plt.title('Filled volume / total volume')
plt.ylabel('Density')
plt.xlabel('Proportion')
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.savefig('results/stats/stats-tovo.png', transparent=True, bbox_inches='tight')
plt.show()

In [None]:
# Fit a Gaussian KDE to the largest/medium axis ratio of the spaceships whose volume
# does not exceed VOLUME_CUTOFF, pickle the estimator and plot it over the histogram.
to_analyze = [f for f, v in zip(lom, volumes) if v <= VOLUME_CUTOFF]
# Summary statistics of the selected values (not used by the plot below).
mean, std = np.mean(to_analyze), np.std(to_analyze)
x = np.linspace(min(to_analyze), max(to_analyze), N_BINS)
gkde = stats.gaussian_kde(to_analyze)
estimated = gkde.evaluate(x)

# Neither open() nor savefig() creates missing directories, so make sure both exist.
Path('estimators').mkdir(parents=True, exist_ok=True)
Path('results/stats').mkdir(parents=True, exist_ok=True)

with open('estimators/mame.pkl', 'wb') as f:
    pickle.dump(gkde, f)

plt.hist(to_analyze, bins=N_BINS, fc=COLOR_BINS, density=True)
plt.plot(x, estimated, c=COLOR_LINE, lw=2)
plt.title('Largest axis / Medium axis')
plt.ylabel('Density')
plt.xlabel('Proportion')
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.savefig('results/stats/stats-mame.png', transparent=True, bbox_inches='tight')
plt.show()

In [None]:
# Fit a Gaussian KDE to the largest/smallest axis ratio of the spaceships whose volume
# does not exceed VOLUME_CUTOFF, pickle the estimator and plot it over the histogram.
to_analyze = [f for f, v in zip(los, volumes) if v <= VOLUME_CUTOFF]
# Summary statistics of the selected values (not used by the plot below).
mean, std = np.mean(to_analyze), np.std(to_analyze)
x = np.linspace(min(to_analyze), max(to_analyze), N_BINS)
gkde = stats.gaussian_kde(to_analyze)
estimated = gkde.evaluate(x)

# Neither open() nor savefig() creates missing directories, so make sure both exist.
Path('estimators').mkdir(parents=True, exist_ok=True)
Path('results/stats').mkdir(parents=True, exist_ok=True)

with open('estimators/mami.pkl', 'wb') as f:
    pickle.dump(gkde, f)

plt.hist(to_analyze, bins=N_BINS, fc=COLOR_BINS, density=True)
plt.plot(x, estimated, c=COLOR_LINE, lw=2)
plt.title('Largest axis / Smallest axis')
plt.ylabel('Density')
plt.xlabel('Proportion')
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.savefig('results/stats/stats-mami.png', transparent=True, bbox_inches='tight')
plt.show()