In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from joblib import Parallel, delayed

#### Disable auto scrolling of output

In [2]:
%%javascript
// Replace the output area's scroll check with a function that always answers
// "no", so long cell outputs (e.g. the large pair plots below) are shown in full.
// NOTE(review): relies on the global `IPython` object — presumably only
// available in the classic Notebook front end; confirm before relying on it.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

# Global Settings

In [3]:
# Lower mass limit per resolution; used as the cut threshold when the catalogs are converted
lowLim2048 = 1.
lowLim1024 = 4.  # Still tbd
lowLim512  = 8.  # Still tbd

# The 'features' to investigate, i.e. the dataframe columns drawn on the pair plot axes
pairPlotColumns = ['logVolume', 'logMass', 'logDensity',
                   'rPosition', 'zPositionAbs', 'polarAngle',
                   'vMag', 'noiseTime']

# Axis labels, one per entry of pairPlotColumns and in the same order.
# Each label is the quantity name, a line break, then the unit in parentheses.
labels = [f'{quantity}\n({unit})' for quantity, unit in (
    ('Volume',              r'$\log{\left(pc^3\right)}$'),
    ('Mass',                r'$\log{\left(M_\odot\right)}$'),
    ('Density',             r'$\log{\left(M_\odot/pc^3\right)}$'),
    ('Radial Position',     r'$pc$'),
    ('Absolute Z Position', r'$pc$'),
    ('Polar Angle',         r'radians'),
    ('Velocity Magnitude',  r'$km/s$'),
    ('Time',                r'$Myr$'),
)]

# Testing switch:
#   True  -> only draw the comparison of the different plot styles
#   False -> only draw the three resolutions in the chosen style
testing = False

# Import and setup Dataframes

In [4]:
# Load the cloud catalogs for the three resolutions from the data directory.
# Nothing is printed here; the arrays are only read into memory.
# NOTE(review): converter() below indexes these by field name ('mass', 'volume', ...),
# so they are presumably numpy structured arrays — confirm against the files.
catalog512 = np.load('../data/physCatalog512.npy')
catalog1024 = np.load('../data/physCatalog1024.npy')
catalog2048 = np.load('../data/physCatalog2048.npy')

In [5]:
# Convert to dataframes with mass, volume, radial distance, and magnitude of velocity

# ==============================================================================
def converter(catalog, lowLim, rng=None):
    """Build the feature dataframe for one catalog, dropping low mass clouds.

    Parameters
    ----------
    catalog : array-like with named fields
        Must support boolean-mask indexing and lookup by field name for
        'ID', 'volume', 'mass', 'positionX/Y/Z', 'velocityX/Y/Z',
        'resolution' and 'time' (assumed to be a numpy structured array
        as returned by np.load — TODO confirm).
    lowLim : float
        Mass threshold. Only clouds with mass strictly greater than
        lowLim are kept.
    rng : object with a ``uniform(low, high, size)`` method, optional
        Random source for the time jitter, e.g. ``np.random.default_rng(seed)``.
        When None (the default) the global ``np.random`` state is used.

    Returns
    -------
    pandas.DataFrame
        One row per retained cloud with the columns ID, volume, mass,
        density, logVolume, logMass, logDensity, rPosition, zPosition,
        zPositionAbs, vMag, polarAngle, resolution, time and noiseTime.
    """
    # Cut out the low mass clouds that are poorly sampled. This has to happen
    # before any column is derived, otherwise the cut never reaches the output.
    catalog = catalog[catalog['mass'] > lowLim]

    # Fall back to the global numpy random state when no generator is supplied
    sampler = np.random if rng is None else rng

    # Copy data to the dataFrame
    outputDF = pd.DataFrame()

    outputDF['ID']         = catalog['ID']
    outputDF['volume']     = catalog['volume']
    outputDF['mass']       = catalog['mass']
    outputDF['density']    = outputDF['mass'] / outputDF['volume']
    outputDF['logVolume']  = np.log10(outputDF['volume'])
    outputDF['logMass']    = np.log10(outputDF['mass'])
    outputDF['logDensity'] = np.log10(outputDF['density'])

    # Distance from the origin using all three position components
    outputDF['rPosition']  = np.sqrt(catalog['positionX']**2
                                     + catalog['positionY']**2
                                     + catalog['positionZ']**2)
    outputDF['zPosition']    = catalog['positionZ']
    outputDF['zPositionAbs'] = np.abs(outputDF['zPosition'])
    outputDF['vMag']         = np.sqrt(catalog['velocityX']**2
                                       + catalog['velocityY']**2
                                       + catalog['velocityZ']**2)

    # Angle from the z axis, folded so both hemispheres map onto [0, pi/2].
    # A cloud exactly at the origin (rPosition == 0) yields NaN here.
    outputDF['polarAngle'] = np.arccos(outputDF['zPositionAbs'] / outputDF['rPosition'])

    outputDF['resolution'] = catalog['resolution']
    outputDF['time']       = catalog['time']

    # Time with a uniform jitter in [-0.5, 0.5) added to every row
    # (presumably to spread discrete output times out for plotting — confirm)
    outputDF['noiseTime']  = outputDF['time'] + sampler.uniform(low  = -0.5,
                                                                high =  0.5,
                                                                size = outputDF['time'].shape[0])

    return outputDF
# ==============================================================================

# Convert each raw catalog with its own low mass limit (512, then 1024, then 2048)
processed512, processed1024, processed2048 = (
    converter(rawCatalog, massLimit)
    for rawCatalog, massLimit in ((catalog512,  lowLim512),
                                  (catalog1024, lowLim1024),
                                  (catalog2048, lowLim2048))
)

# Start Plotting 

## Different plot styles

### Scatter/Histogram

Works OK, but the contours are hard to see.

In [6]:
if testing:
    # Setup the plot and return the plot object.
    # The hue column 'time' is not one of the plotted features, so it has to be
    # selected explicitly or seaborn cannot find it; `vars` then restricts the
    # grid axes to the features. The sample size is capped at the catalog
    # length so small catalogs do not make .sample() raise.
    figGrid = sns.pairplot(processed512[pairPlotColumns + ['time']].sample(min(1000, len(processed512))),
                           vars      = pairPlotColumns,
                           kind      = 'scatter',
                           diag_kind = 'hist',
                           corner    = True,
                           hue       = 'time',
                           palette   = 'flare',
                           plot_kws  ={'alpha': 0.2})

    plt.show()

### KDE/KDE

Probably the best option: the contours are clear.

In [7]:
if testing:
    # KDE panels for a random 1000-row subsample of the 512 catalog,
    # restricted to the chosen feature columns; keep the returned grid object
    figGrid = sns.pairplot(
        data=processed512.loc[:, pairPlotColumns].sample(n=1000),
        kind='kde',
        corner=True,
    )

    plt.show()

### Univariate Histogram/Histogram

Better than the scatter plots but still hard to see the contours

In [8]:
if testing:
    # Setup the plot and return the plot object.
    # The hue column 'time' is not one of the plotted features, so it has to be
    # selected explicitly or seaborn cannot find it; `vars` then restricts the
    # grid axes to the features. The sample size is capped at the catalog
    # length so small catalogs do not make .sample() raise.
    figGrid = sns.pairplot(processed512[pairPlotColumns + ['time']].sample(min(1000, len(processed512))),
                           vars      = pairPlotColumns,
                           kind      = 'hist',
                           diag_kind = 'hist',
                           corner    = True,
                           hue       = 'time',
                           palette   = 'flare')

    plt.show()

# Plotting

In [9]:
def pairPlotter(catalog, plotKind='kde', outputDir='/users/bob/desktop'):
    """Draw the corner pair plot of the chosen features for one catalog and save it as a PNG.

    Reads the module-level ``pairPlotColumns`` (columns to plot) and ``labels``
    (axis labels, same order as the columns).

    Parameters
    ----------
    catalog : pandas.DataFrame
        Processed catalog. Must contain every column in ``pairPlotColumns``
        plus a 'resolution' column; the first 'resolution' value is used in
        the title and the file name.
    plotKind : str, optional
        Passed to ``sns.pairplot`` as ``kind`` (e.g. 'kde' or 'hist').
        Defaults to 'kde'.
    outputDir : str, optional
        Directory the PNG is written to. The default keeps the location this
        notebook has always used; pass another directory when running elsewhere.
        The directory is not created here and must already exist.

    Returns
    -------
    None
        The figure is written to ``<outputDir>/<resolution>-<plotKind>.png``
        and then closed, so it is not left open in the calling process.
    """
    import os  # local import so the function stays self-contained when shipped to worker processes

    # Find resolution
    resolution = catalog['resolution'].iloc[0]

    # Set palette (note: this changes seaborn's global default palette)
    sns.set_palette('crest')

    # Setup the plot and return the plot object
    figGrid = sns.pairplot(catalog[pairPlotColumns],
                           kind   = plotKind,
                           corner = True)

    # Work on the grid's own figure rather than whatever pyplot considers current.
    # Older seaborn releases only expose it as `.fig`.
    fig = figGrid.figure if hasattr(figGrid, 'figure') else figGrid.fig

    # Set the labels: x labels along the bottom row, y labels down the first column
    a = figGrid.axes.shape[0]
    for i in range(a):
        figGrid.axes[a-1,i].xaxis.set_label_text(labels[i], fontsize=15)
        figGrid.axes[i,  0].yaxis.set_label_text(labels[i], fontsize=15)

    # Title
    fig.suptitle(f'Features Pairplot for the {resolution} Resolution Dataset', fontsize=30)

    # Make sure there's enough room for the axis titles etc
    fig.tight_layout()

    # Save the plot
    fig.savefig(os.path.join(outputDir, f"{resolution}-{plotKind}.png"))

    # Release the figure; worker processes are reused and would otherwise keep it alive
    plt.close(fig)

In [10]:
# Produce the pair plot of every resolution at once, one worker per catalog
if not testing:
    catalogs = [processed512, processed1024, processed2048]
    plotJobs = (delayed(pairPlotter)(catalog) for catalog in catalogs)
    Parallel(n_jobs=len(catalogs))(plotJobs)