## Plot histograms from Small Body Database (SBDB)

Calculations to inform:

* Distribution of asteroid masses (M)
* Velocity of asteroids (V_NEA)
* Velocity of LISA (V_LISA)
* Relative velocity (V)
* Distribution of impact parameter (D)

In [None]:
import warnings
warnings.simplefilter('ignore', FutureWarning)
from pandas import *
import numpy as np
%matplotlib inline
from calendar import monthrange
from math import *
from ipynb.fs.defs import utils

In [None]:
# Read in SBDB from local file
# low_memory=False is used because of this pandas warning recorded by the author:
# DtypeWarning: Columns (3,4,15,22,23,69) have mixed types. Specify dtype option on import or set low_memory=False.
SBDB_AAAA = read_csv('SBDB/sbdb_query_results_asteroids_AAAA_20220703.csv', low_memory=False)
SBDB_GMdefined = read_csv('SBDB/sbdb_query_results_all_GMdefined_20220703.csv', low_memory=False)

In [None]:
# Get SBDB from API documented at https://ssd-api.jpl.nasa.gov/doc/sbdb_query.html

In [None]:
# Preview the first 10 rows of the asteroid table
SBDB_AAAA.head(10)

In [None]:
# Summarise the asteroid table with the project helper utils.describe_df
# (imported from the local utils notebook; its output is not visible from here)
utils.describe_df(SBDB_AAAA)

In [None]:
# Documentation on columns: fetch the field metadata from the SBDB Query API
# https://ssd-api.jpl.nasa.gov/sbdb_query.api?info=field

import requests
import json

# A timeout stops the cell from hanging when the service is unreachable, and
# raise_for_status() reports an HTTP error here instead of letting it show up
# as a JSON decoding error or KeyError in a later cell.
response = requests.get('https://ssd-api.jpl.nasa.gov/sbdb_query.api?info=field', timeout=30)
response.raise_for_status()
response_info = json.loads(response.text)

In [None]:
def get_fields(response_info):
    fields = []
    for ntdu in response_info:
        fields.append([ntdu['name'],ntdu['title'],ntdu['description'],ntdu['units']])
    dataframe = DataFrame(data=fields, columns=['Name', 'Title', 'Description', 'Units'])
    return dataframe

In [None]:
# One metadata table per field group in the API response; each variable is
# built from the list of the group it is named after.
object_df = get_fields(response_info['info']['field']['object']['list'])
orbit_df = get_fields(response_info['info']['field']['orbit']['list'])
phys_par_df = get_fields(response_info['info']['field']['phys_par']['list'])

In [None]:
# Report how many rows of the full asteroid table have each quantity defined
print('For all asteroids')
print(f"Number of asteroids defined = {len(SBDB_AAAA)}")
for _label, _columns in (
    ('mass', ['GM']),
    ('diameter', ['diameter']),
    # ('extent', ['extent']),
    # ('albedo', ['albedo']),
    # ('diameter and albedo', ['diameter', 'albedo']),
    ('absolute magnitude H and albedo', ['H', 'albedo']),
    ('absolute magnitude H', ['H']),
):
    print(f"Number of asteroids with defined {_label} = {len(SBDB_AAAA.dropna(subset=_columns))}")

In [None]:
# Data on sigma of GM from https://ssd.jpl.nasa.gov/tools/sbdb_lookup.html
# 'spkid' and 'sigma_GM' are parallel lists: entry i of 'sigma_GM' belongs to
# entry i of 'spkid'. Keep both lists the same length when editing.
# NOTE(review): sigma_GM is presumably in the same units as the SBDB GM column,
# and the 0 entries presumably mean no uncertainty was listed - confirm.
data_sigma_GM = {'spkid':[
                    2000001,
                    2000002,
                    2000004,
                    2000010,
                    2000016,
                    2000022,
                    2000107,
                    2000243,
                    2000253,
                    2000433,
                    2000704,
                    2025143,
                    2101955,
                    2162173,
                    2185851,
                    ],
                 'sigma_GM':[
                    0.0009,
                    0.18,
                    0.000012,
                    0,
                    0.04,
                    0.05,
                    0.0022,
                    0.00035,
                    0.00030,
                    0.001e-4,
                    0,
                    0.063e-9,
                    0.0009e-9,
                    0.04e-8,
                    3e-9,
                    ]
                }
# One row per object, with columns 'spkid' and 'sigma_GM'
df_sigma_GM = DataFrame(data_sigma_GM)

# Attach the GM uncertainties to the GM-defined objects, matching on the SPKID;
# the inner join keeps only objects present in both tables.
SBDB_with_mass = SBDB_GMdefined.merge(df_sigma_GM, how='inner', on='spkid')
G = 6.67408e-11 # gravitational constant (equals the CODATA 2014 value)

# Mass and its uncertainty: scale GM by 1e3**3 (km^3 to m^3), then divide by G
SBDB_with_mass['M'] = SBDB_with_mass['GM'].mul(1e3**3).div(G)
SBDB_with_mass['sigma_M'] = SBDB_with_mass['sigma_GM'].mul(1e3**3).div(G)
# Relative uncertainty, expressed in percent
SBDB_with_mass['sigma_over_M'] = SBDB_with_mass['sigma_GM'].div(SBDB_with_mass['GM']).mul(100)
SBDB_with_mass['log10(M)'] = SBDB_with_mass['M'].map(log10)

# Save and display the columns of interest
df_output = SBDB_with_mass.loc[:, ['full_name','GM','sigma_GM','M','sigma_M','sigma_over_M','log10(M)']]
df_output.to_csv('SBDB/SBDB_with_mass.csv')
df_output

In [None]:
# Display the astronomical unit and speed of light provided by the lisaconstants package
from lisaconstants import ASTRONOMICAL_UNIT, SPEED_OF_LIGHT
ASTRONOMICAL_UNIT, SPEED_OF_LIGHT

In [None]:
# Perihelion and aphelion used as orbit-crossing thresholds in the filters below,
# calculated from lisa-orbits.ipynb from esa-orbits.h5
# NOTE(review): presumably in au, to match the SBDB a/q/ad columns - confirm.
perihelion = 0.9942402780791028
aphelion = 1.0048137369425845
# perihelion, aphelion = (0.983, 1.017) # Earth from asteroid classifications
perihelion, aphelion

In [None]:
# Apollo-like selection: a greater than 1 and q below the aphelion threshold
SBDB_apollo = SBDB_AAAA.loc[SBDB_AAAA['a'].gt(1) & SBDB_AAAA['q'].lt(aphelion)]
SBDB_apollo.shape

In [None]:
# Aten-like selection: a less than 1 and ad above the perihelion threshold
SBDB_atens = SBDB_AAAA.loc[SBDB_AAAA['a'].lt(1) & SBDB_AAAA['ad'].gt(perihelion)]
SBDB_atens.shape

In [None]:
# Working sample: the union of the Aten-like and Apollo-like selections
SBDB = SBDB_AAAA.loc[
    (SBDB_AAAA['a'].lt(1) & SBDB_AAAA['ad'].gt(perihelion))
    | (SBDB_AAAA['a'].gt(1) & SBDB_AAAA['q'].lt(aphelion))
]
SBDB.shape

In [None]:
# Report how many rows of the Aten/Apollo sample have each quantity defined
print('About the Atens and Apollos:')
print(f"Number of asteroids defined = {len(SBDB)}")
for _label, _columns in (
    ('mass', ['GM']),
    ('diameter', ['diameter']),
    # ('extent', ['extent']),
    # ('albedo', ['albedo']),
    # ('diameter and albedo', ['diameter', 'albedo']),
    ('absolute magnitude H and albedo', ['H', 'albedo']),
    ('absolute magnitude H', ['H']),
    ('spectral class (SMASSII)', ['spec_B']),
    ('spectral class (Tholen)', ['spec_T']),
):
    print(f"Number of asteroids with defined {_label} = {len(SBDB.dropna(subset=_columns))}")

In [None]:
# Attach the GM uncertainties to the Aten/Apollo sample, matching on the SPKID;
# the inner join keeps only objects present in both tables.
SBDB_with_mass = SBDB.merge(df_sigma_GM, how='inner', on='spkid')
G = 6.67408e-11 # gravitational constant (equals the CODATA 2014 value)

# Mass and its uncertainty: scale GM by 1e3**3 (km^3 to m^3), then divide by G
SBDB_with_mass['M'] = SBDB_with_mass['GM'].mul(1e3**3).div(G)
SBDB_with_mass['sigma_M'] = SBDB_with_mass['sigma_GM'].mul(1e3**3).div(G)
SBDB_with_mass['log10(M)'] = SBDB_with_mass['M'].map(log10)

# Save and display the columns of interest
df_output = SBDB_with_mass.loc[:, ['full_name','GM','sigma_GM','M','sigma_M','log10(M)']]
df_output.to_csv('SBDB/LISA_crossing_asteroids_with_mass.csv')
df_output

In [None]:
# Calculate the mass of the asteroids with diameters defined
# .copy() yields an independent frame, so the columns added to it later in
# this cell are not chained assignments on a slice of SBDB.
SBDB_with_size = SBDB.dropna(subset=['diameter']).copy()
density = 2000. # kg m-3

def mass(diameter):
    """Mass of a sphere with the given diameter, using the module-level `density`.

    The diameter is scaled by 1e3 before the volume is computed (km to m,
    assuming the input is in km - TODO confirm); with `density` in kg m-3 the
    result is then in kg.
    """
    half_width_m = diameter*1.e3/2.
    return 4./3.*pi*half_width_m**3*density

# Switch off pandas' chained-assignment check (a global setting) before adding columns
options.mode.chained_assignment = None
# Element-wise mass from the diameter, then its base-10 logarithm
SBDB_with_size['M'] = SBDB_with_size['diameter'].map(mass)
SBDB_with_size['log10(M)'] = SBDB_with_size['M'].map(log10)
SBDB_with_size

In [None]:
# Smallest mass in the diameter-based sample
SBDB_with_size['M'].min()

In [None]:
# Largest mass in the diameter-based sample
SBDB_with_size['M'].max()

In [None]:
import matplotlib.pyplot as plt

# Histogram of log10(mass) for the diameter-based sample, saved to a PNG
fig, ax = plt.subplots(1, 1, sharey='row', squeeze=True, figsize=(6, 4))
ax.hist(SBDB_with_size['log10(M)'], bins=np.arange(2,17,1.0))
ax.set_ylabel('Frequency')
ax.set_xlabel('log(M [kg])')
fig.patch.set_alpha(1)
fig.tight_layout()
fig.savefig('sbdb_with_diameter.png')
plt.show()
plt.close()
# Minimum, maximum and mean of log10(mass)
print(*(getattr(SBDB_with_size['log10(M)'], stat)() for stat in ('min', 'max', 'mean')))

# bin = np.arange(3,9) # len + 1
# figure=plt.figure(1)
# plt.hist(log10(SBDB_with_size['M']),bins=bin, facecolor='blue',edgecolor='black')
# # shift ticks by .5
# plt.xticks(bin-.5, bin)
# plt.xlabel = 'evaluation'
# figure.ylabel = 'No of problems'
# plt.title = 'Evaluations Distribution Histogram'


In [None]:
# Calculate the mass of the asteroids with absolute magnitude and albedo defined
# .copy() yields an independent frame, so the columns added to it later in
# this cell are not chained assignments on a slice of SBDB.
SBDB_with_Ha = SBDB.dropna(subset=['H','albedo']).copy()

def diameter(H, albedo): # http://www.physics.sfasu.edu/astro/asteroids/sizemagnitude.html
    """Diameter in km from absolute magnitude H and albedo.

    Evaluates 1329 / sqrt(albedo) * 10**(-0.2 * H).
    """
    scale = 1329./sqrt(albedo)
    return scale*10.**(-0.2*H) # km

# Switch off pandas' chained-assignment check (a global setting) before adding columns
options.mode.chained_assignment = None
# Row by row: diameter from (H, albedo), then the mass of that diameter
SBDB_with_Ha['M'] = (
    SBDB_with_Ha[['H','albedo']]
    .apply(lambda row: diameter(*row), axis=1)
    .map(mass)
)
SBDB_with_Ha['log10(M)'] = SBDB_with_Ha['M'].map(log10)
SBDB_with_Ha

In [None]:
import matplotlib.pyplot as plt

# Histogram of log10(mass) for the H-and-albedo sample, saved to a PNG
fig, ax = plt.subplots(1, 1, sharey='row', squeeze=True, figsize=(6, 4))
ax.hist(SBDB_with_Ha['log10(M)'], bins=np.arange(2,17,1.0))
ax.set_ylabel('Frequency')
ax.set_xlabel('log(M [kg])')
fig.patch.set_alpha(1)
fig.tight_layout()
fig.savefig('sbdb_with_Ha.png')
plt.show()
plt.close()

# Minimum, maximum and mean of log10(mass)
print(*(getattr(SBDB_with_Ha['log10(M)'], stat)() for stat in ('min', 'max', 'mean')))

In [None]:
# Calculate the mass of the asteroids with absolute magnitude H defined
# (only H is required here; this cell uses a default albedo)
# .copy() yields an independent frame, so the columns added to it later in
# this cell are not chained assignments on a slice of SBDB.
SBDB_with_H = SBDB.dropna(subset=['H']).copy()

albedo_default = 0.2

def diameter(H, albedo=None):
    """Diameter in km from absolute magnitude H.

    Evaluates 1329 / sqrt(albedo) * 10**(-0.2 * H). When `albedo` is omitted,
    the module-level `albedo_default` is used, so `diameter(H)` works as a
    single-argument function. Accepting an explicit albedo as well keeps the
    two-argument call `diameter(H, albedo)` used by the H-and-albedo cell
    valid after this cell has been run.
    """
    if albedo is None:
        # Looked up at call time so a changed albedo_default takes effect
        albedo = albedo_default
    return 1329./sqrt(albedo)*10.**(-0.2*H) # km

# Switch off pandas' chained-assignment check (a global setting) before adding columns
options.mode.chained_assignment = None
# diameter() and mass() are called once on the whole 'H' column, which yields
# a Series to assign to 'M'
SBDB_with_H['M'] = mass(diameter(SBDB_with_H['H']))
SBDB_with_H['log10(M)'] = SBDB_with_H['M'].map(log10)
SBDB_with_H

In [None]:
import matplotlib.pyplot as plt


# Histogram of log10(mass) for the H-based sample, saved to a PNG
fig, ax = plt.subplots(1, 1, sharey='row', squeeze=True, figsize=(6, 4))
ax.hist(SBDB_with_H['log10(M)'], bins=np.arange(2,17,1.0))
ax.set_ylabel('Frequency')
ax.set_xlabel('log(M [kg])')
fig.patch.set_alpha(1)
fig.tight_layout()
fig.savefig('sbdb_with_H.png')
plt.show()
plt.close()
# Minimum, maximum and mean of log10(mass)
print(*(getattr(SBDB_with_H['log10(M)'], stat)() for stat in ('min', 'max', 'mean')))

In [None]:
import matplotlib.pyplot as plt


# Wide version of the H-based log10(mass) histogram, with a title and narrower bin range
fig, ax = plt.subplots(1, 1, sharey='row', squeeze=True, figsize=(10, 3))
ax.hist(SBDB_with_H['log10(M)'], bins=np.arange(3,15,1.0))
ax.set_ylabel('Frequency')
ax.set_xlabel('log(M [kg])')
ax.set_title('Estimated masses of NEA with LISA crossing orbits')
fig.patch.set_alpha(1)
fig.tight_layout()
fig.savefig('sbdb_with_H_stretched.png')
plt.show()
plt.close()
# Minimum, maximum and mean of log10(mass)
print(*(getattr(SBDB_with_H['log10(M)'], stat)() for stat in ('min', 'max', 'mean')))

In [None]:
import matplotlib.pyplot as plt

# Overlay the three log10(mass) samples as raw counts.
# Histogram styling references:
# https://towardsdatascience.com/histograms-with-pythons-matplotlib-b8b768da9305
# https://towardsdatascience.com/take-your-histograms-to-the-next-level-using-matplotlib-5f093ad7b9d3

facecolor = '#FFFFFF'
color_bars = '#1F77B4' # default bars colour
txt_color1 = '#252525'
txt_color2 = '#004C74'

data1=SBDB_with_H['log10(M)']
data2=SBDB_with_Ha['log10(M)']
data3=SBDB_with_size['log10(M)']

bins=np.arange(2,17,1.0)
fig, ax = plt.subplots(1, figsize=(14,8), facecolor=facecolor)
ax.set_facecolor(facecolor)

# Standard tableau colours: https://matplotlib.org/stable/tutorials/colors/colors.html
# Each sample gets a label so that the legend tells the overlaid bars apart.
n, bins1, patches = plt.hist(data1, bins=bins, alpha=1, color='tab:blue',
                             label='From H with default albedo')
n, bins2, patches = plt.hist(data2,  bins=bins, alpha=0.5, color='tab:orange', zorder=2,
                             label='From H and albedo')
n, bins3, patches = plt.hist(data3, bins=bins, alpha=0.5, color='tab:green',
                             label='From diameter')

# Grid: visible horizontal lines; vertical lines drawn in the background colour
plt.grid(axis='y', color=color_bars, lw = 0.5, alpha=0.7)
plt.grid(axis='x', color=facecolor, lw = 0.5)

# Remove major and minor tick marks from the y axis, but keep the labels
ax.tick_params(axis='y', which='both',length=0) # use axis='both' to turn off tick marks on both axes

# X: limit the range to 2-16
ax.set_xlabel("log10(M)")
x_start, x_end = 2, 16
ax.set_xlim(x_start, x_end)

# Y
ax.set_ylabel("Frequency")

ax.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Overlay the three log10(mass) samples, each normalised to its own sample size.
# Histogram styling references:
# https://towardsdatascience.com/histograms-with-pythons-matplotlib-b8b768da9305
# https://towardsdatascience.com/take-your-histograms-to-the-next-level-using-matplotlib-5f093ad7b9d3

facecolor = '#FFFFFF'
color_bars = '#1F77B4' # default bars colour
txt_color1 = '#252525'
txt_color2 = '#004C74'

data1=SBDB_with_H['log10(M)']
data2=SBDB_with_Ha['log10(M)']
data3=SBDB_with_size['log10(M)']

bins=np.arange(2,17,1.0)
fig, ax = plt.subplots(1, figsize=(14,8), facecolor=facecolor)
ax.set_facecolor(facecolor)

# plotting as a percentage of population: every entry is weighted by
# 1/len(sample), so the bar heights of each sample are fractions of that sample
# https://stackoverflow.com/questions/51473993/plot-an-histogram-with-y-axis-as-percentage-using-funcformatter
# Standard tableau colours: https://matplotlib.org/stable/tutorials/colors/colors.html
# Each sample gets a label so that the legend tells the overlaid bars apart.
n, bins1, patches = plt.hist(data1, weights=np.ones(len(data1)) / len(data1), bins=bins, alpha=1,
                             color='tab:blue', label='From H with default albedo')
n, bins2, patches = plt.hist(data2, weights=np.ones(len(data2)) / len(data2), bins=bins, alpha=0.5,
                             color='tab:orange', label='From H and albedo')
n, bins3, patches = plt.hist(data3, weights=np.ones(len(data3)) / len(data3), bins=bins, alpha=0.5,
                             color='tab:green', label='From diameter')

# Show the fractional bar heights as percentages (1 corresponds to 100%)
from matplotlib.ticker import PercentFormatter
plt.gca().yaxis.set_major_formatter(PercentFormatter(1))

# Grid: visible horizontal lines; vertical lines drawn in the background colour
plt.grid(axis='y', color=color_bars, lw = 0.5, alpha=0.7)
plt.grid(axis='x', color=facecolor, lw = 0.5)

# Remove major and minor tick marks from the y axis, but keep the labels
ax.tick_params(axis='y', which='both',length=0) # use axis='both' to turn off tick marks on both axes

# X: limit the range to 2-16
ax.set_xlabel("log10(M)")
x_start, x_end = 2, 16
ax.set_xlim(x_start, x_end)

# Y
ax.set_ylabel("Percentage of the sample")

ax.legend()
plt.show()

In [None]:
# Velocities of NEA!
GM_sun = 1.32712440018E20 # m3 s-2
AU = 1.495978707E11 # m from google

def velocity_at_1AU(a):
    """Orbital speed at a heliocentric distance of 1 AU, from the vis-viva equation.

    https://en.wikipedia.org/wiki/Orbital_speed#Instantaneous_orbital_speed

    `a` is multiplied by AU inside the formula, i.e. it is the semi-major
    axis expressed in AU; with the constants above the result is in m s-1.
    """
    speed_squared = GM_sun*(2./AU - 1./(a*AU))
    # NOTE(review): abs() keeps the square root real when the vis-viva term is
    # negative, which happens for 0 < a < 0.5; such an orbit does not reach
    # 1 AU, so the value returned for it is not a physical speed.
    return sqrt(abs(speed_squared))

In [None]:
# .copy() makes the filtered frame independent of SBDB, so adding the 'v'
# column below does not rely on the chained-assignment check having been
# disabled by an earlier cell.
SBDB_with_a = SBDB.dropna(subset=['a']).copy()
# SBDB_with_a[SBDB_with_a['a'] != 0]

# Speed of every object, computed element-wise from its semi-major axis
SBDB_with_a['v'] = SBDB_with_a['a'].apply(velocity_at_1AU)
SBDB_with_a


In [None]:
import matplotlib.pyplot as plt

# Histogram of the speeds (divided by 1e3 for the km s-1 axis), saved to a PNG
fig, ax = plt.subplots(1, 1, sharey='row', squeeze=True, figsize=(6, 4))
ax.hist(SBDB_with_a['v']/1e3, bins=np.arange(10,45,1))
ax.set_ylabel('Frequency')
ax.set_xlabel('NEA velocity [km s-1]')
fig.patch.set_alpha(1)
fig.tight_layout()
fig.savefig('sbdb_velocity.png')
plt.show()
plt.close()

# Minimum, maximum and mean of the unscaled 'v' column
print(*(getattr(SBDB_with_a['v'], stat)() for stat in ('min', 'max', 'mean')))