# Import librairies & data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import random

In [3]:
from asteroid_mining.data import Data

In [4]:
df_neo = Data().get_neo_data()

  if (await self.run_code(code, result,  async_=asy)):


In [5]:
df_acc = Data().get_acc_data()

# Estimate resources per asteroid

## Get % of resources with the spec

### Get the spec of the asteroid

In [6]:
df_acc.shape

(659, 86)

In [7]:
df_acc['spec_B'].head()

0      NaN
1    S(IV)
2      NaN
3       Sq
4        B
Name: spec_B, dtype: object

In [8]:
def clean_spec(x):
    x = str(x).replace(':', '')
    x = str(x).replace('(IV)', '')
    return x

In [9]:
# Clean a bit spec_B column on df_neo & df_acc to have a clearer view
df_neo['spec_B_clean'] = df_neo['spec_B'].apply(clean_spec)
df_acc['spec_B_clean'] = df_acc['spec_B'].apply(clean_spec)

In [10]:
# Inspect spec information on df_acc
spec_counts_acc = df_acc[['id', 'spec_B_clean']].groupby(['spec_B_clean']).count().reset_index()
spec_counts_acc

Unnamed: 0,spec_B_clean,id
0,B,1
1,Cg,1
2,S,4
3,Sq,2
4,X,1
5,,650


In [11]:
# Counting number of asteroids that have spec info
spec_counts_acc['id'].sum()

659

We only have information on spec for 9 asteroids out of the 659 we have in the accessible dataframe. Let's have a look at df_neo:

In [12]:
spec_counts_neo = df_neo[['id', 'spec_B_clean']].groupby(['spec_B_clean']).count().reset_index()
spec_counts_neo

Unnamed: 0,spec_B_clean,id
0,A,1
1,B,5
2,C,13
3,Cb,3
4,Cg,1
5,Ch,1
6,D,4
7,K,7
8,L,6
9,Ld,2


Even if we have a lot of missing info, there is data to exploit to estimate the spec repartition of df_acc

In [13]:
# Delete nan from spec_counts_neo
index_to_del = spec_counts_neo[ spec_counts_neo['spec_B_clean'] == 'nan'].index

In [14]:
index_to_del

Int64Index([26], dtype='int64')

In [15]:
spec_counts_neo.drop(index_to_del , inplace=True)

In [16]:
sum_spec_neo = spec_counts_neo['id'].sum()

In [17]:
# Get percentage of each spec 
spec_counts_neo['per_spec'] = spec_counts_neo['id'].apply(lambda x: x / sum_spec_neo)

In [18]:
spec_counts_neo['cum_perc_spec'] = spec_counts_neo['per_spec'].cumsum()

In [19]:
spec_counts_neo

Unnamed: 0,spec_B_clean,id,per_spec,cum_perc_spec
0,A,1,0.003226,0.003226
1,B,5,0.016129,0.019355
2,C,13,0.041935,0.06129
3,Cb,3,0.009677,0.070968
4,Cg,1,0.003226,0.074194
5,Ch,1,0.003226,0.077419
6,D,4,0.012903,0.090323
7,K,7,0.022581,0.112903
8,L,6,0.019355,0.132258
9,Ld,2,0.006452,0.13871


In [20]:
from random import uniform

In [21]:
a = uniform(0,1)

for i in range(spec_counts_neo.shape[0]):
    if a > spec_counts_neo.loc[i, 'cum_perc_spec']:
        continue
    else:
        new_spec = spec_counts_neo.loc[i, 'spec_B_clean']
        break
        
print(f'{a}: {new_spec} spec')

0.7439003664194354: Sr spec


In [22]:
df_acc.loc[300, 'spec_B_clean']

'nan'

In [23]:
def new_spec_acc(x):
    
    # Check if spec is nan
    if x == 'nan':
    
        a = uniform(0,1)

        for i in range(spec_counts_neo.shape[0]):
            if a > spec_counts_neo.loc[i, 'cum_perc_spec']:
                continue
            else:
                new_spec = spec_counts_neo.loc[i, 'spec_B_clean']
                break
        return new_spec
    
    else:
        return x

In [24]:
df_acc['new_spec'] = df_acc['spec_B_clean'].apply(new_spec_acc)

In [25]:
# Check that each asteroid of df_acc has an estimated spec
df_acc['new_spec']

0      Sr
1       S
2      Sr
3      Sq
4       B
       ..
654    Cb
655    Sq
656     S
657     D
658    Xk
Name: new_spec, Length: 659, dtype: object

### Estimate the % of resources for each asteroid according to its spec

In [26]:
acc_resources = df_acc[['id', 'new_spec']]
acc_resources.head()

Unnamed: 0,id,new_spec
0,a0003361,Sr
1,a0025143,S
2,a0089136,Sr
3,a0099942,Sq
4,a0101955,B


In [27]:
spec_per_resources = {
  '?': {},
  'A': {},
  'B': {
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
    'iron': 10,
  },
  'C': {
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .2,
    'iron': .166,
    'nickel': .014,
    'cobalt': .002,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'Ch': {
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .2,
    'iron': .166,
    'nickel': .014,
    'cobalt': .002,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'Cg': {
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .2,
    'iron': .166,
    'nickel': .014,
    'cobalt': .002,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'Cgh': {
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .2,
    'iron': .166,
    'nickel': .014,
    'cobalt': .002,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'C': {
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .2,
    'iron': .166,
    'nickel': .014,
    'cobalt': .002,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'Cb': {   # transition object between C and B
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .1,
    'iron': .083,
    'nickel': .007,
    'cobalt': .001,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'D': {
    'water': 0.000023,
  },
  'E': {

  },
  'K': {  # cross between S and C
    # from Keck report at http://www.kiss.caltech.edu/study/asteroid/asteroid_final_report.pdf
    'water': .1,
    'iron': .083,
    'nickel': .007,
    'cobalt': .001,

    # volatiles
    'hydrogen': 0.235,
    'nitrogen': 0.001,
    'ammonia': 0.001,
  },
  'L': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
    'aluminum': 7
  },
  'Ld': {  # copied from S
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'M': {
    'iron': 88,
    'nickel': 10,
    'cobalt': 0.5,
  },
  'O': {
    'nickel-iron': 2.965,
    'platinum': 1.25,
  },
  'P': {  # correspond to CI, CM carbonaceous chondrites
    'water': 12.5,
  },
  'R': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'S': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  # Sa, Sq, Sr, Sk, and Sl all transition objects (assume half/half)
  'Sa': {
    'magnesium silicate': 5e-31,
    'iron silicate': 0,
  },
  'Sq': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'Sr': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'Sk': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'Sl': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'S(IV)': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'Q': {
    'nickel-iron': 13.315,
  },
  'R': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },
  'T': {
    'iron': 6,
  },
  'U': {

  },
  'V': {
    'magnesium silicate': 1e-30,
    'iron silicate': 0,
  },

  # TODO use density to decide what kind of X the object is?

  'X': {  # TODO these vals only apply to M-type within X
    'iron': 88,
    'nickel': 10,
    'cobalt': 0.5,
  },
  'Xe': {  # TODO these vals only apply to M-type within X
    'iron': 88,
    'nickel': 10,
    'cobalt': 0.5,
  },
  'Xc': {  # TODO these vals only apply to M-type within X
    'iron': 88,
    'nickel': 10,
    'cobalt': 0.5,
    'platinum': 0.005,
  },
  'Xk': {  # TODO these vals only apply to M-type within X
    'iron': 88,
    'nickel': 10,
    'cobalt': 0.5,
  },
  'comet': {
      # no estimates for now, because assumed mass, etc. would be off
  },
}

In [28]:
# Check that all specs in acc_resources are in the dictionary keys
missing_keys = []
for i in list(acc_resources['new_spec'].unique()):
    if i not in list(spec_per_resources.keys()):
        i.append(missing_keys)
missing_keys

[]

In [29]:
'id' in acc_resources.columns

True

In [30]:
for key in spec_per_resources:
    for key1 in spec_per_resources[key]:
        if key1 not in acc_resources.columns:
            acc_resources[key1] = ''

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_resources[key1] = ''


In [31]:
resources_list = list(acc_resources.columns)
resources_list.remove('id')
resources_list.remove('new_spec')

In [32]:
resources_list

['hydrogen',
 'nitrogen',
 'ammonia',
 'iron',
 'water',
 'nickel',
 'cobalt',
 'magnesium silicate',
 'iron silicate',
 'aluminum',
 'nickel-iron',
 'platinum']

In [33]:
for col in resources_list:
    for i, row in acc_resources.iterrows():
        if col in spec_per_resources[row['new_spec']]:
            acc_resources.at[i,col] = spec_per_resources[row['new_spec']][col]

In [76]:
acc_resources.to_csv('../asteroid_mining/data/acc_resources_v3.csv')

In [35]:
'hydrogen' in spec_per_resources['A']

False

## Get the mass of each asteroid

### Get the diameter

In [77]:
df_acc.to_csv('../asteroid_mining/data/df_acc_v3.csv')

In [37]:
df_acc['Estimated Diameter (m)']

0      272 - 1216
1      282 - 1262
2       159 - 709
3       200 - 893
4       159 - 713
          ...    
654       19 - 85
655       14 - 62
656       17 - 78
657       18 - 81
658      33 - 148
Name: Estimated Diameter (m), Length: 659, dtype: object

In [38]:
df_acc[~df_acc['Estimated Diameter (m)'].isnull()].shape

(659, 88)

In [39]:
def clean_diameter_acc(x):
    items = x.split('-')
    
    items_new = []
    for i in items:
        items_new.append(int(i.strip()))
    
    return sum(items_new) / len(items_new)

In [40]:
df_acc['diameter_clean'] = df_acc['Estimated Diameter (m)'].apply(clean_diameter_acc)

In [41]:
df_acc[['Estimated Diameter (m)', 'diameter_clean']]

Unnamed: 0,Estimated Diameter (m),diameter_clean
0,272 - 1216,744.0
1,282 - 1262,772.0
2,159 - 709,434.0
3,200 - 893,546.5
4,159 - 713,436.0
...,...,...
654,19 - 85,52.0
655,14 - 62,38.0
656,17 - 78,47.5
657,18 - 81,49.5


### Get the volume based on diameter

In [42]:
df_acc['est_volume'] = 4/3 * math.pi * ((df_acc['diameter_clean'] / 2) ** 3)

In [43]:
df_acc[df_acc['est_volume'] < 0]

Unnamed: 0.1,Unnamed: 0,id,spkid,full_name,pdes,name,prefix,neo,pha,H,...,OCC,"Min. delta-V [delta-V, dur.] (km/s), (d)","Min. Duration [delta-V, dur.] (km/s), (d)",Viable Trajectories,Next Optical Opportunity (yyyy-mm [Vp]),Next Goldstone Radar Opportunity (yyyy-mm [SNR]),spec_B_clean,new_spec,diameter_clean,est_volume


### Get the density

In [44]:
TYPE_DENSITY_MAP = {
  'C': 1.38,
  'D': 1.38,
  'P': 1.38,
  'T': 1.38,
  'B': 1.38,
  'G': 1.38,
  'F': 1.38,
  'S': 2.71,
  'K': 2.71,
  'Q': 2.71,
  'V': 2.71,
  'R': 2.71,
  'A': 2.71,
  'M': 5.32,
}

In [45]:
tholen_smasii_map = {'A':'A', 
                     'B':'B', 
                     'C':'C', 
                     'Ch':'C', 
                     'Cg':'C', 
                     'Cgh':'C', 
                     'Cb':'C', 
                     'D':'D', 
                     'E':'M', 
                     'K':'P', 
                     'L':'A', 
                     'Ld':'A', 
                     'M':'M', 
                     'O':'P', 
                     'P':'P', 
                     'R':'R', 
                     'S':'S', 
                     'Sa':'S', 
                     'Sq':'S', 
                     'Sr':'S', 
                     'Sk':'S', 
                     'Sl':'S', 
                     'S(IV)':'S', 
                     'Q':'Q', 
                     'T':'T', 
                     'U':'S', 
                     'V':'V', 
                     'X':'M', 
                     'Xe':'M', 
                     'Xc':'M', 
                     'Xk':'M'}

In [46]:
def get_density(x):
    general_spec = tholen_smasii_map[x]
    return TYPE_DENSITY_MAP[general_spec]

In [47]:
df_acc['est_density'] = df_acc['new_spec'].apply(get_density)

### Calculate mass

In [48]:
df_acc.head()

Unnamed: 0.1,Unnamed: 0,id,spkid,full_name,pdes,name,prefix,neo,pha,H,...,"Min. delta-V [delta-V, dur.] (km/s), (d)","Min. Duration [delta-V, dur.] (km/s), (d)",Viable Trajectories,Next Optical Opportunity (yyyy-mm [Vp]),Next Goldstone Radar Opportunity (yyyy-mm [SNR]),spec_B_clean,new_spec,diameter_clean,est_volume,est_density
0,0,a0003361,2003361,3361 Orpheus (1982 HR),3361,Orpheus,,Y,Y,19.03,...,"9.150, 354","9.946, 322",17332,2021-05 [14.3],2021-11 [69],,Sr,744.0,215634100.0,2.71
1,1,a0025143,2025143,25143 Itokawa (1998 SF36),25143,Itokawa,,Y,Y,19.2,...,"9.409, 210","9.873, 178",21559,2021-06 [19.5],2033-03 [22],S,S,772.0,240907600.0,2.71
2,2,a0089136,2089136,89136 (2001 US16),89136,,,Y,Y,20.2,...,"9.308, 298","9.905, 138",28990,2022-09 [19.3],2034-05 [1.e3],,Sr,434.0,42802370.0,2.71
3,3,a0099942,2099942,99942 Apophis (2004 MN4),99942,Apophis,,Y,Y,19.7,...,"6.049, 354","9.789, 202",272315,2021-05 [19.6],2029-04 [8.e10],Sq,Sq,546.5,85461230.0,2.71
4,4,a0101955,2101955,101955 Bennu (1999 RQ36),101955,Bennu,,Y,Y,20.19,...,"7.052, 354","9.906, 282",47754,none,none,B,B,436.0,43396840.0,1.38


In [49]:
df_acc['est_mass'] = ''

for i, row in df_acc.iterrows():
    mass = row['est_volume'] * row['est_density'] / 6 * 1000
    
    # Add some random factor
    mass = mass + (random.random() - .5) * 1e6
    
    if mass > 1e11:
      # if it's huge, penalize it because the surface will be covered in ejecta, etc.
      # and the goodies will be far beneath. Also, gravity well.
        mass = mass * 1e-3
    
    df_acc.at[i,"est_mass"] = mass

In [50]:
df_acc['est_mass'] = df_acc['est_mass'].astype('float64')

In [51]:
df_acc['est_mass'].describe()

count    6.590000e+02
mean     1.435752e+09
std      7.193249e+09
min      2.870217e+06
25%      1.504124e+07
50%      4.533207e+07
75%      1.754880e+08
max      9.739432e+10
Name: est_mass, dtype: float64

In [52]:
df_acc['est_mass'].max()

97394321180.0709

## Estimate resources for each asteroid in kg

In [53]:
df_acc[['id', 'est_mass']]

Unnamed: 0,id,est_mass
0,a0003361,9.739432e+10
1,a0025143,1.088096e+08
2,a0089136,1.933264e+10
3,a0099942,3.859993e+10
4,a0101955,9.981086e+09
...,...,...
654,bK18T00A,1.720592e+07
655,bK18T00R,1.306843e+07
656,bK18T01Z,2.560335e+07
657,bK18T06D,1.443001e+07


In [54]:
acc_total_resources = pd.merge(acc_resources, df_acc[['id', 'est_mass']], on='id', how='inner')

In [55]:
acc_total_resources.head()

Unnamed: 0,id,new_spec,hydrogen,nitrogen,ammonia,iron,water,nickel,cobalt,magnesium silicate,iron silicate,aluminum,nickel-iron,platinum,est_mass
0,a0003361,Sr,,,,,,,,0.0,0.0,,,,97394320000.0
1,a0025143,S,,,,,,,,0.0,0.0,,,,108809600.0
2,a0089136,Sr,,,,,,,,0.0,0.0,,,,19332640000.0
3,a0099942,Sq,,,,,,,,0.0,0.0,,,,38599930000.0
4,a0101955,B,0.235,0.001,0.001,10.0,,,,,,,,,9981086000.0


In [56]:
acc_total_resources.loc[3, 'cobalt'] == ''

True

In [57]:
for i in resources_list:
    acc_total_resources[i] = acc_total_resources[i].apply(lambda x: 0 if x == '' else x)
    acc_total_resources[f'{i}_kg'] = acc_total_resources[i] * acc_total_resources['est_mass']

In [58]:
acc_total_resources['water_kg'].sum()

11300055625.277033

In [59]:
acc_total_resources = acc_total_resources.drop(columns=resources_list)

In [78]:
acc_total_resources.to_csv('../asteroid_mining/data/acc_total_resources_v3.csv')

In [60]:
acc_total_resources

Unnamed: 0,id,new_spec,est_mass,hydrogen_kg,nitrogen_kg,ammonia_kg,iron_kg,water_kg,nickel_kg,cobalt_kg,magnesium silicate_kg,iron silicate_kg,aluminum_kg,nickel-iron_kg,platinum_kg
0,a0003361,Sr,9.739432e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,9.739432e-20,0.0,0.0,0.0,0.0
1,a0025143,S,1.088096e+08,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.088096e-22,0.0,0.0,0.0,0.0
2,a0089136,Sr,1.933264e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.933264e-20,0.0,0.0,0.0,0.0
3,a0099942,Sq,3.859993e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,3.859993e-20,0.0,0.0,0.0,0.0
4,a0101955,B,9.981086e+09,2.345555e+09,9.981086e+06,9.981086e+06,9.981086e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,bK18T00A,Cb,1.720592e+07,4.043391e+06,1.720592e+04,1.720592e+04,1.428091e+06,1.720592e+06,1.204414e+05,1.720592e+04,0.000000e+00,0.0,0.0,0.0,0.0
655,bK18T00R,Sq,1.306843e+07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.306843e-23,0.0,0.0,0.0,0.0
656,bK18T01Z,S,2.560335e+07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,2.560335e-23,0.0,0.0,0.0,0.0
657,bK18T06D,D,1.443001e+07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,3.318902e+02,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0


# Descriptive statistics on asteroids resources

## Asteroids description

In [61]:
df_acc.columns

Index(['Unnamed: 0', 'id', 'spkid', 'full_name', 'pdes', 'name', 'prefix',
       'neo', 'pha', 'H', 'G', 'M1', 'M2', 'K1', 'K2', 'PC', 'diameter',
       'extent', 'albedo', 'rot_per', 'GM', 'BV', 'UB', 'IR', 'spec_B',
       'spec_T', 'H_sigma', 'diameter_sigma', 'orbit_id', 'epoch', 'epoch_mjd',
       'epoch_cal', 'equinox', 'e', 'a', 'q', 'i', 'om', 'w', 'ma', 'ad', 'n',
       'tp', 'tp_cal', 'per', 'per_y', 'moid', 'moid_ld', 'moid_jup', 't_jup',
       'sigma_e', 'sigma_a', 'sigma_q', 'sigma_i', 'sigma_om', 'sigma_w',
       'sigma_ma', 'sigma_ad', 'sigma_n', 'sigma_tp', 'sigma_per', 'class',
       'producer', 'data_arc', 'first_obs', 'last_obs', 'n_obs_used',
       'n_del_obs_used', 'n_dop_obs_used', 'condition_code', 'rms', 'two_body',
       'A1', 'A2', 'A3', 'DT', 'Object', 'Orbit ID', 'H (mag)',
       'Estimated Diameter (m)', 'OCC',
       'Min. delta-V [delta-V, dur.] (km/s), (d)',
       'Min. Duration [delta-V, dur.] (km/s), (d)', 'Viable Trajectories',
       'Next

In [62]:
df_acc['est_mass_th_tonnes'] = df_acc['est_mass']*1e-6

In [63]:
df_acc['est_mass_th_tonnes'].describe()

count      659.000000
mean      1435.752403
std       7193.248726
min          2.870217
25%         15.041241
50%         45.332072
75%        175.487967
max      97394.321180
Name: est_mass_th_tonnes, dtype: float64

In [64]:
df_acc[['diameter_clean', 'est_mass', 'Next Optical Opportunity (yyyy-mm [Vp])']].describe()

Unnamed: 0,diameter_clean,est_mass
count,659.0,659.0
mean,94.003035,1435752000.0
std,120.299228,7193249000.0
min,30.0,2870217.0
25%,39.5,15041240.0
50%,57.5,45332070.0
75%,90.5,175488000.0
max,1398.0,97394320000.0


In [65]:
col_analysis = ['id', 'name', 'diameter_clean', 'est_mass_th_tonnes', 'Next Optical Opportunity (yyyy-mm [Vp])']

In [66]:
df_analysis = df_acc[col_analysis]

In [67]:
df_acc[df_acc['full_name'].str.contains("Orpheus")]

Unnamed: 0.1,Unnamed: 0,id,spkid,full_name,pdes,name,prefix,neo,pha,H,...,Viable Trajectories,Next Optical Opportunity (yyyy-mm [Vp]),Next Goldstone Radar Opportunity (yyyy-mm [SNR]),spec_B_clean,new_spec,diameter_clean,est_volume,est_density,est_mass,est_mass_th_tonnes
0,0,a0003361,2003361,3361 Orpheus (1982 HR),3361,Orpheus,,Y,Y,19.03,...,17332,2021-05 [14.3],2021-11 [69],,Sr,744.0,215634100.0,2.71,97394320000.0,97394.32118


In [68]:
acc_total_resources

Unnamed: 0,id,new_spec,est_mass,hydrogen_kg,nitrogen_kg,ammonia_kg,iron_kg,water_kg,nickel_kg,cobalt_kg,magnesium silicate_kg,iron silicate_kg,aluminum_kg,nickel-iron_kg,platinum_kg
0,a0003361,Sr,9.739432e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,9.739432e-20,0.0,0.0,0.0,0.0
1,a0025143,S,1.088096e+08,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.088096e-22,0.0,0.0,0.0,0.0
2,a0089136,Sr,1.933264e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.933264e-20,0.0,0.0,0.0,0.0
3,a0099942,Sq,3.859993e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,3.859993e-20,0.0,0.0,0.0,0.0
4,a0101955,B,9.981086e+09,2.345555e+09,9.981086e+06,9.981086e+06,9.981086e+10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,bK18T00A,Cb,1.720592e+07,4.043391e+06,1.720592e+04,1.720592e+04,1.428091e+06,1.720592e+06,1.204414e+05,1.720592e+04,0.000000e+00,0.0,0.0,0.0,0.0
655,bK18T00R,Sq,1.306843e+07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.306843e-23,0.0,0.0,0.0,0.0
656,bK18T01Z,S,2.560335e+07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,2.560335e-23,0.0,0.0,0.0,0.0
657,bK18T06D,D,1.443001e+07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,3.318902e+02,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0


In [69]:
#df_neo.to_csv('../asteroid_mining/data/df_neo_v2.csv')

In [70]:
cols_visu = ['id', 'full_name', 'Next Optical Opportunity (yyyy-mm [Vp])', 'new_spec', 'diameter_clean', 'est_density', 'est_mass_th_tonnes']

In [71]:
df_outliers = df_acc[df_acc['est_mass_th_tonnes'] > 15_000][cols_visu]
df_outliers.sort_values(by=['est_mass_th_tonnes'])

Unnamed: 0,id,full_name,Next Optical Opportunity (yyyy-mm [Vp]),new_spec,diameter_clean,est_density,est_mass_th_tonnes
78,bK01Q34C,(2001 QC34),2024-06 [22.4],S,434.0,2.71,19331.955082
2,a0089136,89136 (2001 US16),2022-09 [19.3],Sr,434.0,2.71,19332.64294
48,a0490581,490581 (2009 WZ104),2021-09 [21.4],Sq,484.5,2.71,26896.918698
7,a0162173,162173 Ryugu (1999 JU3),2021-05 [19.0],Cg,630.0,1.38,30112.3775
27,a0416186,416186 (2002 TD60),2022-11 [20.8],K,654.0,1.38,33686.309389
3,a0099942,99942 Apophis (2004 MN4),2021-05 [19.6],Sq,546.5,2.71,38599.931801
5,a0138404,138404 (2000 HA24),2021-05 [17.7],O,720.0,1.38,44949.887162
19,a0341843,341843 (2008 EV5),2021-12 [21.8],X,476.0,5.32,50069.958044
29,a0424482,424482 (2008 DG5),2021-09 [19.9],S,599.0,2.71,50827.802794
18,a0329437,329437 (2002 OA22),2021-05 [19.8],S,627.5,2.71,58433.164268


In [72]:
#df_outliers.sort_values(by=['est_mass_th_tonnes']).to_csv('../asteroid_mining/data/outliers_acc_v3.csv')


In [73]:
df_acc[df_acc['est_mass'] < 200 * 1e9]['est_mass'].count()

659

In [74]:
sns.histplot(df_acc[df_acc['est_mass'] < 200 * 1e9][cols_visu]['est_mass'], bins=50)

KeyError: 'est_mass'

## Asteroids resources description

In [None]:
acc_total_resources.sum(axis = 0)

In [None]:
resources_list2 = list(acc_total_resources.columns)[3:]

In [None]:
sum_per_type = []
for i in resources_list2:
    sum_per_type.append(acc_total_resources[i].sum())

In [None]:
total_resources = pd.DataFrame({'resource': resources_list, 'total_kg': sum_per_type})

In [None]:
total_resources

In [None]:
total_resources.to_csv('../asteroid_mining/data/total_resources_acc_v2.csv')

In [None]:
palette = sns.color_palette('Paired', 10)

In [None]:
sns.barplot(x='resource', y='total_kg', palette=palette,data=total_resources)

In [None]:
sns.catplot(x='resource', y='total_kg', palette=palette, data=total_resources, height=5, aspect=3, kind='count', );
sns.despine()
plt.show()