In [None]:
#video series https://aka.ms/LearnWithDrG?azure-portal=true


import pandas

# Load the rock sample records from the CSV file into a DataFrame.
rock_samples = pandas.read_csv('data/rocksamples.csv')


In [None]:
# Preview the first rows of the loaded data.
rock_samples.head()

In [None]:
# Summarize the columns, their dtypes and non-null counts.
rock_samples.info()

In [None]:
# Convert the sample weights from grams to kilograms and rename the column to match.
# The guard keeps this cell safe to re-run: after the first run 'Weight(g)' no longer
# exists, so an unguarded second run would raise KeyError.
if 'Weight(g)' in rock_samples.columns:
    # Vectorized multiplication instead of a per-element apply(lambda ...).
    rock_samples['Weight(g)'] = rock_samples['Weight(g)'] * 0.001
    rock_samples.rename(columns={'Weight(g)': 'Weight(kg)'}, inplace=True)
rock_samples.head()

In [None]:
# Build a one-column table holding each distinct mission found in the samples.
missions = pandas.DataFrame({'Mission': rock_samples['Mission'].unique()})
missions.head()

In [None]:
# Summarize the missions table (columns, dtypes, non-null counts).
missions.info()

In [None]:
# Display the whole missions table.
missions

In [None]:
# Sum the sample weight per mission and attach it to the missions table
# under the column name 'Sample Weight(kg)'.
sample_total_weight = rock_samples.groupby('Mission')['Weight(kg)'].sum()
missions = missions.merge(sample_total_weight, on='Mission').rename(
    columns={'Weight(kg)': 'Sample Weight(kg)'}
)
missions

In [None]:
# Difference in sample weight between each mission row and the row above it.
missions['Weight Diff']=missions['Sample Weight(kg)'].diff()
missions
# The first row is NaN (not a number) because there is no previous mission to compare with.

In [None]:
# The first mission has no predecessor, so its NaN difference is replaced by 0.
missions['Weight Diff'] = missions['Weight Diff'].fillna(0)
missions

In [None]:
# Lunar module and command module names/masses per mission.
# Source: https://nssdc.gsfc.nasa.gov/nmc/SpacecraftQuery.jsp
#
# The values are given as lists, not sets: a set has no defined order (and drops
# duplicates), so it cannot be aligned row-by-row with the missions table, and
# pandas rejects it with a TypeError. Each list is ordered to match the rows of
# `missions`.
missions['Lunar Module (LM)'] = ['Eagle (LM-5)', 'Intrepid (LM-6)', 'Antares (LM-8)', 'Falcon (LM-10)', 'Orion (LM-11)', 'Challenger (LM-12)']
missions['LM Mass (kg)'] = [15103, 15235, 15264, 16430, 16445, 16456]
# Change in mass versus the previous mission; the first row has no predecessor, so use 0.
missions['LM Mass Diff'] = missions['LM Mass (kg)'].diff().fillna(value=0)

missions['Command Module (CM)'] = ['Columbia (CSM-107)', 'Yankee Clipper (CM-108)', 'Kitty Hawk (CM-110)', 'Endeavor (CM-112)', 'Casper (CM-113)', 'America (CM-114)']
missions['CM Mass (kg)'] = [5560, 5609, 5758, 5875, 5840, 5960]
missions['CM Mass Diff'] = missions['CM Mass (kg)'].diff().fillna(value=0)

missions

In [None]:
# Combined lunar-module + command-module mass per mission, and the combined
# mission-to-mission change in that mass.
missions['Total Weight (kg)'] = missions['LM Mass (kg)'].add(missions['CM Mass (kg)'])
missions['Total Weight Diff'] = missions['LM Mass Diff'].add(missions['CM Mass Diff'])
missions

In [None]:
# Ratios between sample weight, crewed-area (LM + CM) mass and rocket payload.
# NOTE(review): 43500 is presumably the Saturn V payload capacity in kg - confirm the source.
saturnVPayload = 43500
missions['Crewed Area : Payload'] = missions['Total Weight (kg)'].div(saturnVPayload)
missions['Sample : Crewed Area'] = missions['Sample Weight(kg)'].div(missions['Total Weight (kg)'])
missions['Sample : Payload'] = missions['Sample Weight(kg)'].div(saturnVPayload)
missions

In [None]:
# Average each ratio over all missions and keep the results for the later estimates.
crewedArea_payload_ratio = missions['Crewed Area : Payload'].mean()
sample_crewedArea_ratio = missions['Sample : Crewed Area'].mean()
sample_payload_ratio = missions['Sample : Payload'].mean()
# One value per line, in the same order as computed above.
print(
    crewedArea_payload_ratio,
    sample_crewedArea_ratio,
    sample_payload_ratio,
    sep='\n',
)

In [None]:
# Table of Artemis missions: every mission shares the same crewed-area mass,
# while the payload differs per mission.
artemis_crewedArea = 26520
artemis_mission = pandas.DataFrame({
    'Mission': ['artemis1', 'artemis1b', 'artemis2'],
    'Total Weight (kg)': [artemis_crewedArea] * 3,
    'Payload (kg)': [26988, 37965, 42955],
})
artemis_mission

In [None]:
# Estimate the sample weight for each Artemis mission by applying the average
# ratios computed from the Apollo missions table: once from the crewed-area mass
# and once from the payload.
artemis_mission['Sample Weight from Total (kg)'] = artemis_mission['Total Weight (kg)'].mul(sample_crewedArea_ratio)
artemis_mission['Sample Weight from Payload (kg)'] = artemis_mission['Payload (kg)'].mul(sample_payload_ratio)
artemis_mission

In [None]:
# Final estimate: the average of the payload-based and crewed-area-based predictions.
artemis_mission['Estimated Sample Weight (kg)'] = (
    artemis_mission['Sample Weight from Payload (kg)']
    .add(artemis_mission['Sample Weight from Total (kg)'])
    .div(2)
)
artemis_mission

In [None]:
# Prioritize Moon rock sample gathering based on data.
# Remaining weight of each sample = collected weight x fraction still pristine
# ('Pristine(%)' is a percentage, hence the factor .01).
rock_samples['Remaining(kg)'] = rock_samples['Weight(kg)'].mul(
    rock_samples['Pristine(%)'].mul(.01)
)
rock_samples.head()


In [None]:
# head() alone makes it difficult to understand the range of values; describe() gives summary statistics for each numeric column.
rock_samples.describe()
# This shows that, on average, each sample weighs about .16 kg and has about 84% of its original amount remaining. We can use this knowledge to extract only the samples that are likely running low, meaning they have been used a lot by researchers.


In [None]:
# Samples likely running low: at least .16 kg in weight, with no more than
# 50% of the sample still pristine.
low_samples = rock_samples.loc[
    rock_samples['Weight(kg)'].ge(.16) & rock_samples['Pristine(%)'].le(50)
]
low_samples.head()

In [None]:
# Check how many samples matched the "running low" filter.
low_samples.info()

In [None]:
# Twenty-seven samples seems like a small amount to base a recommendation on. We can probably find other samples that are needed for more research here on Earth. To discover them, use unique() to see which rock types appear in the low_samples and rock_samples dataframes.
low_samples.Type.unique()


In [None]:
# Rock types present in the full data set, for comparison with the low samples.
rock_samples.Type.unique()

In [None]:
# Number of low samples (non-null weights) per rock type.
low_samples.groupby('Type')['Weight(kg)'].count()

In [None]:
# There are more Basalt and Breccia rocks among the low samples than Core and Soil.
# Because every mission very likely has some Core and Soil collection requirements
# anyway, the needed samples are restricted to the Basalt and Breccia types.
needed_samples = low_samples.loc[
    low_samples['Type'].isin(['Basalt', 'Breccia'])
]
needed_samples.info()

In [None]:
# Develop a recommendation of Moon rock samples to be collected

# Step back and compare the number of samples with the amount of sample: total weight per type in needed_samples (the samples identified as running low) versus total weight per type in rock_samples (all samples collected on the Apollo missions).
needed_samples.groupby('Type')['Weight(kg)'].sum()

In [None]:
# Total weight per rock type across all samples, for comparison with the needed samples.
rock_samples.groupby('Type')['Weight(kg)'].sum()

In [None]:
# One bit of information really stands out: we've never had a lot of Crustal rocks in the first place.

# Add all Crustal rocks to the set of needed samples.
# pandas.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Any Crustal rows already present are dropped first so that re-running this cell
# does not append the same rows a second time.
needed_samples = pandas.concat([
    needed_samples.loc[needed_samples['Type'] != 'Crustal'],
    rock_samples.loc[rock_samples['Type'] == 'Crustal'],
])
needed_samples.info()

In [None]:





# Summary of needed samples

# Consolidate everything into one table that can be shared with the astronauts.
# Start with one row per rock type that we want more samples of.
needed_samples_overview = pandas.DataFrame({'Type': needed_samples.Type.unique()})
needed_samples_overview

In [None]:
# Add the total weight originally collected for each needed rock type.
needed_sample_weights = needed_samples.groupby('Type')['Weight(kg)'].sum().reset_index()
needed_samples_overview = needed_samples_overview.merge(
    needed_sample_weights, on='Type'
).rename(columns={'Weight(kg)': 'Total Weight(kg)'})
needed_samples_overview

In [None]:
# Astronauts can identify rocks partly by size, so add the average sample
# weight of each needed rock type as a rough size guide.
needed_sample_ave_weights = needed_samples.groupby('Type')['Weight(kg)'].mean().reset_index()
needed_samples_overview = needed_samples_overview.merge(
    needed_sample_ave_weights, on='Type'
).rename(columns={'Weight(kg)': 'Ave Weight(kg)'})
needed_samples_overview

In [None]:
# Crustals are small! They're probably a lot harder to spot, so no wonder we don't have a lot of them.

# To indicate how many of each type to collect, count how many samples of each
# needed type exist in the full data set and compute each type's share of that total.
total_rock_count = rock_samples.groupby('Type')['ID'].count().reset_index()
needed_samples_overview = needed_samples_overview.merge(
    total_rock_count, on='Type'
).rename(columns={'ID': 'Number of Samples'})
total_rocks = needed_samples_overview['Number of Samples'].sum()
# Despite the column name, this is a fraction between 0 and 1, not a value out of 100.
needed_samples_overview['Percentage of Rocks'] = needed_samples_overview['Number of Samples'].div(total_rocks)
needed_samples_overview

In [None]:
# To tie this back into a recommendation for the Artemis program, take the mean of the estimated sample weights across the Artemis missions.
artemis_ave_weight = artemis_mission['Estimated Sample Weight (kg)'].mean()
artemis_ave_weight

In [None]:
# Split the average estimated Artemis sample weight across the needed rock types
# by their share of samples, then convert each weight into a number of rocks
# using the type's average sample weight.
needed_samples_overview['Weight to Collect'] = needed_samples_overview['Percentage of Rocks'].mul(artemis_ave_weight)

needed_samples_overview['Rocks to Collect'] = needed_samples_overview['Weight to Collect'].div(
    needed_samples_overview['Ave Weight(kg)']
)

needed_samples_overview

In [None]:
# Total weight (kg) of all samples in rock_samples.
# NOTE(review): this rebinds sample_total_weight, which an earlier cell used for the
# per-mission totals Series; after this cell the name holds a single number.
sample_total_weight = rock_samples['Weight(kg)'].sum()
sample_total_weight 