In [1]:
import pandas as pd
import re

In [2]:
meteor_df = pd.read_csv('./cleaned_meteorite_landing.csv')

In [70]:
# Define classification groups

chondrites = {
    'chondrite':['chondrite'],
    'carbonaceous':['cb', 'cba', 'cbb', 'ch', 'ck', 'cm', 'cr', 'cv', 'co', 'ci', 'c'],
    'ordinary':['h', 'l', 'll'],
    'rumuruti':['r'],
    'enstatite':['eh', 'el', 'enst'],
    'k':['k']
}

achondrites = {
    'achondrite':['achondrite'],
    'primitive_achondrite':['iodranite', 'acapulcoite', 'winonaite'],
    'martian':['martian', 'shergottite', 'nakhlite', 'chassignite', 'opx'],
    'aubrite':['aubrite'],
    'ureilite':['ureilite'],
    'hed':['eucrite', 'diogenite', 'howardite'],
    'angrite':['angrite'],
    'brachinite':['brachinite'],
    'lunar':['lunar', 'feldsp. breccia', 'basalt']
}

stony_iron = ['stone', 'pallasite', 'mesosiderite']

iron = ['iron', 'iab', 'iiab', 'iiiab', 'iva']

In [71]:
# Define functions to determine is a classification belings to the specified parent class

def is_achondrite(input_a):
    for key in achondrites.keys():
#         print(f'key = {key}')
        for val in achondrites[key]:
#             print(f'val = {val}')  
            if re.match(f'^{val}', input_a) or re.match(f'^{val}$', input_a):
#                 print(f'val = {val}, classification = {input_a}')
                return True
    return False
            
def is_chondrite(input_c):
    for key in chondrites.keys():
        for val in chondrites[key]:
            if re.match(f'^{val}[^a-zA-z]', input_c) or re.match(f'^{val}$', input_c):
#                 print(f'val = {val}, classification = {input_c}')
                return True
    return False

def is_stony_iron(input_s):
    for val in stony_iron:
        if re.match(f'^{val}', input_s):
            return True
    return False

def is_iron(input_i):
    for val in iron:
        if re.match(f'^{val}', input_i):
            return True
    return False

# Define function to return parent class as a string

def get_parent_class(classification):
    if is_achondrite(classification) == True:
        return 'achondrite'
    elif is_chondrite(classification) == True:
        return 'chondrite'
    elif is_stony_iron(classification) == True:
        return 'stony-iron'
    elif is_iron(classification) == True:
        return 'iron'
    else:
        return 'unclassified'

In [72]:
# Create a list of all raw classifications, convert to all lower

classes = meteor_df['classification'].tolist()

classes = [val.lower() for val in classes]

In [73]:
# Create list of parent classes

parent_class = [get_parent_class(val) for val in classes]
parent_class[0:5]

['chondrite', 'chondrite', 'chondrite', 'achondrite', 'chondrite']

In [74]:
# Create new column with parent classification info and reorder columns

meteor_df['parent_class'] = parent_class
meteor_df = meteor_df[['name', 'meteor_first_word', 'classification', 'parent_class', 'mass', 'fall', 'year', 'latitude', 'longitude']]
meteor_df

Unnamed: 0,name,meteor_first_word,classification,parent_class,mass,fall,year,latitude,longitude
0,Aachen,Aachen,L5,chondrite,21.0,Fell,1880,50.77500,6.08333
1,Aarhus,Aarhus,H6,chondrite,720.0,Fell,1951,56.18333,10.23333
2,Abee,Abee,EH4,chondrite,107000.0,Fell,1952,54.21667,-113.00000
3,Acapulco,Acapulco,Acapulcoite,achondrite,1914.0,Fell,1976,16.88333,-99.90000
4,Achiras,Achiras,L6,chondrite,780.0,Fell,1902,-33.16667,-64.95000
...,...,...,...,...,...,...,...,...,...
31700,Zillah 002,Zillah,Eucrite,achondrite,172.0,Found,1990,29.03700,17.01850
31701,Zinder,Zinder,"Pallasite, ungrouped",stony-iron,46.0,Found,1999,13.78333,8.96667
31702,Zlin,Zlin,H4,chondrite,3.3,Found,1939,49.25000,17.66667
31703,Zubkovsky,Zubkovsky,L6,chondrite,2167.0,Found,2003,49.78917,41.50460


In [77]:
meteor_df[meteor_df['classification'] == 'Lodranite-an']

Unnamed: 0,name,meteor_first_word,classification,parent_class,mass,fall,year,latitude,longitude
27146,Yamato 74357,Yamato,Lodranite-an,unclassified,13.8,Found,1974,-71.74722,35.80556


In [76]:
temp = meteor_df[meteor_df['parent_class'] == 'unclassified']
temp
temp['classification'].unique()

array(['OC', 'Lodranite', 'E4', 'E6', 'Relict iron', 'E', 'E3-an', 'E3',
       'Fusion crust', 'Relict OC', 'E-an', 'E5', 'Lodranite-an', 'E5-an'],
      dtype=object)

In [95]:
# Export to csv

meteor_df.to_csv('cleaned_meteorite_landing_updated.csv')