Notebook for computing pairing probabilities of ordinary chondrites

H, L, and LL meteorites are separated

In [7]:
import itertools

import numpy as np
import pandas as pd


from models.meteorite import Meteorite
from scripts.pairing import calculate_pairing_probability



In [8]:
# Load the raw measurements (semicolon-separated file).
data_test = pd.read_csv('tests.csv', delimiter=';')


# Replace decimal commas with decimal points, then convert every column that
# parses as a number to float.

for col in data_test.columns:
    # Only real strings are rewritten, so missing values stay NaN instead of
    # being turned into the literal text "nan" (which str(x) would produce).
    cleaned = data_test[col].map(lambda x: x.replace(",", ".") if isinstance(x, str) else x)
    try:
        data_test[col] = cleaned.astype(float)
    except (ValueError, TypeError):
        # Not a numeric column (names, group labels, ...): keep it as text.
        data_test[col] = cleaned

# Pairing data: an independent copy, so later changes to it cannot silently
# alter data_test (plain assignment would only create a second name).
pairing_data_test = data_test.copy()
display(pairing_data_test)
display(pairing_data_test.dtypes)




Unnamed: 0,Work name,Official name,TKW (g),n,Magnetic Susceptibility,Group,Petrographic Type,Weathering Grade,Fayalite Content,Ferrosilite Content,Wo,Longitude,Latitude
0,Met1,NBA 543,333.8,1.0,4.76,H,5.0,2.0,18.4,15.9,1.11,-69.91865,-25.07382
1,Met2,NBA 926,328.0,1.0,4.89,H,4.0,0.0,17.0,15.2,1.0,-69.93073,-25.0701
2,Met3,NBA 906,282.18,2.0,5.5,H,5.0,1.0,19.7,17.1,1.4,-69.926319,-25.073038
3,Met4,NBA 725,280.0,1.0,4.5,H,5.5,2.0,20.0,17.5,1.4,-69.91627,-25.09617
4,Met5,NBA 245,277.0,1.0,4.87,H,6.0,3.0,17.0,17.0,0.9,-69.90556,-25.08939
5,Met6,NBA 917,275.79,1.0,5.4,H,3.0,4.0,18.8,16.1,1.3,-69.924361,-25.070903
6,Met7,NBA 747,269.0,1.0,4.76,H,5.0,2.0,18.4,15.9,1.11,-69.91727,-25.07558
7,Met8,NBA 477,268.2,1.0,4.55,H,6.0,2.0,17.5,16.5,1.25,-69.91232,-25.0789
8,Met9,NBA 752,251.4,1.0,5.12,H,5.0,5.0,18.4,15.8,0.8,-69.91697,-25.09413


Work name                   object
Official name               object
TKW (g)                    float64
n                          float64
Magnetic Susceptibility    float64
Group                       object
Petrographic Type          float64
Weathering Grade           float64
Fayalite Content           float64
Ferrosilite Content        float64
Wo                         float64
Longitude                  float64
Latitude                   float64
dtype: object

In [9]:
# One Meteorite object per table row; position is given as (latitude, longitude).
meteorites_list_test = [
    Meteorite(
        name=record["Work name"],
        position=(record["Latitude"], record["Longitude"]),
        petrographic_type=record["Petrographic Type"],
        weathering_grade=record["Weathering Grade"],
        fa_content=record["Fayalite Content"],
        fs_content=record["Ferrosilite Content"],
        mag_sus=record["Magnetic Susceptibility"],
    )
    for _, record in pairing_data_test.iterrows()
]


In [10]:
# Sanity check: show the objects that were built, one per input row.
display(meteorites_list_test)


[Met1, Met2, Met3, Met4, Met5, Met6, Met7, Met8, Met9]

## <u>Calculation :</u>
- Generating all possible pairs of meteorites (unordered, without repetition), which avoids computing the same pair twice
- Creating a DataFrame (2D table) with col, index = meteorites
- Setting diagonals to 1 (P(A, A) == 1)
- Calculating the pairing probabilities from the list
- Assigning the results to the DataFrame (twice, as P(A, B) == P(B, A))

In [11]:
# Every unordered pair of distinct meteorites, in list order.
combinations_test = [*itertools.combinations(meteorites_list_test, 2)]

display(combinations_test)


[(Met1, Met2),
 (Met1, Met3),
 (Met1, Met4),
 (Met1, Met5),
 (Met1, Met6),
 (Met1, Met7),
 (Met1, Met8),
 (Met1, Met9),
 (Met2, Met3),
 (Met2, Met4),
 (Met2, Met5),
 (Met2, Met6),
 (Met2, Met7),
 (Met2, Met8),
 (Met2, Met9),
 (Met3, Met4),
 (Met3, Met5),
 (Met3, Met6),
 (Met3, Met7),
 (Met3, Met8),
 (Met3, Met9),
 (Met4, Met5),
 (Met4, Met6),
 (Met4, Met7),
 (Met4, Met8),
 (Met4, Met9),
 (Met5, Met6),
 (Met5, Met7),
 (Met5, Met8),
 (Met5, Met9),
 (Met6, Met7),
 (Met6, Met8),
 (Met6, Met9),
 (Met7, Met8),
 (Met7, Met9),
 (Met8, Met9)]

In [12]:
# Square table with one row and one column per meteorite.
# The matrix is assembled in NumPy first: writing into `DataFrame.values`
# afterwards only works while pandas hands back a writable view, which is not
# the case under Copy-on-Write. Building it this way also gives a float64
# table instead of an object-dtype one.
pairing_matrix_test = np.full((len(meteorites_list_test),) * 2, np.nan)
np.fill_diagonal(pairing_matrix_test, 1.0)  # P(A, A) == 1
df_pairing_test = pd.DataFrame(
    pairing_matrix_test, index=meteorites_list_test, columns=meteorites_list_test
)


In [13]:
# Inspect the table before the pairwise probabilities are written into it.
display(df_pairing_test)


Unnamed: 0,Met1,Met2,Met3,Met4,Met5,Met6,Met7,Met8,Met9
Met1,1.0,,,,,,,,
Met2,,1.0,,,,,,,
Met3,,,1.0,,,,,,
Met4,,,,1.0,,,,,
Met5,,,,,1.0,,,,
Met6,,,,,,1.0,,,
Met7,,,,,,,1.0,,
Met8,,,,,,,,1.0,
Met9,,,,,,,,,1.0


In [14]:
# Result store keyed by meteorite pair; values start as None until computed.
combinations_results_test = {pair: None for pair in combinations_test}


In [15]:
# Compute each pair once and mirror the value across the diagonal,
# since P(A, B) == P(B, A).
for first_met, second_met in combinations_results_test:
    probability = calculate_pairing_probability(met_1=first_met, met_2=second_met)
    combinations_results_test[(first_met, second_met)] = probability
    df_pairing_test.at[first_met, second_met] = probability
    df_pairing_test.at[second_met, first_met] = probability


Convert the index and column labels to strings (they are currently Meteorite objects, which could cause problems when the table is stored)

In [24]:
# Replace the Meteorite-object labels on both axes with their string form.
df_pairing_test.index, df_pairing_test.columns = (
    df_pairing_test.index.astype(str),
    df_pairing_test.columns.astype(str),
)


In [25]:
# Show the table after the pairwise probabilities have been filled in.
display(df_pairing_test)

Unnamed: 0,Met1,Met2,Met3,Met4,Met5,Met6,Met7,Met8,Met9
Met1,1.0,0.472805,0.032173,0.223279,0.0,0.0,0.989519,0.0,0.0
Met2,0.472805,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Met3,0.032173,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
Met4,0.223279,0.0,0.0,1.0,0.0,0.0,0.223279,0.0,0.0
Met5,0.0,0.0,0.0,0.0,1.0,0.0,0.409834,0.458012,0.0
Met6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
Met7,0.989519,0.0,0.0,0.223279,0.409834,0.0,1.0,0.0,0.0
Met8,0.0,0.0,0.0,0.0,0.458012,0.0,0.0,1.0,0.0
Met9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


#### Note :
Future use should include parallel processing, as the number of calculations is likely to increase sharply with the number of meteorites.

In [26]:
# Stored as a pickle so the index and column labels come back unchanged on reload.
df_pairing_test.to_pickle("datakeep/pairings_test.pkl")


In [27]:
# Round-trip check: reload the file that was just written and show it.
display(pd.read_pickle("datakeep/pairings_test.pkl"))


Unnamed: 0,Met1,Met2,Met3,Met4,Met5,Met6,Met7,Met8,Met9
Met1,1.0,0.472805,0.032173,0.223279,0.0,0.0,0.989519,0.0,0.0
Met2,0.472805,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Met3,0.032173,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
Met4,0.223279,0.0,0.0,1.0,0.0,0.0,0.223279,0.0,0.0
Met5,0.0,0.0,0.0,0.0,1.0,0.0,0.409834,0.458012,0.0
Met6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
Met7,0.989519,0.0,0.0,0.223279,0.409834,0.0,1.0,0.0,0.0
Met8,0.0,0.0,0.0,0.0,0.458012,0.0,0.0,1.0,0.0
Met9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### Numbers


Take values above diagonal

In [31]:
# Raw matrix of pairing probabilities.
pairing_values_test = df_pairing_test.to_numpy()

# Keep only the entries strictly above the diagonal (k=1), so each pair is
# counted once and the diagonal is left out.
above_diagonal_values_test = pairing_values_test[
    np.triu_indices(len(pairing_values_test), k=1)
]



In [32]:
# Average pairing probability over all distinct pairs.
mean_probability_test = np.mean(above_diagonal_values_test)
print(f"{mean_probability_test:.2f}")

# Sample size before pairing, then the same count scaled by (1 - mean probability).
print(f"{len(pairing_data_test)} H chondrites before pairing")
print(f"{len(pairing_data_test) * (1 - mean_probability_test):.0f} H chondrites after pairing")

0.08
9 H chondrites before pairing
8 H chondrites after pairing
