In [152]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
from typing import *
from IPython.display import display, Markdown

# Data Dictionary

## What is our data?
Our data describes the oxide content of volcanic rocks found in the Matan volcanic center, northern Harrat Rahat, Saudi Arabia. Measurements were made using an electron microprobe, and the originators of this study are Mark E. Stelten and Drew T. Downs.

| Column       | Description |
| -----------  | ----------- |
| Sample Name     | Sample collection number      |
| Analysis Name | Unique identifier for electron microprobe analysis     |
| Mineral phase        | Type of mineral analyzed |
| Context             | The description and classification of the rock analysis |
| SiO2 | Weight percent concentration of silicon measured by electron microprobe reported as silicon oxide |
| TiO2 |  Weight percent concentration of titanium measured by electron microprobe reported as titanium oxide|
| Al2O3 |  Weight percent concentration of aluminum measured by electron microprobe reported as aluminum oxide |
| FeO* | Weight percent concentration of iron measured by electron microprobe reported as ferrous iron oxide. The asterisk indicates that in this measurement all the iron is calculated as ferrous iron. |
| FeO | Weight percent concentration of ferrous iron oxide recalculated from total iron measured by electron microprobe (FeO*) to maintain charge balance in spinel series minerals on a three cation - four oxygen basis, or in pyroxenes on a two cation - three oxygen basis. |
|Fe2O3| Weight percent concentration of ferric iron oxide recalculated from total iron measured by electron microprobe (FeO*) to maintain charge balance in spinel series minerals on a three cation - four oxygen basis, or in pyroxenes on a two cation - three oxygen basis. |
| CaO| Weight percent concentration of calcium measured by electron microprobe reported as calcium oxide |
| Na2O | Weight percent concentration of sodium measured by electron microprobe reported as sodium oxide |
|K2O | Weight percent concentration of potassium measured by electron microprobe reported as potassium oxide |
| MnO | Weight percent concentration of manganese measured by electron microprobe reported as manganese oxide |
| MgO | Weight percent concentration of magnesium measured by electron microprobe reported as magnesium oxide |
|NiO | Weight percent concentration of nickel measured by electron microprobe reported as nickel oxide|
| Cr2O3 | Weight percent concentration of chromium measured by electron microprobe reported as chromium oxide |
|V2O3 | Weight percent concentration of vanadium measured by electron microprobe reported as vanadium oxide |
| Total | Sum of oxide weight concentrations |

In [116]:
# Read the raw measurements from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="MVC_EMPAdata.csv")

In [153]:
def show_markdown_table(headers: List[str], data: List) -> None:
    """Render a right-aligned Markdown table in the notebook output.

    Args:
        headers: Column titles, one per table column.
        data: Table rows; each row is an iterable of cell values, which are
            converted to text with ``str``.

    Returns:
        None. The table is shown through ``display(Markdown(...))``; nothing
        is returned, so calling this as the last expression of a cell does
        not echo the raw Markdown source.
    """
    def _cell(value) -> str:
        # A literal pipe would otherwise be parsed as a column separator.
        return str(value).replace("|", "\\|")

    # Every separator ends in ':' (right alignment) and is at least "-:" wide.
    separators = [max(1, len(header) - 1) * "-" + ":" for header in headers]
    lines = [
        f"| {' | '.join(_cell(header) for header in headers)} |",
        f"| {' | '.join(separators)} |",
    ]
    lines.extend(f"| {' | '.join(_cell(item) for item in row)} |" for row in data)
    display(Markdown("\n".join(lines) + "\n"))

## Cleaning up the table

In [117]:
# Drop the unit abbreviation and the "X..." columns; no later cell in this
# notebook reads them.
df = df.drop(
    columns=[
        "Unit Abbreviation",
        "XAb", "XAn", "XOr",
        "XFa", "XFo", "XTp",
        "XWo", "XEn", "XFs",
        "XJd", "XAe", "XQuad",
    ]
)

In [118]:
# Remove every column whose name matches "Unname" (presumably the blank-header
# columns pandas labels "Unnamed: N" when reading the CSV — confirm).
# Reassigning instead of inplace=True keeps this step consistent with the
# other cleaning cells, and the labels are passed explicitly as column names.
df = df.drop(columns=df.filter(regex="Unname").columns)

In [119]:
# Remove exact duplicate rows, then the row with index label 1612.
# NOTE(review): the reason label 1612 is excluded is not recorded here — confirm
# and document it.
df = df.drop_duplicates().drop(index=[1612])

In [143]:
# Trim surrounding whitespace from every header so columns can be addressed
# by their clean names.
col = [label.strip() for label in df.columns]
df.columns = col
# Turn the "BDL" marker into NaN so the column can be treated as numeric
# (presumably "below detection limit" — confirm against the data source).
df = df.replace(to_replace="BDL", value=np.nan)

In [142]:
# Cast the oxide measurement columns to float. Each column is listed exactly
# once (the original dict literal repeated "Cr2O3").
# Na2O, K2O and Total are not cast here; the dtypes output further down shows
# them as float64 already.
df = df.astype(
    dict.fromkeys(
        [
            "SiO2", "TiO2", "Al2O3",
            "FeO*", "FeO", "Fe2O3",
            "CaO", "NiO", "V2O3",
            "Cr2O3", "MnO", "MgO",
        ],
        float,
    )
)

In [145]:
# Preview the first two rows of the cleaned table.
df.head(n=2)

Unnamed: 0,Sample Name,Analysis Name,Mineral phase,Context,SiO2,TiO2,Al2O3,FeO*,FeO,Fe2O3,CaO,Na2O,K2O,MnO,MgO,NiO,Cr2O3,V2O3,Total
0,R14DC044,R14DC044-GM-PLAG1,Plagioclase,Groundmass,53.3,,28.6,0.66,,,11.7,4.4,0.3,,,,,,99.0
1,R14DC044,R14DC044-GM-PLAG2,Plagioclase,Groundmass,54.2,,28.2,0.81,,,11.1,4.7,0.3,,,,,,99.3


## Unique Samples

In [91]:
# Distinct values of the "Sample Name" column, and how many there are.
tmp = df.loc[:, "Sample Name"].unique()
print(f"Unique Samples : {len(tmp)}")

Unique Samples : 23


# Stats!

In [146]:
# Show the dtype of every column to check the result of the float conversion.
df.dtypes

Sample Name       object
Analysis Name     object
Mineral phase     object
Context           object
SiO2             float64
TiO2             float64
Al2O3            float64
FeO*             float64
FeO              float64
Fe2O3            float64
CaO              float64
Na2O             float64
K2O              float64
MnO              float64
MgO              float64
NiO              float64
Cr2O3            float64
V2O3             float64
Total            float64
dtype: object

In [150]:
# Summary statistics of df as returned by DataFrame.describe(); stored so the
# later cells can sort the columns by their "mean" row.
describe_df = df.describe()

In [149]:
# Display the stored summary instead of recomputing it, so the table shown is
# exactly the one the following cells sort and read the means from.
describe_df

Unnamed: 0,SiO2,TiO2,Al2O3,FeO*,FeO,Fe2O3,CaO,Na2O,K2O,MnO,MgO,NiO,Cr2O3,V2O3,Total
count,1602.0,335.0,1212.0,1168.0,330.0,333.0,1356.0,1086.0,877.0,734.0,695.0,256.0,78.0,104.0,1612.0
mean,48.627903,9.232836,17.74703,10.07387,26.000606,14.136637,6.351622,5.706906,3.069783,0.779973,20.912374,0.192188,1.280769,0.557692,99.490509
std,17.435379,10.998206,10.410239,15.693972,18.403381,14.218107,6.913224,2.590681,3.138174,0.73488,16.511497,0.097907,3.540785,0.281705,1.187302
min,0.1,0.1,0.1,0.12,0.3,0.4,0.1,0.4,0.1,0.1,0.1,0.1,0.1,0.1,94.1
25%,38.7,0.8,4.8,0.31,8.8,3.0,0.3,4.5,0.3,0.3,3.5,0.1,0.1,0.4,98.9
50%,52.4,3.0,19.1,0.46,19.75,8.9,2.5,6.0,0.9,0.6,21.0,0.2,0.2,0.6,99.6
75%,64.0,22.0,26.8,16.5,48.175,21.6,11.525,7.4,5.8,1.1,37.3,0.3,0.8,0.8,100.2
max,68.7,31.0,35.7,63.2,55.2,65.1,22.1,13.8,10.0,4.4,47.3,0.4,24.9,1.2,103.4


In [213]:
# Reorder the summary's columns by their "mean" row, largest first.
rslt_df = describe_df.sort_values("mean", axis="columns", ascending=False)

In [214]:
# Select the mean row by its label rather than by position, so the lookup does
# not depend on the row order of the describe() output.
mean_list = rslt_df.loc["mean"].tolist()
# Column names in the same (sorted) order as mean_list.
cols = rslt_df.columns.tolist()

In [215]:
# Pair each column name with its mean, keeping the sorted order.
tmp = dict(zip(cols, mean_list))
# "Total" is the sum of the oxides, not an oxide itself, so leave it out.
del tmp["Total"]

# Average amount of each oxide concentration in all samples

In [216]:
# Render one table row per oxide: its name and its mean value.
show_markdown_table(
    ["Chemical", "Average amount in samples"],
    [[oxide, mean] for oxide, mean in tmp.items()],
)

| Chemical | Average amount in samples |
| -------: | ------------------------: |
| SiO2 | 48.627902621723116 |
| FeO | 26.000606060606067 |
| MgO | 20.91237410071943 |
| Al2O3 | 17.747029702970302 |
| Fe2O3 | 14.136636636636629 |
| FeO* | 10.073869863013702 |
| TiO2 | 9.232835820895527 |
| CaO | 6.351622418879059 |
| Na2O | 5.706906077348066 |
| K2O | 3.0697833523375144 |
| Cr2O3 | 1.28076923076923 |
| MnO | 0.779972752043596 |
| V2O3 | 0.5576923076923076 |
| NiO | 0.19218750000000034 |


In [219]:
# Show a single row as a reminder of the available columns.
df.head(n=1)

Unnamed: 0,Sample Name,Analysis Name,Mineral phase,Context,SiO2,TiO2,Al2O3,FeO*,FeO,Fe2O3,CaO,Na2O,K2O,MnO,MgO,NiO,Cr2O3,V2O3,Total
0,R14DC044,R14DC044-GM-PLAG1,Plagioclase,Groundmass,53.3,,28.6,0.66,,,11.7,4.4,0.3,,,,,,99.0


In [None]:
## Do concentrations based on context and mineral phase
## Get counts of each mineral phase and of the two contexts
## See if different mineral phases have different contexts
## Start visualization