In [None]:
import os
import pandas as pd

In [None]:
# Load the raw geochemistry workbooks

# XRF: only spreadsheet columns A:BB are read, which leaves out the
# repeats and standards; the first column becomes the index
xrf = pd.read_excel(
    'raw/GAL-DV-21_XRF.xlsx',
    index_col=0,
    usecols='A:BB',
)

# ICPMS: only the first 54 data rows are read, which leaves out the
# repeats and standards; the first column becomes the index
icpms = pd.read_excel(
    'raw/DV-21-ICPMS.xlsx',
    index_col=0,
    nrows=54,
)

# Ensure the output folder for processed data exists (no error if it already does)
os.makedirs('processed', exist_ok=True)

In [None]:
# Normalized major elements: zero-based row positions 19 through 28 of the
# XRF table, transposed so that the samples become the index
majors_norm = xrf.iloc[19:29].T

# Strip every space character out of the major-element column labels
majors_norm_corr = majors_norm.copy()
majors_norm_corr.columns = majors_norm.columns.str.replace(' ','')

# XRF trace elements: zero-based row positions 32 through 50, likewise transposed
xrf_trace = xrf.iloc[32:51].T

# Drop the first character of every trace-element label
# NOTE(review): presumably that character is a leading space -- confirm against the raw sheet
xrf_trace_corr = xrf_trace.copy()
xrf_trace_corr.columns = xrf_trace.columns.str[1:]

# Drop the last four characters of every ICPMS label
# NOTE(review): presumably a trailing ' ppm' unit tag -- confirm against the raw sheet
icpms_corr = icpms.copy()
icpms_corr.columns = icpms.columns.str[:-4]

# Where a column label appears in both tables, keep the ICPMS column and
# discard the XRF one
common_cols = xrf_trace_corr.columns.intersection(icpms_corr.columns)
xrf_trace_culled = xrf_trace_corr.drop(columns=common_cols)

# Sanity checks: list the columns of each table, then report whether the
# sample indexes agree between tables
for table in (majors_norm_corr, xrf_trace_culled, icpms_corr):
    print(table.columns)
print(majors_norm_corr.index.equals(xrf_trace_culled.index))
print(xrf_trace_culled.index.equals(icpms_corr.index))

# Merge the three tables side by side into one dataframe.
# concat aligns rows on the index, so a False printed above means the
# combined table will contain NaN-padded rows.
data_organized = pd.concat(
    [majors_norm_corr, xrf_trace_culled, icpms_corr],
    axis='columns',
)
print(data_organized.columns)


 

In [None]:
# Read in metadata, using the first CSV column as the index
meta = pd.read_csv('metadata/gchm_smps_long.csv', index_col=0)

# Metadata columns to carry over into the final table
meta_cols = ['Latitude', 'Longitude', 'Rock_Type', 'Period', 'S_Domain']

# Create all-zero placeholder rows for 184 and 186 for now.
# Only add a placeholder when the sample is absent: an unconditional
# assignment would overwrite real metadata with zeros as soon as these
# samples are added to the CSV.
for sample_id in ('G22184', 'G22186'):
    if sample_id not in meta.index:
        meta.loc[sample_id, :] = 0

# Fix the misspelled Khaishi; the affected sample labels are printed for the record
typo = meta[meta['S_Domain'] == 'Khashi'].index
print(typo)
meta.loc[typo, 'S_Domain'] = 'Khaishi'

# Keep only the samples that were actually run and the columns of interest.
# .loc raises KeyError if any analysed sample has no metadata row.
meta_trimmed = meta.loc[data_organized.index, meta_cols]

# Add metadata to the main table (joined on the sample index)
data_final = data_organized.join(meta_trimmed)

In [None]:
# Save the combined geochemistry + metadata table as CSV in the processed folder
processed_csv = 'processed/data.csv'
data_final.to_csv(processed_csv)