In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import DiadFit as pf
import mineralML as mm
import Thermobar as pt
import os


## Here we concatenate all the EDS data by sheet type (standards, olivines, MI glasses)

In [2]:
# Gather the 'Standards', 'Olivine_Data' and 'Glass_Data' sheets from every
# Excel workbook in the working directory, stack them by sheet type, and write
# the three stacked tables to a single workbook ('concatenated_data.xlsx').

folder_path = os.getcwd()  # Set the folder path to the current working directory

# Create dictionaries to hold DataFrames for each sheet type,
# keyed by the source filename without its extension
standards_dict = {}
olivines_dict = {}
glasses_dict = {}

# Define the sheet names
sheet_names = {
    'Standards': 'Standards',
    'Olivine_Data': 'Olivine_Data',
    'Glass_Data': 'Glass_Data'
}

# Dispatch table: which dictionary collects which sheet type
frames_by_key = {
    'Standards': standards_dict,
    'Olivine_Data': olivines_dict,
    'Glass_Data': glasses_dict,
}


def _concat_or_empty(frames_by_file, label):
    """Stack the collected DataFrames row-wise, or return an empty DataFrame.

    pd.concat raises ValueError when given nothing to concatenate, which would
    abort the export just because no workbook contained this sheet type.
    """
    if not frames_by_file:
        print(f"No '{label}' sheets were read; writing an empty sheet instead")
        return pd.DataFrame()
    return pd.concat(list(frames_by_file.values()), axis=0, join='outer')


# Loop through all files in the specified folder.
# sorted() makes the row order of the output reproducible; os.listdir order is arbitrary.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(('.xlsx', '.xls')):
        continue
    # Skip previously exported results so they are not fed back in on a re-run
    if "concatenated" in filename:
        continue
    # Skip Office lock files (~$name.xlsx) left behind while a workbook is open
    if filename.startswith('~$'):
        continue

    file_path = os.path.join(folder_path, filename)
    file_key = os.path.splitext(filename)[0]

    # Read each sheet independently: a workbook that lacks one sheet
    # (e.g. 'Glass_Data') must still contribute the sheets it does have.
    for key, sheet_name in sheet_names.items():
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name)
            # Drop columns with "Unnamed" in their header; astype(str) keeps the
            # .str accessor working when some headers are numbers
            df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
        except Exception as e:
            print(f"Error reading {filename}: {e}")
            continue

        # Store the DataFrame in the corresponding dictionary
        frames_by_key[key][file_key] = df

# Concatenate all DataFrames in each dictionary
concatenated_standards = _concat_or_empty(standards_dict, 'Standards')
concatenated_olivines = _concat_or_empty(olivines_dict, 'Olivine_Data')
concatenated_glasses = _concat_or_empty(glasses_dict, 'Glass_Data')

# Define the output file path
output_file_path = os.path.join(folder_path, 'concatenated_data.xlsx')

# Use ExcelWriter to export multiple sheets to a single Excel file
with pd.ExcelWriter(output_file_path) as writer:
    concatenated_standards.to_excel(writer, sheet_name='concatenated_standards', index=False)
    concatenated_olivines.to_excel(writer, sheet_name='concatenated_olivines', index=False)
    concatenated_glasses.to_excel(writer, sheet_name='concatenated_glasses', index=False)

print(f"Data has been concatenated and exported to {output_file_path}")


Error reading Oct15th.xlsx: Worksheet named 'Glass_Data' not found
Error reading Oct4th.xlsx: Worksheet named 'Glass_Data' not found
Error reading Oct1718th.xlsx: Worksheet named 'Glass_Data' not found
Error reading Oct25th.xlsx: Worksheet named 'Glass_Data' not found
Error reading R3_June1824_modDec0624.xlsx: Worksheet named 'Glass_Data' not found
Data has been concatenated and exported to /Users/cljd/pCloud Drive/WORK-GENERAL/POSTDOC-UCB/BERKELEY-VIBE/Documents/Projects/Kamaehu2024/2large4GIT/EDS/Processed_data/concatenated_data.xlsx



## Run the concatenated olivine data through minml

In [3]:
# Load the stacked olivine sheet from the exported workbook, pass it through
# pt.minClass, and copy the returned table to the clipboard in Excel format.
olivines_in = pd.read_excel("concatenated_data.xlsx", sheet_name='concatenated_olivines')
df_final = pt.minClass(olivines_in)
df_final.to_clipboard(excel=True)

  check_point = torch.load(path)


## If you prefer a separate output file for one sheet type at a time, use the cell below

In [None]:
import os
import pandas as pd

# Stack one sheet type (chosen with `sheet_name` below) from every Excel
# workbook in the working directory and export it to its own file.

folder_path = os.getcwd()  # Set the folder path to the current working directory
# Name kept for compatibility: holds the frames for whichever sheet_name is
# selected, keyed by the source filename without its extension.
standards_dict = {}
# sheet_name='Standards'
sheet_name='Olivine_Data'
# sheet_name='Glass_Data'

# Output file per sheet type, so switching sheet_name no longer writes
# standards or glasses into 'concatenated_olivines.xlsx'
output_names = {
    'Standards': 'concatenated_standards.xlsx',
    'Olivine_Data': 'concatenated_olivines.xlsx',
    'Glass_Data': 'concatenated_glasses.xlsx',
}

# Loop through all files in the specified folder.
# sorted() makes the row order of the output reproducible; os.listdir order is arbitrary.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(('.xlsx', '.xls')):
        continue
    # Skip previously exported results (including this cell's own output)
    # so they are not fed back in on a re-run
    if "concatenated" in filename:
        continue
    # Skip Office lock files (~$name.xlsx) left behind while a workbook is open
    if filename.startswith('~$'):
        continue

    file_path = os.path.join(folder_path, filename)
    try:
        # Read the selected sheet into a DataFrame
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        # Drop columns with "Unnamed" in their header; astype(str) keeps the
        # .str accessor working when some headers are numbers
        df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
        # Store the DataFrame in the dictionary using the filename (without extension) as the key
        standards_dict[os.path.splitext(filename)[0]] = df
    except Exception as e:
        print(f"Error reading {filename}: {e}")

output_file_path = os.path.join(
    folder_path, output_names.get(sheet_name, f'concatenated_{sheet_name}.xlsx')
)

if standards_dict:
    # Concatenate all DataFrames in the dictionary
    concatenated_df = pd.concat(list(standards_dict.values()), axis=0, join='outer')
    concatenated_df.to_excel(output_file_path, index=False)
else:
    # pd.concat raises ValueError on an empty collection; report the problem instead
    concatenated_df = pd.DataFrame()
    print(f"No '{sheet_name}' sheets were read; nothing exported")