In [1]:
import pandas as pd
import json

In [2]:

def read_maximme(json_metadata_path, char_types):
    """Collect data-file names from assays carrying a wanted characteristic.

    The JSON metadata file is walked study by study and assay by assay.
    Whenever one of an assay's characteristic categories has a
    ``characteristicType.annotationValue`` contained in ``char_types``, the
    ``name`` of every entry in that assay's ``dataFiles`` is collected.

    Parameters
    ----------
    json_metadata_path : str or path-like
        Location of the JSON metadata file.
    char_types : container of str
        Accepted ``annotationValue`` strings.

    Returns
    -------
    list of str
        Data-file names in traversal order. An assay's names are repeated
        once per matching characteristic category.
    """
    with open(json_metadata_path, 'r') as handle:
        isa_metadata = json.load(handle)

    # One flat pass over studies -> assays -> categories -> data files.
    return [
        file_entry['name']
        for study in isa_metadata['studies']
        for assay in study['assays']
        for category in assay['characteristicCategories']
        if category['characteristicType']['annotationValue'] in char_types
        for file_entry in assay['dataFiles']
    ]


def create_parse_merge_dataframes(path_list):
    """Read whitespace-delimited data files and stack them into one DataFrame.

    Each file is read with its first line as the header. The column labels
    are then shifted one position to the left and the last parsed column is
    dropped, which compensates for a leading, stand-alone ``#`` token in the
    header line (e.g. ``# r RDF_Al-Ob ...``). This layout is likely specific
    to Maxime's data outputs.

    Parameters
    ----------
    path_list : iterable of str or path-like
        Paths of the data files to read.

    Returns
    -------
    pandas.DataFrame
        The rows of all files in input order, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``path_list`` yields no paths.
    """

    def create_pandas_df(path):
        """Read one data file and realign its column labels."""
        df = pd.read_csv(
            filepath_or_buffer=path,
            index_col=False,
            # Raw string: '\s' is not a valid escape in a normal str literal.
            sep=r'\s+'
        )

        # The leading hashtag is parsed as an extra first label, so every
        # label sits one column too far right. Keep all but the last parsed
        # column and relabel them with the labels that follow the hashtag.
        df_mod = df.iloc[:, :-1].copy()
        df_mod.columns = df.columns[1:]

        return df_mod

    # One data frame per path, in the order the paths were given.
    data_frame_list = [create_pandas_df(data_path) for data_path in path_list]

    # pd.concat raises an opaque "No objects to concatenate" on an empty
    # list; raise the same exception type with an actionable message.
    if not data_frame_list:
        raise ValueError("path_list is empty: no data files to read")

    return pd.concat(data_frame_list, ignore_index=True)


In [3]:
# Gather the data-file names of every assay that has a characteristic whose
# annotationValue is 'Simulated RDF'.
rdf_list = read_maximme('data/nmr_metadata.json', char_types=['Simulated RDF'])

In [9]:
def create_pandas_df(path):
    """Read one whitespace-delimited data file and realign its column labels.

    The first line is used as the header. The labels are shifted one position
    to the left and the last parsed column is dropped, which compensates for
    a leading, stand-alone ``#`` token in the header line
    (e.g. ``# r RDF_Al-Ob ...``).

    Parameters
    ----------
    path : str or path-like
        Path of the data file to read.

    Returns
    -------
    pandas.DataFrame
        The file's data with the labels that follow the hashtag.
    """
    df = pd.read_csv(
        filepath_or_buffer=path,
        index_col=False,
        # Raw string: '\s' is not a valid escape in a normal str literal.
        sep=r'\s+'
    )

    # The leading hashtag is parsed as an extra first label, so every label
    # sits one column too far right. Keep all but the last parsed column and
    # relabel them with the labels that follow the hashtag.
    df_mod = df.iloc[:, :-1].copy()
    df_mod.columns = df.columns[1:]

    return df_mod


def read_maximme_rdf(json_metadata_path, char_types):
    """Load the data files of matching assays into one labelled DataFrame.

    The JSON metadata file is walked study by study and assay by assay.
    For every characteristic category whose ``characteristicType.termSource``
    is contained in ``char_types``, each entry of the assay's ``dataFiles``
    is read with ``create_pandas_df`` (the entry's ``name`` is used as the
    file path as-is) and tagged with a ``dimer`` column holding that
    category's ``characteristicType.annotationValue``.

    Parameters
    ----------
    json_metadata_path : str or path-like
        Location of the JSON metadata file.
    char_types : container of str
        Accepted ``termSource`` strings.

    Returns
    -------
    pandas.DataFrame
        All loaded frames concatenated in traversal order with a fresh
        0..n-1 index and the extra ``dimer`` column.

    Raises
    ------
    ValueError
        If no data file belongs to a matching characteristic.
    """

    # Open the file.
    with open(json_metadata_path, 'r') as f:
        metadata = json.load(f)

    # Holds one data frame per (matching characteristic, data file) pair.
    data_frame_list = []

    # Iterate through the studies.
    for study in metadata['studies']:
        for assay in study['assays']:
            for char in assay['characteristicCategories']:
                char_type = char['characteristicType']
                if char_type['termSource'] not in char_types:
                    continue
                # Matching termSource found: its annotationValue is the label.
                dimer_type = char_type['annotationValue']
                for data_file in assay['dataFiles']:
                    # Create the new dataframe
                    new_data_frame = create_pandas_df(data_file['name'])
                    # Add the desired metadata to the data frame:
                    new_data_frame['dimer'] = dimer_type
                    data_frame_list.append(new_data_frame)

    # pd.concat raises an opaque "No objects to concatenate" on an empty
    # list; raise the same exception type with an actionable message.
    if not data_frame_list:
        raise ValueError(
            "No data files found for char_types={!r} in {!r}".format(
                char_types, json_metadata_path
            )
        )

    return pd.concat(data_frame_list, ignore_index=True)

In [10]:
# Load every data file of the assays that have a characteristic whose
# termSource is 'Aluminate Species'; rows are tagged with a 'dimer' label
# taken from that characteristic's annotationValue.
df = read_maximme_rdf('data/nmr_metadata.json', char_types=['Aluminate Species'])

In [18]:
# Display the combined, 'dimer'-labelled frame.
df

Unnamed: 0,r,RDF_Al-Ob,RDF_Al-Oh,RCN_Al-Ob,RCN_Al-Oh,dimer
0,0.00,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
1,0.02,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
2,0.04,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
3,0.06,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
4,0.08,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
5,0.10,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
6,0.12,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
7,0.14,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
8,0.16,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+
9,0.18,0.0,0.0000,0.0,0.0000,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+


In [20]:
# Reshape from wide to long: "r" and "dimer" stay as identifier columns,
# while the four RDF_*/RCN_* columns are stacked into a single value column,
# with the originating column name stored under "Atom Pair".
# The result is only displayed here; it is not assigned to a variable.
# NOTE(review): the stacked values come from the RDF_*/RCN_* columns, so the
# label "inter atom distance" may be a misnomer — confirm the intended name.
df.melt(
    id_vars=["r", 'dimer'],
    value_vars=['RDF_Al-Ob','RDF_Al-Oh', 'RCN_Al-Ob', 'RCN_Al-Oh'],
    value_name="inter atom distance",
    var_name="Atom Pair"
)

Unnamed: 0,r,dimer,Atom Pair,inter atom distance
0,0.00,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
1,0.02,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
2,0.04,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
3,0.06,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
4,0.08,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
5,0.10,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
6,0.12,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
7,0.14,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
8,0.16,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
9,0.18,(OH)3Al-O-Al(OH)32-+ 180 H2O + 2 Na+,RDF_Al-Ob,0.0000
