In [1]:
import os
import json
import pandas as pd
import numpy as np

In [3]:
def generate_labels(file_list, saveto, input_directory=None):
    """Extract per-sample labels from measurement JSON files and save them as CSV.

    For every file, the first entry of ``data["sample"]["measurement"]`` is
    read and three values are recorded under the file's base name:

    * ``phase_id``          -- ``data["sample"]["material"]["phase_id"]``
    * ``thermal_expansion`` -- the property scalar multiplied by 1e6 (the
      target unit is 1e-6/K); a volume coefficient is additionally divided
      by 3 to convert it to a linear coefficient
    * ``temperature``       -- the scalar of the first condition, or the mean
      of its ``"range"`` when no scalar is present

    Parameters
    ----------
    file_list : iterable of str
        File names (relative to the input directory) to process.
    saveto : str
        Path of the CSV file to write; one row per input file.
    input_directory : str, optional
        Directory containing the JSON files. When omitted, the notebook-level
        ``input_dir`` variable is used, which matches the previous behavior.

    Raises
    ------
    ValueError
        If the first measurement's property name is neither
        "linear thermal expansion coefficient" nor
        "volume thermal expansion coefficient".
    KeyError, IndexError
        If a file lacks one of the fields accessed above.
    """
    if input_directory is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        input_directory = input_dir

    labels = {}
    for file in file_list:
        # The context manager closes the handle even if parsing fails;
        # the explicit encoding avoids locale-dependent decoding.
        with open(os.path.join(input_directory, file), encoding="utf-8") as f:
            data = json.load(f)

        name = file.split(".json")[0]
        sample = data["sample"]
        # Only the first measurement of each sample is used.
        measurement = sample["measurement"][0]
        labels[name] = {"phase_id": sample["material"]["phase_id"]}

        # FIND TEC IN 1e-6/K UNITS
        property_name = measurement["property"]["name"]
        if property_name == "linear thermal expansion coefficient":
            labels[name].update({"thermal_expansion": 1e6*measurement["property"]["scalar"]})
        elif property_name == "volume thermal expansion coefficient":
            # Convert volume TEC to linear TEC by dividing by 3
            labels[name].update({"thermal_expansion": (1e6/3)*measurement["property"]["scalar"]})
        else:
            raise ValueError(
                "Unexpected property found: {} (in file {})".format(property_name, file)
            )

        # Save the temperature condition of the measurement
        condition = measurement["condition"][0]
        if "scalar" in condition:
            labels[name].update({"temperature": condition["scalar"]})
        else:
            # If a range of temperatures is given, record the midpoint
            labels[name].update({"temperature": np.mean(condition["range"])})
            # TODO: dont just use mean of data range

    # Save to file: one row per sample, one column per label
    labels_df = pd.DataFrame.from_dict(labels).T
    labels_df.to_csv(saveto)

In [5]:
# File Management
input_dir = "data_linear/"  # Input json files
output_dir = "labels/"  # Output labels
filename = "labels_linear.csv"  # name of labels file

# Make the output directory if needed (no error if it already exists)
os.makedirs(output_dir, exist_ok=True)

# Collect all JSON files from the input directory.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the resulting CSV reproducible between runs.
file_type = ".json"
files = os.listdir(input_dir)
json_files = sorted(file for file in files if file.endswith(file_type))

generate_labels(json_files, os.path.join(output_dir, filename))