# Notebook for processing conductivity data
## Load raw data, featurize composition, generate PIFs, and upload to Citrination

In [5]:
#add parent directory to sys.path to allow module imports
#see https://stackoverflow.com/questions/714063/importing-modules-from-parent-folder/11158224#11158224
import sys
sys.path.insert(1,'..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
import glob
from helpers import fileload as fl
from helpers.calc_chemfeat import perovskite, formula_redfeat, formula_pif
from helpers.predict import predict_from_pifs
from helpers.pickletools import pickle_dict
import pypif.pif
from pypif.obj import *
import copy
from citrination_client import CitrinationClient
import pickle

### Load raw data

In [6]:
# Root of the TCO project inside the user's home directory.
# HOME is honoured when it is set (as before); when it is not (common on
# Windows), fall back to the platform's notion of the home directory
# instead of failing with a KeyError.
home_dir = os.environ.get('HOME', os.path.expanduser('~'))
tcodir = os.path.join(home_dir,'OneDrive - Colorado School of Mines/Research/MIDDMI/TCO')
# raw conductivity data, one sub-folder per source and sample
datadir = os.path.join(tcodir,'data/conductivity') #data/EIS_data
# destination for generated PIF json files
pifdir = os.path.join(datadir,'pifs')
# location of the pickled feature caches
pickledir = os.path.join(tcodir,'scripts/pickles')

def sampledir(sample,src):
    """Return the directory of `sample` inside the `src` sub-folder of `datadir`.

    The result is the path <datadir>/<src>/<sample>, joined with os.path.join.
    """
    path_parts = (datadir, src, sample)
    return os.path.join(*path_parts)

# Load the pickled feature caches, one per set of cation oxidation-state limits.
# Each cache is a pickle_dict built from a file path under `pickledir`; the
# processing cell indexes them by (sample, point) and calls update_file() on them.
calc_feat_Co23Fe34, calc_feat_Co24Fe24, calc_feat_Co34Fe23 = (
    pickle_dict(os.path.join(pickledir,cache_name))
    for cache_name in ('feat_Co23Fe34','feat_Co24Fe24','feat_Co34Fe23')
)

### Featurize composition and write to PIFs

In [31]:
# Oxidation-state limits assumed for each multivalent cation. `dest` is the
# sub-directory of `pifdir` that receives the PIFs for this choice of limits
# and has to be kept in sync with `cat_ox_lims` by hand.
cat_ox_lims = {'Co':[2,4],'Fe':[2,4]}
dest = 'Co(2,4)Fe(2,4)'
src = 'in'

destdir = os.path.join(pifdir,dest)
# make sure the output directory exists before any file is written to it
os.makedirs(destdir,exist_ok=True)

# every sub-directory of <datadir>/<src> is treated as one sample
samples = next(os.walk(os.path.join(datadir,src)))[1]

# Each supported set of limits has its own cache of reduced features, keyed by
# (sample, point), so that formula_redfeat is only called for points that are
# not cached yet. Select the matching cache once, and fail before any
# processing if the limits are not one of the supported cases.
feat_caches = [
    ({'Co':[2,4],'Fe':[2,4]}, calc_feat_Co24Fe24),
    ({'Co':[2,3],'Fe':[3,4]}, calc_feat_Co23Fe34),
    ({'Co':[3,4],'Fe':[2,3]}, calc_feat_Co34Fe23),
]
for known_lims, known_cache in feat_caches:
    if cat_ox_lims == known_lims:
        feat_cache = known_cache
        break
else:
    raise Exception('cat_ox_lims {} does not match expected cases'.format(cat_ox_lims))


def measurement_conditions(row):
    """Return new temperature and atmosphere condition Values for one data row.

    A fresh list is built on every call so that properties never share
    condition objects.
    """
    # raw string: '\c' is not a valid escape sequence in a normal string literal
    return [Value(name=r'Temperature ($^\circ$C)',scalars=int(row['Temp_C'])),
            Value(name='Atmosphere',scalars=row['atm'])]


for sample in samples:
    print('------------------\nProcessing {}\n------------------'.format(sample))
    df = fl.load_sample_files(sampledir(sample,src),info_filter={'T_set':['500C'],'atm':'dry'})
    fl.get_formula(df,overwrite=True)
    # Keep only strictly positive conductivities, since log10 is taken next.
    # Copy the selection so the new column is added to an independent frame
    # rather than to a slice of the loaded one (avoids SettingWithCopyWarning).
    df = df.loc[df['Sigma_elec']>0,:].copy()
    df['log_sigma'] = np.log10(df['Sigma_elec'])

    pifs = {}
    n_feat_calc = 0
    for _, row in df.iterrows():
        formula = row['formula']
        point = row['Point']

        # reuse the cached reduced features when available, otherwise
        # calculate them and store them for later runs
        try:
            red_feat = feat_cache[(sample,point)]
        except KeyError:
            red_feat = formula_redfeat(formula,cat_ox_lims=cat_ox_lims)
            feat_cache[(sample,point)] = red_feat
            n_feat_calc += 1

        row_pif, red_feat = formula_pif(formula,cat_ox_lims=cat_ox_lims,red_feat=red_feat)

        # Identifiers
        sample_num = Id(name='Sample', value=sample[-5:])
        pointid = Id(name='Point',value=int(point))
        row_pif.ids = [sample_num, pointid]

        # Properties
        # output properties - "labels"
        sigma = Property(name='sigma_e',units='S/cm',scalars=row['Sigma_elec'])
        sigma.conditions = measurement_conditions(row)

        log_sigma = Property(name='log_sigma_e',scalars=row['log_sigma'])
        log_sigma.conditions = measurement_conditions(row)

        row_pif.properties += [sigma,log_sigma]

        # one PIF per point; a repeated point replaces the earlier entry
        pifs[point] = row_pif

    print('Calculated red_feat for {} pifs'.format(n_feat_calc))

    outfile = os.path.join(destdir,'{}_oxavg_pif.json'.format(sample))
    # the context manager closes the file even if the dump raises
    with open(outfile,'w') as pif_file:
        pypif.pif.dump(list(pifs.values()),pif_file,indent=0)
    # report the number of PIFs actually written (one per distinct point)
    print('Dumped {} pifs in {}'.format(len(pifs), os.path.basename(outfile)))

#update pickle files with any new entries
calc_feat_Co23Fe34.update_file()
calc_feat_Co24Fe24.update_file()
calc_feat_Co34Fe23.update_file()

------------------
Processing PDAC_COM3_01251
------------------
Calculated red_feat for 0 pifs
Dumped 130 pifs in PDAC_COM3_01251_oxavg_pif.json
------------------
Processing PDAC_COM3_01254
------------------
Calculated red_feat for 0 pifs
Dumped 132 pifs in PDAC_COM3_01254_oxavg_pif.json
------------------
Processing PDAC_COM3_01255
------------------
Calculated red_feat for 0 pifs
Dumped 131 pifs in PDAC_COM3_01255_oxavg_pif.json
------------------
Processing PDAC_COM3_01256
------------------
Calculated red_feat for 0 pifs
Dumped 131 pifs in PDAC_COM3_01256_oxavg_pif.json


### Upload PIFs to datasets

In [12]:
"create dataset if not existing"
# The client must exist before create_dataset is called below; with this line
# commented out the cell only worked when `client` was left over in the kernel.
client = CitrinationClient(os.environ['CITRINATION_API_KEY'],'https://citrination.com')

# Creation calls for the earlier datasets are kept commented out for reference.
# dataset1 = client.create_dataset(name='tco_sigma_elec_full',
#                                 description='Electrical conductivity for TCO thin films with all features',
#                                 public=0)

# dataset2 = client.create_dataset(name='tco_sigma_elec_oxavg_Co[2,3]Fe[3,4]',
#                                 description='Electrical conductivity for TCO thin films with oxavg features only',
#                                 public=0)

# dataset1.id, dataset2.id

# dataset4 = client.create_dataset(name='tco_sigma_elec_oxavg_Co[2,4]Fe[2,4]',
#                                 description='Electrical conductivity for TCO thin films with oxavg features only. Cation ox limits: Co: [2,4]; Fe: [2,4]',
#                                 public=0)

dataset5 = client.create_dataset(name='tco_sigma_elec_oxavg_Co[3,4]Fe[2,3]',
                                description='Electrical conductivity for TCO thin films with oxavg features only. Cation ox limits: Co: [3,4]; Fe: [2,3]',
                                public=0)

# display the id of the new dataset so it can be recorded
dataset5.id

162971

### Datasets
tco_sigma_elec_oxavg_**Co[2,3]Fe[3,4]: 162725**

tco_sigma_elec_oxavg_**Co[2,4]Fe[2,4]: 162765**

tco_sigma_elec_oxavg_**Co[3,4]Fe[2,3]: 162971**

In [13]:
"general pif upload"
client = CitrinationClient(os.environ['CITRINATION_API_KEY'],'https://citrination.com')
dsid = 162971 

# directory holding the PIFs to upload, and the pattern selecting the oxavg files in it
upload_dir = os.path.join(pifdir,'Co(3,4)Fe(2,3)')
oxavg_pattern = os.path.join(upload_dir,'*oxavg*.json')

# upload each matching file to dataset `dsid`, keeping every upload result
results = [client.upload(dsid,pif_path) for pif_path in glob.glob(oxavg_pattern)]

# show the recorded successes and failures of each upload
[r.__dict__ for r in results]

[{'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(3,4)Fe(2,3)\\PDAC_COM3_01251_oxavg_pif.json'}]},
 {'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(3,4)Fe(2,3)\\PDAC_COM3_01254_oxavg_pif.json'}]},
 {'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(3,4)Fe(2,3)\\PDAC_COM3_01255_oxavg_pif.json'}]},
 {'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(3,4)Fe(2,3)\\PDAC_COM3_01256_oxavg_pif.json'}]}]

In [139]:
# List everything in the Co(2,4)Fe(2,4) PIF directory. The path is derived from
# `pifdir` rather than spelled out as an absolute, user-specific path, so the
# cell also works on other machines.
glob.glob(os.path.join(pifdir,'Co(2,4)Fe(2,4)','*'))

['C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(2,4)Fe(2,4)\\PDAC_COM3_01251_oxavg_pif.json',
 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(2,4)Fe(2,4)\\PDAC_COM3_01251_pif.json',
 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(2,4)Fe(2,4)\\PDAC_COM3_01254_oxavg_pif.json',
 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(2,4)Fe(2,4)\\PDAC_COM3_01254_pif.json',
 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(2,4)Fe(2,4)\\PDAC_COM3_01255_oxavg_pif.json',
 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\Co(2,4)Fe(2,4)\\PDAC_COM3_01255_pif.json',
 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pi

In [87]:
"Upload pifs"
client = CitrinationClient(os.environ['CITRINATION_API_KEY'],'https://citrination.com')
dsid1 = 162724 #full
dsid2 = 162725 #oxavg only Co[2,3]Fe[3,4]

results1 = []
results2 = []

for file in glob.glob(os.path.join(pifdir,'*.json')):
    filename = os.path.basename(file)
    # Decide on the file name only: testing the whole path would send every
    # file to the oxavg dataset if a parent directory happened to contain 'oxavg'.
    if 'oxavg' in filename:
        result = client.upload(dsid2,file)
        results2.append(result)
    else:
        result = client.upload(dsid1,file)
        results1.append(result)

# show the recorded successes and failures of each upload, per dataset
[r.__dict__ for r in results1], [r.__dict__ for r in results2]

([{'_failures': [],
   '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01251_pif.json'}]},
  {'_failures': [],
   '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01254_pif.json'}]},
  {'_failures': [],
   '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01255_pif.json'}]},
  {'_failures': [],
   '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01256_pif.json'}]}],
 [{'_failures': [],
   '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01251_oxavg_pif.json'}]},
  {'_failures': [],
   '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado Schoo

In [97]:
"dataset excluding 01255"
# Create the dataset for oxavg features with sample 01255 left out.
# `client` is the CitrinationClient created in an earlier cell.
dataset3 = client.create_dataset(
    name='tco_sigma_elec_excl01255',
    description='Electrical conductivity for TCO thin films with oxavg features only and 01255 excluded',
    public=0,
)
# display the id of the new dataset so it can be recorded
dataset3.id

162726

In [100]:
"Upload pifs to dataset 3"
dsid3 = 162726 #oxavg only, exclude 01255

results3 = []

for file in glob.glob(os.path.join(pifdir,'*.json')):
    filename = os.path.basename(file)
    # Both tests use the file name only, so that text in the directory part of
    # the path can neither select nor exclude a file by accident.
    if 'oxavg' in filename and '01255' not in filename:
        result = client.upload(dsid3,file)
        results3.append(result)
        print(filename)

# show the recorded successes and failures of each upload
[r.__dict__ for r in results3]

PDAC_COM3_01251_oxavg_pif.json
PDAC_COM3_01254_oxavg_pif.json
PDAC_COM3_01256_oxavg_pif.json


[{'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01251_oxavg_pif.json'}]},
 {'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01254_oxavg_pif.json'}]},
 {'_failures': [],
  '_successes': [{'path': 'C:\\Users\\jdhuang\\OneDrive - Colorado School of Mines/Research/MIDDMI/TCO\\data/conductivity\\pifs\\PDAC_COM3_01256_oxavg_pif.json'}]}]

In [90]:
# Reload every sample without the positive-conductivity filter and tag each row
# with its sample name, to look at the non-positive measurements.
samples = next(os.walk(os.path.join(datadir,src)))[1]
frames = []
for sample in samples:
    df = fl.load_sample_files(sampledir(sample,src),info_filter={'T_set':['500C'],'atm':'dry'})
    df['sample'] = sample
    frames.append(df)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# growing a frame inside the loop copies all accumulated rows on every pass.
data = pd.concat(frames,sort=True) if frames else pd.DataFrame()

# Split on the sign of the conductivity. Zeros belong to `neg` only, matching
# the strict `> 0` filter applied before taking log10 in the processing cell.
neg = data.loc[data['Sigma_elec']<=0,:]
pos = data.loc[data['Sigma_elec']>0,:]


In [145]:
# Inspect the first entry of `row_pif.properties`. `row_pif` is the loop
# variable of the featurization cell, so this shows the last PIF built there
# and only works while that variable is still in the kernel.
row_pif.properties[0]

<pypif.obj.common.property.Property at 0x27e0da3fc88>

In [96]:
# Select the measurements whose conductivity is below 5e-6 and print, for each
# sample that has any, how many such rows it contributes.
is_low = data['Sigma_elec']<5e-6
low = data.loc[is_low,:]
for sample_name in low['sample'].unique():
    n_low = (low['sample']==sample_name).sum()
    print(sample_name,n_low)

PDAC_COM3_01251 3
PDAC_COM3_01254 22
PDAC_COM3_01255 131
PDAC_COM3_01256 1
