In [None]:
%matplotlib inline
from pyfitit import *
initPyfitit()

# Gather Data from Several Samples

In [None]:
def combineSamples():
    """Merge every sample stored in the ``samples`` folder into a single sample.

    For each entry of the folder the function:
      * reads the sample and takes the coordination number (CN) from the
        character at index 6 of the entry name;
      * loads the ``Fe_tetra.py`` project for that CN;
      * adds the columns ``CN``, ``avgDist``, ``stdDist`` and ``name`` to the
        sample parameters and removes the parameters the sample had originally;
      * smooths the spectra with the project's FDMNES smoothing settings.

    Returns:
        The union of all processed samples, or ``None`` when the folder is empty.
    """
    sampleFolder = 'samples'
    combinedSample = None
    # os.listdir returns entries in arbitrary order; sort them so the row order
    # of the combined sample is identical on every run and every machine
    for f in sorted(os.listdir(sampleFolder)):
        sample = readSample(os.path.join(sampleFolder, f))

        CN = int(f[6:7]) # coordination number, encoded as a single digit in the entry name

        project = loadProject('Fe_tetra.py', CN=CN)

        # mean and std of the first CN entries of the sorted 'O' distances
        # (presumably the CN oxygen atoms nearest to Fe - confirm against getSortedDists)
        def calcDist(params, mol):
            dists = mol.getSortedDists('O')
            return [np.mean(dists[:CN]), np.std(dists[:CN])]
        n = len(sample.params)
        # remember the original parameter names before new columns are added
        oldParams = sample.paramNames

        # add CN, avgDist, stdDist and a per-row name to the sample.params database
        sample.addParam(paramName='CN', paramData=np.ones(n)*CN)
        sample.addParam(paramGenerator=calcDist, paramName=['avgDist','stdDist'], project=project)
        sample.addParam(paramName='name', paramData=np.array([f'cn{CN}_{i}' for i in range(n)], dtype=object))

        # delete old geometry parameters, because they differ in different samples
        sample.delParam(oldParams)

        # smooth spectra
        sample.spectra = smoothLib.smoothDataFrame(project.FDMNES_smooth, sample.spectra, 'fdmnes',
             project.spectrum, project.intervals['fit_norm'], folder=sample.folder)

        if combinedSample is None: combinedSample = sample
        else: combinedSample.unionWith(sample)
    return combinedSample
# NOTE(review): cacheInFile presumably reuses generated/combinedSample.pkl when it exists,
# so delete that file to rebuild the sample after changing combineSamples - confirm
combinedSample = cacheInFile('generated/combinedSample.pkl', combineSamples)

# Adding Experimental Spectra (params - unknown)

In [None]:
# Reload the combined sample saved by the previous step, so this cell always starts
# from the stored theoretical sample before the experimental rows are appended
combinedSample = loadData('generated/combinedSample.pkl')
exp_spectra_names = []
exp_folder = 'experiments'
# sorted listing gives a stable index for every experimental spectrum
exp_files = sorted(os.listdir(exp_folder))
for i, f in enumerate(exp_files):
    # the spectrum is named after its file, without the extension
    name = os.path.splitext(f)[0]
    exp_spectra_names.append(name)
    print(i, name)
    sp = readSpectrum(join(exp_folder,f), intensityColumn=3, guess=True)
    # only 'name' is supplied for an experimental row; the other parameters are not given
    combinedSample.addRow(sp, {'name':name})

# Save all data to folder

In [None]:
# Store the combined sample (theory + experiment) on disk
combinedSample.saveToFolder('generated/combinedSample')

# Plot a copy restricted to the 7100-7250 energy window, coloured by coordination number;
# inplace=False leaves combinedSample itself untouched
combinedSample.limit(
    energyRange=[7100,7250],
    inplace=False,
).plot(
    fileName='generated/combinedSample/plot.png',
    colorParam=combinedSample.params['CN'],
)

# Calculate spectrum descriptors

In [None]:
# Build the spectrum descriptors for a sample and return the extended sample
def addSpectrumDescriptors(sample, usePcaPrebuildData=True):
    """Return a sample extended with spectrum descriptors.

    The descriptors are requested from ``addDescriptors``; afterwards two derived
    columns are appended: ``min_e-max_e`` and ``max_min_slope``.

    Args:
        sample: sample whose spectra are described.
        usePcaPrebuildData: forwarded to the ``pca`` and ``rel_pca`` descriptors
            together with their ``generated/*.pkl`` file names.

    Returns:
        The sample returned by ``addDescriptors`` with the two extra columns.
        A message is printed when it holds fewer spectra than the input sample.
    """
    # options shared by both PCA-based descriptors
    pcaOptions = {'count':3, 'usePcaPrebuildData':usePcaPrebuildData}
    descriptorSpecs = [
        {'type':'stableExtrema', 'extremaType':'max', 'energyInterval':[7120,7150]},
        'efermi',
        {'type':'pca', **pcaOptions, 'fileName':'generated/pcaPrebuildData.pkl'},
        {'type':'rel_pca', **pcaOptions, 'fileName':'generated/relPcaPrebuildData.pkl'},
        {'type':'min', 'smoothRad':5, 'energyInterval':[7135,7190]},
        {'type':'variation', 'smoothRad':5, 'energyInterval':[7135,7200]},
        {'type':'polynom', 'deg':3, 'energyInterval':[7120, 7140], 'columnName':'polyn_a'},
        {'type':'polynom', 'deg':3, 'energyInterval':[7140, 7160], 'columnName':'polyn_b'},
    ]
    # the second returned value (indices of the kept spectra) is not needed here
    described, _ = addDescriptors(sample, descriptorSpecs)
    if described.getLength() < sample.getLength():
        print('Can\'t build descriptors for several spectra')

    # NOTE(review): min_e/min_i and max_e/max_i are presumably the columns produced by the
    # 'min' and 'stableExtrema' descriptors above - confirm
    table = described.params
    energyGap = table['min_e'] - table['max_e']
    intensityDrop = table['max_i'] - table['min_i']
    described.addParam(paramName='min_e-max_e', paramData=energyGap)
    described.addParam(paramName='max_min_slope', paramData=intensityDrop/energyGap)
    return described

In [None]:
# Compute the descriptors on a copy of the combined sample cut to the 7100-7200 window
sampleWithDescriptors = combinedSample.copy()
sampleWithDescriptors.limit([7100, 7200], inplace=True)

# usePcaPrebuildData=False is passed for this (full) sample
sampleWithDescriptors = cacheInFile(
    'generated/sampleWithDescriptors.pkl',
    lambda: addSpectrumDescriptors(sampleWithDescriptors, usePcaPrebuildData=False),
)

# Save the result and a plot coloured by the 'max_e' descriptor
sampleWithDescriptors.saveToFolder('generated/sampleWithDescriptors')
sampleWithDescriptors.plot(
    fileName='generated/sampleWithDescriptors/plot.png',
    colorParam=sampleWithDescriptors.params['max_e'],
)
print('All descriptors of structure and spectrum:', sampleWithDescriptors.paramNames)

# Plot Descriptors

In [None]:
# Reload the descriptor sample and separate the rows with unknown labels
# (presumably the experimental spectra - confirm against splitUnknown)
sampleWithDescriptors = loadData('generated/sampleWithDescriptors.pkl')
knownSample, unknownSample = sampleWithDescriptors.splitUnknown()

# Scatter of (min_e, max_e) labelled by avgDist, with the unknown rows passed separately
plotDescriptors2d(
    knownSample.params,
    descriptor_names=['min_e', 'max_e'],
    label_names=['avgDist'],
    folder_prefix='descr_2d',
    unknown=unknownSample.params,
    cv_count=10,
    plot_only='data and quality',
    textColumn='name',
    textsize=0,
    dpi=300,
)

# Calculate Descriptor Quality

In [None]:
# Every parameter except the row name and the structural labels is a candidate feature.
# sorted() fixes the feature order: plain set iteration order of strings depends on hash
# randomization and would change between kernel sessions.
all_features = sorted(set(knownSample.paramNames) - {'name', 'CN', 'avgDist', 'stdDist'})

# Rate each single feature (feature_subset_size=1) as a predictor of avgDist
descriptorQuality(knownSample.params, label_names=['avgDist'], all_features=all_features,
       feature_subset_size=1, cv_parts_count=5, cv_repeat=1, unknown_data=unknownSample.params,
       folder='descriptor_quality', printDebug=True)

# Make Descriptor Mixture

In [None]:
# Generate (or load from cache) 2000 two-component mixtures of the known sample;
# descriptors of the mixtures are built with addSpectrumDescriptors
mixtureSample = cacheInFile(
    'generated/mixtureSample.pkl',
    lambda: generateMixtureOfSample(
        size=2000,
        componentCount=2,
        sample=knownSample,
        label_names=['CN', 'avgDist', 'stdDist'],
        addDescrFunc=addSpectrumDescriptors,
    ),
)

# Save the mixtures and a plot coloured by the 'max_e' descriptor
mixtureSample.saveToFolder('generated/mixtureSample')
mixtureSample.plot(
    fileName='generated/mixtureSample/plot.png',
    colorParam=mixtureSample.params['max_e'],
)

In [None]:
# Reload the mixture sample and plot (polyn_b_3, rel_pca2) labelled by avgDist,
# with the unknown rows passed separately
mixtureSample = loadData('generated/mixtureSample.pkl')
plotDescriptors2d(
    mixtureSample.params,
    descriptor_names=['polyn_b_3', 'rel_pca2'],
    label_names=['avgDist'],
    folder_prefix='descr_2d_mix',
    unknown=unknownSample.params,
    textsize=0,
    dpi=300,
    cv_count=10,
    plot_only='data and quality',
    textColumn='name',
)