In [None]:
%matplotlib inline
import sys
sys.path.append("../../..")
from pyfitit import *
initPyfitit()

# Construct Data from Several Samples

In [None]:
# Build one combined sample out of every entry found in `sampleFolder`.
# For each entry: read it, derive the coordination number from its name,
# replace the geometry parameters by CN/avgDist/stdDist, smooth the spectra
# and merge the result into `combinedSample`.
sampleFolder = 'samples'
combinedSample = None
# os.listdir returns entries in arbitrary order; sort them so that the row
# order of combinedSample is the same on every run and on every machine.
for f in sorted(os.listdir(sampleFolder)):
    sample = readSample(os.path.join(sampleFolder, f))

    # the character at index 6 of the entry name is parsed as the
    # coordination number (single digit only)
    CN = int(f[6:7]) # coordination number

    project = loadProject('Fe_tetra.py', CN=CN)

    # function to calculate mean and std of the neighbour atoms of Fe
    # (CN is read from the enclosing scope at call time; only the first CN
    # entries of the sorted distance list are used)
    def calcDist(params, mol):
        dists = mol.getSortedDists('O')
        return [np.mean(dists[:CN]), np.std(dists[:CN])]
    n = len(sample.params)
    # remember the original parameter names before new ones are added,
    # so that exactly those can be removed below
    oldParams = sample.paramNames

    # add CN, avgDist, stdDist to sample.params database
    sample.addParam(paramName='CN', paramData=np.ones(n)*CN)
    sample.addParam(paramGenerator=calcDist, paramName=['avgDist','stdDist'], project=project)

    # delete old geometry parameters, because they differ in different samples
    sample.delParam(oldParams)

    # smooth spectra with the project's FDMNES smoothing settings
    sample.spectra = smoothLib.smoothDataFrame(project.FDMNES_smooth, sample.spectra, 'fdmnes',
         project.spectrum, project.intervals['fit_norm'], folder=sample.folder, norm=project.FDMNES_smooth['norm'])

    # the first sample becomes the accumulator, later ones are merged into it
    if combinedSample is None:
        combinedSample = sample
    else:
        combinedSample.unionWith(sample)

# Adding Experimental Spectra (params unknown)

In [None]:
# Append every experimental spectrum found in `exp_folder` to combinedSample
# and print the index under which each one was added.
exp_spectra_names = []
exp_folder = 'experiments'
exp_files = sorted(os.listdir(exp_folder))
print('Unknown experimental spectrum numbering')
for i, f in enumerate(exp_files):
    name, extension = os.path.splitext(f)
    exp_spectra_names.append(name)
    print(i, name)
    # '.nor' files are read with a 38-row header skipped and intensity taken
    # from column 3; every other file is read as two columns with a decimal comma
    if extension == '.nor':
        readOptions = dict(energyColumn=0, intensityColumn=3, skiprows=38)
    else:
        readOptions = dict(energyColumn=0, intensityColumn=1, skiprows=0, decimal=',')
    sp = readSpectrum(join(exp_folder, f), **readOptions)
    combinedSample.addRow(sp)

# Save all data to folder

In [None]:
# Write the combined sample (the merged samples plus the experimental spectra
# appended with addRow) to the folder 'sample_combined'.
combinedSample.saveToFolder('sample_combined')

# Calculate spectrum descriptors

In [None]:
# function to add spectrum descriptors to the sample
def addSpectrumDescriptors(sample):
    """Compute spectrum descriptors and store them as columns of ``sample.params``.

    Reads ``sample.params``, ``sample.spectra`` (converted with ``to_numpy()``)
    and ``sample.energy``; the updated parameter table is assigned back to
    ``sample.params``. Nothing is returned.

    Columns written, in this order:
      * ``pit_e``, ``pit_int``, ``pit_d2``  - columns 0..2 of
        ``stableExtrema(..., 'min', [7135,7190])``
      * ``max_e``, ``max_int``, ``max_d2``  - columns 0..2 of
        ``stableExtrema(..., 'max', [7120,7150])``
      * ``pit_e-max_e``   - ``pit_e`` minus ``max_e``
      * ``max_pit_slope`` - ``(max_int - pit_int) / (pit_e - max_e)``
      * ``efermi``, ``efermiRate`` - columns 0..1 of ``efermiDescriptor``
      * ``pca1``..``pca3``         - columns 0..2 of ``pcaDescriptor(spectra, 3)``
      * ``rel_pca1``..``rel_pca3`` - columns 0..2 of ``relPcaDescriptor``

    The two ``stableExtrema`` calls are also given the folders
    'plot_min_desc' and 'plot_max_desc' via ``plotResultToFolder``.
    """
    params = sample.params
    intensities = sample.spectra.to_numpy()
    energy = sample.energy

    def storeColumns(values, names):
        # column k of `values` becomes the parameter called names[k]
        for k, columnName in enumerate(names):
            params[columnName] = values[:, k]

    # minimum ("pit") searched in [7135, 7190]
    pits = stableExtrema(intensities, energy, 'min', [7135,7190], plotResultToFolder='plot_min_desc')
    storeColumns(pits, ['pit_e', 'pit_int', 'pit_d2'])

    # maximum searched in [7120, 7150]
    peaks = stableExtrema(intensities, energy, 'max', [7120,7150], plotResultToFolder='plot_max_desc')
    storeColumns(peaks, ['max_e', 'max_int', 'max_d2'])

    # descriptors derived from the pit/max pair
    pitToMax = params['pit_e'] - params['max_e']
    params['pit_e-max_e'] = pitToMax
    params['max_pit_slope'] = (params['max_int'] - params['pit_int'])/pitToMax

    fermi = efermiDescriptor(intensities, energy)
    storeColumns(fermi, ['efermi', 'efermiRate'])

    storeColumns(pcaDescriptor(intensities, 3), ['pca1', 'pca2', 'pca3'])

    # the relative PCA receives the efermi column computed above
    storeColumns(relPcaDescriptor(intensities, energy, fermi[:,0],  3),
                 ['rel_pca1', 'rel_pca2', 'rel_pca3'])

    sample.params = params

In [None]:
# add descriptors
# Work on a copy of combinedSample (assumes copy() yields an independent
# object - TODO confirm), so the descriptor columns are not added to it.
sampleWithDescriptors = combinedSample.copy()
# presumably restricts the spectra to the energy interval [7100, 7200];
# verify against the limit() implementation
sampleWithDescriptors.limit([7100, 7200])
# adds the descriptor columns to sampleWithDescriptors.params
addSpectrumDescriptors(sampleWithDescriptors)
# store the result; the plotting cell reads it back from this folder
sampleWithDescriptors.saveToFolder('sample_with_descriptors')
print('All descriptors of structure and spectrum:', sampleWithDescriptors.paramNames)

# Plot Descriptors

In [None]:
# Reload the sample saved together with its descriptors and split it in two
# parts (presumably rows with known parameters and the experimental rows
# whose parameters are unknown - confirm against splitUnknown()).
sampleWithDescriptors = readSample('sample_with_descriptors')
knownSample, unknownSample = sampleWithDescriptors.splitUnknown()

# Plot the descriptor pair (efermi, pca1) for the label avgDist, passing the
# unknown part as `unknown`.
plot_descriptors_2d(
    knownSample.params,
    descriptor_names=['efermi', 'pca1'],
    label_names=['avgDist'],
    folder_prefix='descr_2d',
    unknown=unknownSample.params,
    markersize=50,
    textsize=0,
    alpha=0.3,
    cv_count=10,
    plot_data_only=True,
)

# Calculate Descriptor Quality

In [None]:
# Evaluate every descriptor on its own (feature_subset_size=1) for the label
# avgDist; results go to the folder 'descriptor_quality'.
descriptor_quality(
    knownSample.params,
    label_names=['avgDist'],
    all_features=[
        # columns written from the two stableExtrema calls
        'pit_e', 'pit_int', 'pit_d2',
        'max_e', 'max_int', 'max_d2',
        'pit_e-max_e', 'max_pit_slope',
        # columns written from efermiDescriptor
        'efermi', 'efermiRate',
        # columns written from pcaDescriptor / relPcaDescriptor
        'pca1', 'pca2', 'pca3',
        'rel_pca1', 'rel_pca2', 'rel_pca3',
    ],
    feature_subset_size=1,
    cv_parts_count=5,
    cv_repeat=1,
    unknown_data=unknownSample.params,
    folder='descriptor_quality',
    printDebug=True,
)

# Make Descriptor Mixture

In [None]:
# Generate a sample of 2000 two-component mixtures from knownSample;
# addSpectrumDescriptors is handed over as the descriptor callback.
mixtureSample = generateMixtureOfSample(
    size=2000,
    componentCount=2,
    sample=knownSample,
    label_names=['CN', 'avgDist', 'stdDist'],
    addDescrFunc=addSpectrumDescriptors,
)
# store the mixtures; the next cell reads them back from this folder
mixtureSample.saveToFolder('sample_of_mixtures')

In [None]:
# Reload the stored mixture sample and plot the descriptor pair
# (efermi, pca1) for the label avgDist; unknownSample comes from the earlier
# splitUnknown() cell.
mixtureSample = readSample('sample_of_mixtures')
plot_descriptors_2d(
    mixtureSample.params,
    descriptor_names=['efermi', 'pca1'],
    label_names=['avgDist'],
    folder_prefix='descr_2d_mix',
    unknown=unknownSample.params,
    markersize=50,
    textsize=0,
    alpha=0.3,
    cv_count=10,
    plot_data_only=True,
)