In [1]:
import yaml
import os
import io
import re

import pandas as pd
import numpy as np
import peakutils as pu
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def get_file_list(basedir):
    """Recursively list the files found below ``basedir``.

    Parameters
    ----------
    basedir : str
        Directory to scan. A trailing path separator is optional.

    Returns
    -------
    list
        Files located directly in ``basedir`` are returned as plain name
        strings. Files inside sub-directories are returned as lists of path
        components relative to ``basedir``, e.g. ``['sub', 'name.yml']`` or
        ``['sub', 'deeper', 'name.yml']``. Direct files come first, followed
        by the contents of each sub-directory, both in ``os.listdir`` order.
    """
    entries = os.listdir(basedir)
    files = [name for name in entries
             if os.path.isfile(os.path.join(basedir, name))]
    dirs = [name for name in entries
            if os.path.isdir(os.path.join(basedir, name))]

    for d in dirs:
        for entry in get_file_list(os.path.join(basedir, d)):
            # Entries coming from deeper levels are already component lists;
            # prepend this directory rather than nesting a list in a list.
            tail = entry if isinstance(entry, list) else [entry]
            files.append([d] + tail)

    return files

In [3]:
# Root directory to scan; written with a trailing '/'.
basedir = 'database/main/'
# Every file found below basedir (see get_file_list for the entry format).
filelist = get_file_list(basedir)

In [4]:
def load_into_pandas(filelist, basedir):
    """Load every 'tabulated nk' YAML file into one DataFrame.

    Parameters
    ----------
    filelist : list
        Lists of path components relative to ``basedir`` (for example
        ``['Ag', 'Johnson.yml']``); each entry is also handed to
        ``yaml2series``.
    basedir : str
        Directory the entries of ``filelist`` are relative to. A trailing
        path separator is optional.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the frames built by ``yaml2series`` for
        every file whose first ``DATA`` entry has type ``'tabulated nk'``,
        sorted by index. An empty DataFrame if no file qualifies.
    """
    frames = []

    for file in filelist:
        # List concatenation keeps the TypeError for entries that are not
        # component lists; os.path.join tolerates a missing trailing slash.
        path = os.path.join(*([basedir] + file))
        # Context manager so the handle is closed even if parsing fails.
        with open(path) as stream:
            # NOTE: safe_load replaces yaml.load(stream) without a Loader,
            # which may construct arbitrary Python objects on older PyYAML
            # and is rejected outright by PyYAML >= 6. It only builds plain
            # Python containers and scalars.
            record = yaml.safe_load(stream)
        if record['DATA'][0]['type'] == 'tabulated nk':
            frames.append(yaml2series(record, file))

    if not frames:
        # pd.concat refuses an empty list; mirror the original empty result.
        return pd.DataFrame()

    # A single concat replaces the per-file DataFrame.append, which copied
    # the accumulated frame on every iteration and no longer exists in
    # pandas 2.x.
    return pd.concat(frames).sort_index()

def yaml2series(yd, file):
    """Convert one parsed YAML record into a DataFrame of (wl, n, k) rows.

    Despite the name, a DataFrame is returned, not a Series.

    Parameters
    ----------
    yd : dict
        Parsed YAML content. Only ``yd['DATA'][0]['data']`` is read; it must
        be a string of space-delimited rows with three fields each.
    file : sequence of str
        ``file[0]`` is stored as the ``'mat'`` index level. ``file[1]`` is
        the file name; the part before ``.yml``, stripped of every non-word
        character, is stored as the ``'tbl'`` index level.

    Returns
    -------
    pandas.DataFrame
        Columns ``'wl'``, ``'n'``, ``'k'``, indexed by ``('mat', 'tbl')``.

    Raises
    ------
    AttributeError
        If ``file[1]`` does not contain ``.yml`` preceded by at least one
        character (``re.match`` then returns ``None``).
    """
    table = pd.read_csv(io.StringIO(yd['DATA'][0]['data']),
                        delimiter=' ', header=None, names=['wl', 'n', 'k'])

    # Raw strings: the same patterns as before, without the invalid escape
    # sequences that '\w' and '\.' are inside ordinary string literals.
    stem = re.match(r'(.+)\.yml', file[1]).group(1)

    table['mat'] = file[0]
    # Keep only word characters so the table label is a plain identifier.
    table['tbl'] = re.sub(r'[^\w0-9]', '', stem)

    return table.set_index(['mat', 'tbl'])

In [5]:
# Read the listed files into a single DataFrame indexed by ('mat', 'tbl').
df = load_into_pandas(filelist, basedir)

In [6]:
# Real and imaginary parts of (n + i*k)**2, i.e. n**2 - k**2 and 2*n*k
# (presumably the permittivity, going by the 'eps' column names).
# Column arithmetic replaces the row-wise apply, which was slow and indexed
# each row positionally (x[0], x[1]) despite its 'n'/'k' labels.
df['epsre'] = df['n']**2 - df['k']**2
df['epsim'] = 2*df['n']*df['k']

In [7]:
def get_anomalous_disp(df, wl_min=0.5, wl_max=5):
    """Tell whether ``epsre`` has a positive-slope peak inside a ``wl`` window.

    The gradient of ``df['epsre']`` is taken with respect to row position
    (``np.gradient`` with unit spacing, not with respect to ``wl``), its
    peaks are located with ``peakutils.indexes``, and peaks where the
    gradient is not positive are discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'wl'`` and ``'epsre'``; rows are used in
        their given order.
    wl_min, wl_max : float, optional
        Exclusive bounds of the window, in the units of ``df['wl']``.
        Defaults are 0.5 and 5.

    Returns
    -------
    bool
        ``True`` if at least one retained peak has ``wl_min < wl < wl_max``,
        ``False`` otherwise (including when there are fewer than two rows).
    """
    eps = np.asarray(df['epsre'])
    wl = np.asarray(df['wl'])

    # np.gradient needs at least two samples; a shorter table cannot
    # contain a peak anyway.
    if eps.size < 2:
        return False

    deps = np.gradient(eps)
    # peakutils may hand back an empty float array (flat signal); force an
    # integer dtype so the result is always usable as an index array.
    peaks = np.asarray(pu.indexes(deps), dtype=int)
    peaks = peaks[deps[peaks] > 0]

    if peaks.size == 0:
        return False

    # `peaks` holds positions, so index the plain array positionally rather
    # than going through the label-based Series lookup.
    peak_wl = wl[peaks]
    return bool(np.any((peak_wl > wl_min) & (peak_wl < wl_max)))

In [8]:
# Group the rows by their index value so that each group is handed to
# get_anomalous_disp separately.
g = df.groupby(df.index)
# One flag per group.
res = g[['wl', 'epsre']].apply(get_anomalous_disp)

# Iterator over the keys of the flagged groups. A boolean mask replaces the
# per-key `res[ind] == True` lookup.
index = iter(res.index[res.values.astype(bool)].tolist())