In [8]:
import os
import re
from collections import Counter
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_context('notebook')
sns.set(style='darkgrid')
from pipe import Pipe
import easier as ezr

%matplotlib inline
import pylab as pl
import holoviews as hv
# hv.extension('bokeh')

pd.set_option("display.max_columns",101)

In [2]:
# --- run configuration -------------------------------------------------

# folder that contains every data file of this measurement run
data_dir = './20180318/'

# base name of the test log; the log is expected inside data_dir
test_log_file_base_name = 'test_log_20180318.csv'

# upper bound on repeated samples kept per measurement condition
max_samples = 3

# acquisition channel letter -> name of the signal recorded on it
channel_mapper = {
    'a': 'sig_gen',
    'b': 'res_volt',
    'c': 'rec_volt',
    'd': 'sec_volt',
}

# full path of the test log, derived from the two settings above
test_log_file = os.path.join(data_dir, test_log_file_base_name)

In [3]:
def find_files(data_dir):
    """
    Find all data files under a specified directory.

    A data file is a csv whose name ends in ``<digits>-<digits>.csv``,
    optionally with a ``_<digits>`` suffix right before the extension
    (for example ``20180318-0001.csv`` or ``20180318-0082_01.csv``).

    Args:
        data_dir: directory that is searched recursively.

    Returns:
        A DataFrame with one row per data file and the columns
        ``file_tag`` (the file name without the optional ``_<digits>``
        suffix and without ``.csv``; this is what the log file refers to)
        and ``file_name`` (the walk root joined with the file name).
        The frame is empty, but has both columns, when nothing matches.
    """
    # The whole name has to match: with a start-anchored match() names such
    # as "20180318-0001.csv.bak" were accepted as data files.
    rex_file = re.compile(r'.*/?\d+\-\d+(_\d+)?\.csv')

    # Strips only the trailing "[_NN].csv".  The dot is escaped and the
    # pattern is anchored so nothing in the middle of a name can be removed.
    rex_suffix = re.compile(r'(_\d+)?\.csv$')

    # (file_tag, file_name) pairs collected while walking the tree
    data_files = []

    # recursively search data directory
    for root, dirs, files in os.walk(data_dir):
        for file in files:
            # only consider data files
            if rex_file.fullmatch(file):
                file_name = os.path.join(root, file)
                file_tag = rex_suffix.sub('', file)
                data_files.append((file_tag, file_name))

    # create and return the output dataframe
    return pd.DataFrame(data_files, columns=['file_tag', 'file_name'])

In [4]:
def make_log_frame(test_log_file, data_dir, max_samples):
    """
    Link every data file found under data_dir to its entry in the test log.

    Args:
        test_log_file: path of the csv test log.  It must contain a
            ``test_no`` column (dropped here); ``primary_position``,
            ``file_name`` and ``fatigue_life`` are renamed to ``pos``,
            ``file_tag`` and ``life``.
        data_dir: directory handed to find_files() to locate data files.
        max_samples: keep at most this many files per measurement condition.

    Returns:
        A DataFrame with the columns ``sample``, ``frequency``, ``bends``,
        ``pos``, ``sample_num`` and ``file_name`` and a fresh 0..n-1 index.
        ``sample_num`` counts from 1 within each measurement condition, in
        file-name order.
    """
    # read the test log, standardize column names and discard rows with
    # blank fields (these are garbage)
    df_log = (
        pd.read_csv(test_log_file)
        .drop(['test_no'], axis=1)
        .rename(columns=dict(primary_position='pos', file_name='file_tag', fatigue_life='life'))
        .dropna()
    )

    # get a frame of all files in the data directory
    df_files = find_files(data_dir)

    # Use the file_tag to link each data file with its corresponding log-file entry
    df_out = pd.merge(df_log, df_files, on=['file_tag'], how='right')

    # These fields identify a measurement condition (which can be sampled multiple times)
    grouping_fields = ['sample', 'frequency', 'bends', 'pos']

    # Files without a log entry come out of the right merge with missing
    # condition fields.  The groupby used to drop them silently; do it
    # explicitly so that every remaining row gets a sample number.
    df_out = df_out.dropna(subset=grouping_fields)

    # Order by file name within each condition so sample numbers follow it
    df_out = df_out.sort_values(by=grouping_fields + ['file_name'])

    # Number the samples 1, 2, 3, ... within each condition.  cumcount does
    # this without mutating the groups inside groupby.apply.
    df_out['sample_num'] = df_out.groupby(by=grouping_fields).cumcount() + 1

    # Select the final output fields wanted in the log frame
    df_out = df_out[grouping_fields + ['sample_num', 'file_name']]

    # Don't include more than max_samples for each measurement condition
    return df_out[df_out.sample_num <= max_samples].reset_index(drop=True)

# Build the log frame for this run, preview its first five rows and
# report how many data files it references.
df_log = make_log_frame(
    test_log_file,
    data_dir,
    max_samples=max_samples,
)
display(df_log.head())
print(df_log.shape[0])

Unnamed: 0,sample,frequency,bends,pos,sample_num,file_name
0,ss_0,50.0,0.0,28.0,1,./20180318/20180318-0082/20180318-0082_01.csv
1,ss_0,50.0,0.0,28.0,2,./20180318/20180318-0082/20180318-0082_02.csv
2,ss_0,50.0,0.0,28.0,3,./20180318/20180318-0082/20180318-0082_03.csv
3,ss_0,100.0,0.0,20.0,1,./20180318/20180318-0001.csv
4,ss_0,100.0,0.0,20.0,2,./20180318/20180318-0002.csv


90


In [52]:
def get_data(df_log, data_dir, channel_mapper, njobs=1, recompute=False):
    """
    Run the computation across files to extract features.

    The results are cached in ``results.txt`` (csv format) inside data_dir.

    Args:
        df_log: log frame handed to Pipe.
        data_dir: directory that holds the results cache.
        channel_mapper: channel mapping handed to Pipe.
        njobs: forwarded to Pipe as ``n_jobs``.
        recompute: when True the features are computed again even if a
            cache exists.  When no cache exists they are always computed.

    Returns:
        The results as read back from the cache file, so the caller gets
        the same csv round-tripped frame whether or not it was recomputed.
    """
    # Define the file (in the data directory) that will hold the analysis results
    results_file = os.path.join(data_dir, 'results.txt')

    # These computations can take a really long time, so only run them when
    # asked to, or when there is no cached result to fall back on (reading
    # a missing cache used to fail with FileNotFoundError).
    if recompute or not os.path.exists(results_file):
        p = Pipe(df_log, channel_mapper, n_jobs=njobs, harmonic=3)
        p.process()
        p.df.to_csv(results_file, index=False)

    return pd.read_csv(results_file)

with ezr.Timer('get_data'):
    df = get_data(df_log, data_dir, channel_mapper, njobs=2, recompute=False)
    
####################3
#TODO: Bake this into the log frame
# Rename the column and keep only the part of the sample label before the
# first underscore (e.g. 'ss_0' -> 'ss').  regex=True is spelled out because
# pandas >= 2.0 treats the pattern as a literal string by default, which
# would leave the labels unchanged.
df = df.rename(columns=dict(sample='sample_name'))
df['sample_name'] = df['sample_name'].str.replace('_.*', '', regex=True)
df.head()
###################

__time__,0.003457,get_data


Unnamed: 0,sample_name,frequency,bends,pos,sample_num,prim_sec_amp,prim_sec_phi,prim_rec_amp,prim_rec_phi,sec_rec_amp,sec_rec_phi,sec_harm_db,rec_harm_db
0,ss,50.0,0.0,28.0,1,0.000654,0.045091,2.3e-05,-3.122779,0.035645,3.115316,-37.431325,-37.621854
1,ss,50.0,0.0,28.0,2,0.000654,0.045099,2.3e-05,-3.119578,0.035686,3.118508,-37.478347,-38.722307
2,ss,50.0,0.0,28.0,3,0.000653,0.045447,2.3e-05,-3.117868,0.035573,3.11987,-37.500759,-39.190102
3,ss,100.0,0.0,20.0,1,0.000652,0.085451,2.3e-05,-3.100622,0.035742,3.097112,-43.677038,-43.840512
4,ss,100.0,0.0,20.0,2,0.000652,0.085426,2.3e-05,-3.101346,0.035739,3.096413,-43.673625,-44.123298


In [60]:
def plot_experiment(df, xparams, col_name):
    """
    Show the measurements of one experiment, one batch per overlay value.

    Work in progress: nothing is plotted yet, each batch is only printed
    and displayed.

    So here is the idea with this.  The xparams dictionary contains all the
    experimental values that are fixed.  These are designated by actual
    values, and rows that do not match them are filtered out.

    If a value is listed as a '*', then this value becomes overlays in the
    resulting plots.

    Open design notes:
      * In each of these, a reference to a baseline should be kept somehow.
        Maybe that information should go after the star, so something like
        '*/0.' to plot everything with respect to that baseline value.
      * Maybe the overlays should be based on only a single field rather
        than a tuple of fields.

    Args:
        df: frame holding the measurements.
        xparams: dict mapping a column name to a fixed value or to '*'.
        col_name: column to plot (not used yet).
    """
    # keep only the rows that match every fixed value
    for k, v in xparams.items():
        if v != '*':
            df = df[df[k] == v]

    overlay_keys = sorted(k for (k, v) in xparams.items() if v == '*')

    # groupby raises when given no keys, so with nothing to overlay the
    # filtered frame is shown as a single batch
    if not overlay_keys:
        display(df)
        return

    for key, batch in df.groupby(by=overlay_keys):
        print(key)
        display(batch)


# NOTE: EVERYTHING BELOW THIS CELL IS OLD STUFF
    
        
    
    

# fixed values select the rows; '*' marks the field whose values are shown
# batch by batch
xparams = {
    'sample_name': 'ss',
    'bends': '*',
    'frequency': 100,
}
plot_experiment(df, xparams, 'prim_sec_amp')

0.0


Unnamed: 0,sample_name,frequency,bends,pos,sample_num,prim_sec_amp,prim_sec_phi,prim_rec_amp,prim_rec_phi,sec_rec_amp,sec_rec_phi,sec_harm_db,rec_harm_db
3,ss,100.0,0.0,20.0,1,0.000652,0.085451,2.3e-05,-3.100622,0.035742,3.097112,-43.677038,-43.840512
4,ss,100.0,0.0,20.0,2,0.000652,0.085426,2.3e-05,-3.101346,0.035739,3.096413,-43.673625,-44.123298
5,ss,100.0,0.0,20.0,3,0.000652,0.085522,2.3e-05,-3.101619,0.035734,3.096045,-43.641107,-43.461985
6,ss,100.0,0.0,28.0,1,0.000652,0.086731,2.3e-05,-3.100752,0.035731,3.095702,-43.664789,-43.942973
7,ss,100.0,0.0,28.0,2,0.000652,0.086657,2.3e-05,-3.101124,0.035729,3.095405,-43.717591,-43.654159
8,ss,100.0,0.0,28.0,3,0.000652,0.086356,2.3e-05,-3.101158,0.03574,3.095671,-43.658477,-44.042628
9,ss,100.0,0.0,40.0,1,0.000652,0.086705,2.3e-05,-3.101221,0.035744,3.09526,-43.752796,-43.952844
10,ss,100.0,0.0,40.0,2,0.000652,0.086267,2.3e-05,-3.100888,0.035746,3.096031,-43.703124,-43.912037
11,ss,100.0,0.0,40.0,3,0.000652,0.086388,2.3e-05,-3.101019,0.035741,3.095779,-43.626462,-43.628255


109.0


Unnamed: 0,sample_name,frequency,bends,pos,sample_num,prim_sec_amp,prim_sec_phi,prim_rec_amp,prim_rec_phi,sec_rec_amp,sec_rec_phi,sec_harm_db,rec_harm_db
54,ss,100.0,109.0,20.0,1,0.000656,0.085528,2.6e-05,-3.097533,0.039375,3.100124,-43.23263,-43.462207
55,ss,100.0,109.0,20.0,2,0.000656,0.085583,2.6e-05,-3.097306,0.039377,3.100297,-43.204002,-43.17317
56,ss,100.0,109.0,20.0,3,0.000656,0.085597,2.6e-05,-3.097802,0.039382,3.099786,-43.210519,-43.371574
57,ss,100.0,109.0,28.0,1,0.000718,0.101303,2.7e-05,-3.084623,0.038022,3.097259,-38.444982,-41.059915
58,ss,100.0,109.0,28.0,2,0.000718,0.101354,2.7e-05,-3.084358,0.038033,3.097473,-38.429526,-41.243082
59,ss,100.0,109.0,28.0,3,0.000718,0.101142,2.7e-05,-3.084578,0.038031,3.097465,-38.414801,-41.058041
60,ss,100.0,109.0,40.0,1,0.000689,0.094116,2.5e-05,-3.093868,0.035987,3.095202,-40.559685,-42.98127
61,ss,100.0,109.0,40.0,2,0.000689,0.094084,2.5e-05,-3.092675,0.035985,3.096426,-40.549283,-43.203739
62,ss,100.0,109.0,40.0,3,0.000689,0.094078,2.5e-05,-3.093201,0.035984,3.095906,-40.568017,-43.24585


175.0


Unnamed: 0,sample_name,frequency,bends,pos,sample_num,prim_sec_amp,prim_sec_phi,prim_rec_amp,prim_rec_phi,sec_rec_amp,sec_rec_phi,sec_harm_db,rec_harm_db
72,ss,100.0,175.0,20.0,1,0.000653,0.086855,2.8e-05,-3.094727,0.04352,3.101604,-43.510779,-42.641567
73,ss,100.0,175.0,20.0,2,0.000653,0.086514,2.8e-05,-3.094637,0.04352,3.102035,-43.370873,-42.38989
74,ss,100.0,175.0,20.0,3,0.000654,0.086438,2.8e-05,-3.094402,0.043528,3.102345,-43.408215,-42.743018
75,ss,100.0,175.0,28.0,1,0.000794,0.118781,3.1e-05,-3.063328,0.039198,3.101076,-34.389052,-35.738616
76,ss,100.0,175.0,28.0,2,0.000794,0.118753,3.1e-05,-3.063606,0.03921,3.100827,-34.360955,-35.447354
77,ss,100.0,175.0,28.0,3,0.000794,0.118609,3.1e-05,-3.063355,0.039213,3.101221,-34.364358,-35.521223
78,ss,100.0,175.0,40.0,1,0.000712,0.100593,2.5e-05,-3.088484,0.035449,3.094108,-38.444466,-41.908737
79,ss,100.0,175.0,40.0,2,0.000712,0.100629,2.5e-05,-3.088636,0.035453,3.093921,-38.430089,-41.77515
80,ss,100.0,175.0,40.0,3,0.000712,0.100501,2.5e-05,-3.088655,0.035434,3.094029,-38.440717,-42.086441


218.0


Unnamed: 0,sample_name,frequency,bends,pos,sample_num,prim_sec_amp,prim_sec_phi,prim_rec_amp,prim_rec_phi,sec_rec_amp,sec_rec_phi,sec_harm_db,rec_harm_db
30,ss,100.0,218.0,20.0,1,0.000653,0.086548,3e-05,-3.091987,0.046525,3.10465,-43.470421,-42.170614
31,ss,100.0,218.0,20.0,2,0.000653,0.086451,3e-05,-3.091333,0.04653,3.105401,-43.482711,-42.314721
32,ss,100.0,218.0,20.0,3,0.000653,0.086382,3e-05,-3.092332,0.046525,3.10447,-43.460855,-41.981809
33,ss,100.0,218.0,28.0,1,0.00086,0.133436,3.4e-05,-3.048332,0.039993,3.101417,-33.294701,-34.396181
34,ss,100.0,218.0,28.0,2,0.00086,0.133068,3.4e-05,-3.048439,0.039988,3.101679,-33.298419,-34.373457
35,ss,100.0,218.0,28.0,3,0.00086,0.132825,3.4e-05,-3.049076,0.039992,3.101285,-33.289123,-34.270061
36,ss,100.0,218.0,40.0,1,0.000717,0.101315,2.5e-05,-3.08872,0.035428,3.093151,-38.769598,-42.487922
37,ss,100.0,218.0,40.0,2,0.000717,0.101256,2.5e-05,-3.088375,0.035434,3.093554,-38.76052,-42.464743
38,ss,100.0,218.0,40.0,3,0.000717,0.101178,2.5e-05,-3.088134,0.035441,3.093873,-38.739765,-42.586832


In [57]:
# list the distinct frequencies present in the results
df['frequency'].unique()

array([   50.,   100.,   200.,   500.,  1000.])

In [21]:
# def get_col(df, col_name):
#     df = df.pivot(index='pos', columns='pipe_label', values=col_name)
#     df = df.reset_index(drop=False)
#     df.index.name = col_name
#     df.columns.name = None
#     return df

def get_col(df, col_name):
    """
    Re-shape a frame so that every 'life' value gets its own columns.

    The frame is indexed by ('pos', 'life') and the 'life' level is then
    moved into the columns, leaving one row per 'pos'.

    Args:
        df: frame with 'pos' and 'life' columns.
        col_name: not used by the current implementation.

    Returns:
        The unstacked frame, indexed by 'pos'.
    """
    # An older pivot on 'pipe_label' used to follow the return statement and
    # could never run; it is still available in the commented-out version
    # above this function.
    return df.set_index(['pos', 'life']).unstack('life')

In [None]:
def do_plot(df, field_name, pipe_labels, title=None, ylabel=None):
    """
    Draw three side-by-side panels comparing pipes against the virgin pipe.

    Panel 1 shows the raw values of every pipe plus the virgin reference,
    panel 2 the ratio to the virgin values and panel 3 the difference from
    them, all against position along the pipe.

    Args:
        df: frame handed to get_col().
        field_name: forwarded to get_col().
        pipe_labels: names of the columns to plot.
        title: title put on each panel (skipped when None).
        ylabel: y-axis label of the raw-value panel (skipped when None).

    Side effects:
        Sets the global matplotlib figure size to (20, 6) and the seaborn
        context to 'talk', and draws into the current figure.
    """
    # NOTE(review): the frame returned by get_col() is expected to expose
    # `pos`, `virgin` and one column per pipe label -- confirm against the
    # current get_col implementation.
    xlabel = 'Position along Pipe (inches)'

    dfx = get_col(df, field_name)
    pl.rcParams['figure.figsize'] = (20, 6)
    sns.set_context('talk')

    def decorate(with_ylabel=False):
        # shared finishing touches of a panel
        pl.legend(loc='best')
        pl.xlabel(xlabel)
        if with_ylabel and ylabel is not None:
            # this used to set the title instead, so ylabel was never shown
            pl.ylabel(ylabel)
        if title is not None:
            pl.title(title)

    # raw values
    pl.subplot(131)
    for pipe_label in pipe_labels:
        pl.plot(dfx.pos, dfx[pipe_label], '.-', label=pipe_label)
    pl.plot(dfx.pos, dfx.virgin, '.-', label='Virgin')
    decorate(with_ylabel=True)

    # ratio to the virgin pipe
    pl.subplot(132)
    for pipe_label in pipe_labels:
        pl.plot(dfx.pos, dfx[pipe_label] / dfx.virgin, '.-', label=f'{pipe_label} ratio')
    decorate()

    # difference from the virgin pipe
    pl.subplot(133)
    for pipe_label in pipe_labels:
        pl.plot(dfx.pos, dfx[pipe_label] - dfx.virgin, '.-', label=f'{pipe_label} diff')
    decorate()

# pipes compared against the virgin reference
pipe_labels = ['ss_50', 'ss_80', 'ss_100_a', 'ss_100_b']

pl.figure()
do_plot(
    df,
    'prim_sec_amp',
    pipe_labels,
    title='Primary-Secondary Coupling',
    ylabel='Mutual Inductance',
)

# pl.figure()
# do_plot(df, 'prim_rec_amp', pipe_labels, title='Primary-Receiver Coupling', ylabel='Mutual Inductance')
# pl.figure()
# do_plot(df, 'sec_harm_db', pipe_labels, title='3rd Harmonic Power', ylabel='dB')



In [None]:
# preview the first rows of the frame currently bound to df
df.head()

In [None]:
from daq.pico import CSV
from harmonic import Harmonic
from easier import shade
from scipy import signal

In [None]:
# load a single capture; channels are named through channel_mapper
file_name = './20180312/20180312-0005.csv'
df = CSV(file_name=file_name, max_sample_freq=1e9, **channel_mapper).df
# filter_cols = ['res_volt', 'sec_volt', 'rec_volt']
# for col in filter_cols:
#     # 8 pol filter at .01 of nyquist
#     b, a = signal.butter(8, 0.01)
#     df.loc[:, col] = signal.filtfilt(b, a, df[col].values, padlen=150)


# harmonics used by every fit below
harmonics = [1, 3,]


def _fitted(values):
    """Return a Harmonic model fitted to `values` against the time column df.t."""
    model = Harmonic(harmonics=harmonics)
    model.fit(df.t, values)
    return model


# the res_volt fit is replaced by its derivative
h_i_prim = _fitted(df.res_volt).derivative()
h_v_sec = _fitted(df.sec_volt)
h_v_rec = _fitted(df.rec_volt)

# compute "impedance" objects
h_z_prim_sec = h_v_sec / h_i_prim
h_z_prim_rec = h_v_rec / h_i_prim
h_z_sec_rec = h_v_rec / h_v_sec



In [None]:
%%opts RGB [width=800, height=350]
# measured sec_volt signal and the values predicted for the same time points
# by the model that was fitted to it (h_v_sec)
x, y = df.t, df.sec_volt
yf = h_v_sec.predict(df.t)

# top: measurement (blue) overlaid with the prediction (red)
# bottom: residual y - yf
# .cols(1) stacks the two panels in a single column
((
    shade(hv.Curve((x, y)), color='blue')
    *shade(hv.Curve((x, yf)), color='red')
) + (
    shade(hv.Curve((x, y - yf), vdims=['sss']), color='blue')
)).cols(1)


In [None]:
%%opts RGB [width=800, height=350]
# NOTE(review): this cell repeats the preceding cell verbatim -- it can
# probably be deleted.
# measured sec_volt signal and the values predicted for the same time points
# by the model that was fitted to it (h_v_sec)
x, y = df.t, df.sec_volt
yf = h_v_sec.predict(df.t)

# top: measurement (blue) overlaid with the prediction (red)
# bottom: residual y - yf
# .cols(1) stacks the two panels in a single column
((
    shade(hv.Curve((x, y)), color='blue')
    *shade(hv.Curve((x, yf)), color='red')
) + (
    shade(hv.Curve((x, y - yf), vdims=['sss']), color='blue')
)).cols(1)


In [None]:
%%opts RGB [width=800, height=350]
from scipy import signal
t, y = df.t.values, df.sec_volt.values

# 8 pole Butterworth filter at .01 of nyquist, run forward and backward
# over the signal by filtfilt
b, a = signal.butter(8, 0.01)
yf1 = signal.filtfilt(b, a, y, padlen=150)

# fixed axis ranges shared by the overlaid curves
kd = hv.Dimension('time', range=(0, .1))
vd = hv.Dimension('amp', range=(-6.5, 6.5))

# raw signal with the filtered signal (red) on top.
# A third, green overlay of `yf2` was removed: yf2 is not defined anywhere
# in this notebook, so the cell failed with a NameError.
(
    shade(hv.Curve((t, y), kdims=[kd], vdims=[vd]))
    * shade(hv.Curve((t, yf1), kdims=[kd], vdims=[vd]), color='red')
)


In [None]:
# NOTE(review): scratch calculation.  .5 / (t[1] - t[0]) is half the sampling
# rate taken from the first two time stamps; the result is 0.005 times that
# value after rounding.  Its purpose is not recorded -- confirm or delete.
.005 * round(.5 / (t[1] - t[0]))

In [None]:
class SFFT:
    """Zero-padded FFT amplitude spectrum of an evenly sampled signal."""

    def _get_padded_length(self, initial_length, interp_exp=0):
        """
        Return the smallest power of two that is >= initial_length,
        multiplied by 2 ** interp_exp.

        Lengths of one or less give a base length of 1.
        """
        # (n - 1).bit_length() is the exponent of the next power of two;
        # this replaces a loop that tried exponents one at a time.
        whole_length = int(np.ceil(initial_length))
        padded_length = 1 << max(whole_length - 1, 0).bit_length()
        return padded_length * 2 ** interp_exp

    def fft(self, time, amplitude, interp_exp=3):
        """
        Compute the amplitude spectrum of a signal.

        Args:
            time: sample times; the sample spacing is taken as the median
                difference between consecutive values.
            amplitude: signal values, one per time stamp.
            interp_exp: the signal is zero-padded to 2 ** interp_exp times
                the next power of two of its length, which interpolates
                the spectrum.

        Returns:
            (f, amp_f): frequencies and FFT magnitudes of the first quarter
            of the FFT bins.
        """
        # demean the signal
        amplitude = amplitude - np.mean(amplitude)

        # pad length to power of two with maybe some interpolation
        padded_length = self._get_padded_length(len(amplitude), interp_exp=interp_exp)

        # get the sample time
        dt = np.median(np.diff(time))

        # compute the fft
        z = np.fft.fft(amplitude, n=padded_length)

        # Only the first quarter of the bins is kept.  The first half holds
        # the non-negative frequencies, so this covers zero up to half of
        # the Nyquist frequency, not the whole positive half.
        ind = slice(0, len(z) // 4)

        # amplitudes and frequencies of the kept bins
        amp_f = np.abs(z)[ind]
        f = np.fft.fftfreq(len(z), d=dt)[ind]
        return f, amp_f
   
# amplitude spectrum of the sec_volt signal.  SFFT.fft subtracts the mean
# itself, so the explicit demeaning here is redundant but harmless.
f, amp = SFFT().fft(df.t, df.sec_volt - df.sec_volt.mean())

In [None]:
%%opts Curve [width=800, height=350 logy=False, logx=False]
# spectrum on linear axes (logx and logy are switched off in the cell options)
hv.Curve((f, amp))

In [None]:
# the same spectrum on log-log axes
pl.loglog(f, amp)

In [None]:
from astropy import units as u

In [None]:
# impedance 1 / (j * 2 * pi * f * C) of an 80 uF capacitance at 50000 Hz,
# converted to ohms
C = 80 * u.uF
(1. / (1j * 2 * np.pi * 50000 * u.Hz * C)).to(u.Ohm)

In [None]:
# the same impedance calculation for 10 uF at 50 kHz, written out step by step
# NOTE(review): this rebinds `f`, which the spectrum cells above use for the
# FFT frequency array, and `C` from the previous cell; re-running those cells
# after this one picks up the new values.
f = 50 * u.kHz
w = 2 * np.pi * f
C = 10 * u.uF
X = 1. / (1j * w * C)
X.to(u.Ohm)

In [None]:
def find_peak_frequency(time, amplitude, interp_exp=3):
    """
    Return the frequency at which the amplitude spectrum of a signal peaks.

    This cell used to be a method body pasted at top level (it referred to
    `self`, called a bare `fft` and ended in `return`), so it could not run.
    It is wrapped in a function here and is self-contained.

    Args:
        time: sample times; the sample spacing is taken as the median
            difference between consecutive values.
        amplitude: signal values, one per time stamp.
        interp_exp: the signal is zero-padded to 2 ** interp_exp times the
            next power of two of its length.
            NOTE(review): the pasted code did not define this value; 3
            mirrors the default of SFFT.fft -- confirm.

    Returns:
        The frequency of the largest spectral magnitude among the
        non-negative frequency bins (the first one if several tie).
    """
    # demean the signal
    amplitude = amplitude - np.mean(amplitude)

    # pad length to power of two with maybe some interpolation
    base_length = 1 << max(len(amplitude) - 1, 0).bit_length()
    padded_length = base_length * 2 ** interp_exp

    # get the sample time
    dt = np.median(np.diff(time))

    # compute the fft
    z = np.fft.fft(amplitude, n=padded_length)

    # define a slice for positive frequencies
    ind = slice(0, len(z) // 2)

    # get positive amplitudes
    amp_f = np.abs(z)[ind]

    # compute positive freqs
    f = np.fft.fftfreq(len(z), d=dt)[ind]

    # return the max freq
    return f[np.argmax(amp_f)]

In [None]:
def get_data(df_log, data_dir, njobs=1, reload=False):
    """
    Older variant of the feature extraction that runs one Pipe per
    (sample, fatigue_life, frequency) batch of the log frame.

    NOTE(review): this definition has the same name as the get_data defined
    earlier in the notebook and replaces it once this cell has run.

    Args:
        df_log: log frame.  It is grouped by 'sample', 'fatigue_life' and
            'frequency' and merged with the results on 'pipe_label' and
            'pos', so it needs all of those columns.
        data_dir: directory that holds the ``results.txt`` cache.
        njobs: forwarded to Pipe as ``n_jobs``.
        reload: when True the features are recomputed and the cache is
            rewritten; otherwise the existing cache is read.

    Returns:
        The cached results with the 'pipe' column renamed to 'pipe_label'
        and the 'frequency' and 'fatigue_life' columns of the matching log
        entries inserted after the first column.

    Raises:
        ValueError: if reload is True and df_log yields no batches.
    """
    results_file = os.path.join(data_dir, 'results.txt')
    if reload:
        # Collect the per-batch frames and concatenate once; DataFrame.append
        # was removed in pandas 2.0.
        frames = []
        for (pipe_label, _, freq), batch in df_log.groupby(by=['sample', 'fatigue_life', 'frequency']):
            pipe_label = f'{pipe_label}pct_{freq:0.0f}hz'
            p = Pipe(pipe_label, batch, channel_mapper, n_jobs=njobs, harmonic=3)
            p.process()
            frames.append(p.df)
        if not frames:
            raise ValueError('df_log contains no batches to process')
        pd.concat(frames, ignore_index=True).to_csv(results_file, index=False)

    df = pd.read_csv(results_file)
    df = df.rename(columns={'pipe': 'pipe_label'})

    # look up the log entry belonging to every result row
    dfj = pd.merge(df, df_log, on=['pipe_label', 'pos'], how='left')

    # debugging output: tails of the log, the results and the joined frame
    print('-'*80)
    display(df_log.tail())
    print('-'*80)
    display(df.tail())
    print('-'*80)
    display(dfj.tail())

    fields = [
        'fatigue_life',
        'frequency'
    ]

    # each field is inserted at position 1, so the last one ends up first
    for field in fields:
        df.insert(1, field, dfj[field])

    return df

with ezr.Timer('get_data'):
    # `max_files=3` used to be passed here, but get_data accepts no such
    # parameter, so the call failed with a TypeError.
    df = get_data(df_log, data_dir, njobs=2, reload=True)

# keep only the pipes whose label starts with 'ss'
df = df[df.pipe_label.str.startswith('ss')]
# df.head(3)