In [42]:
# Approach 1: Take the derivative of the graph and delete points where the slope changes by more than a cutoff

import csv, numpy
import plotly.plotly as py
import plotly.graph_objs as graphs
from scipy import interpolate
from scipy.signal import savgol_filter

# Read the CSV into a list of string rows, skipping the first (header) line
with open('../data/P7132033_37.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)
    zc_str = [row for row in reader]

# Keep only the rows whose y value is above 44000, then convert both columns to floats
kept_rows = [(x_str, y_str) for x_str, y_str in zc_str if float(y_str) > 44000]
zc_x = [float(x_str) for x_str, y_str in kept_rows]
zc_y = [float(y_str) for x_str, y_str in kept_rows]

# Get dy and dy2
# dy[k]  = absolute difference between point k and the point before it
# dy2[k] = absolute difference between dy[k + 1] and dy[k], so len(dy2) == len(dy) - 1
#
# The first point has no predecessor. Seeding prev_y with the first sample makes
# dy[0] == 0 instead of abs(zc_y[0] - 0), which was a jump from a non-existent
# zero sample and also inflated dy2[0].
prev_y = zc_y[0] if zc_y else 0
dy = list()
dy2 = list()
for y in zc_y:
    dy.append(abs(y - prev_y))
    if len(dy) > 1:
        dy2.append(abs(dy[-1] - dy[-2]))
    prev_y = y
    
# Threshold applied to dy2 when discarding noisy points
cutoff = 700

# Smooth graph - Savitzky-Golay filter, tried with three window/order combinations
yhat2 = savgol_filter(zc_y, window_length=27, polyorder=2)
yhat3 = savgol_filter(zc_y, window_length=17, polyorder=3)
yhat4 = savgol_filter(zc_y, window_length=17, polyorder=4)  # best fit so far

# ELIMINATE NOISE
# Keep index i when dy2[i] is below the cutoff; the retained y value is taken
# from the smoothed curve yhat4, not from the original zc_y.
noiseless_x = list()
noiseless_y = list()
for i, slope_change in enumerate(dy2):
    if slope_change < cutoff:
        noiseless_x.append(zc_x[i])
        noiseless_y.append(yhat4[i])

# Graph data: loaded points, smoothed curve, retained points, cutoff line and dy
trace_noisy = graphs.Scatter(x=zc_x, y=zc_y, mode='markers', name='ZC- noisy')
trace_smooth = graphs.Scatter(x=zc_x, y=yhat4, name='ZC- smoothed')
trace_noiseless = graphs.Scatter(x=noiseless_x, y=noiseless_y, mode='markers', name='ZC- noiseless')

# NOTE(review): despite its name this trace plots dy, while the noise filter
# compares dy2 against cutoff - confirm which series is meant to be shown.
trace_dy2 = graphs.Scatter(x=zc_x, y=dy, name='Derivative- noisy')

# Horizontal line at the cutoff value, drawn from the first to the last x
trace_cutoff = graphs.Scatter(x=[zc_x[0], zc_x[-1]], y=[cutoff, cutoff], name='Cutoff')

trace = [trace_noisy, trace_smooth, trace_noiseless, trace_cutoff, trace_dy2]
py.iplot(trace, filename='P7132033_37_1')

In [2]:
# Approach 2: Smooth the graph and compare the smoothed and unsmoothed curves. Points that differ by more than a cutoff are deleted

import csv, numpy
import plotly.plotly as py
import plotly.graph_objs as graphs
from scipy import interpolate
from scipy.signal import savgol_filter

# Read the CSV into a list of string rows, skipping the first (header) line
with open('../data/P7132033_37.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)
    zc_str = [row for row in reader]

# Keep only the rows whose y value is above 44000, then convert both columns to floats
kept_rows = [(x_str, y_str) for x_str, y_str in zc_str if float(y_str) > 44000]
zc_x = [float(x_str) for x_str, y_str in kept_rows]
zc_y = [float(y_str) for x_str, y_str in kept_rows]
    
# Largest allowed gap between an original point and the smoothed curve
cutoff = 300

# Smooth graph - Savitzky-Golay filter, tried with three window/order combinations
yhat2 = savgol_filter(zc_y, window_length=27, polyorder=2)
yhat3 = savgol_filter(zc_y, window_length=17, polyorder=3)
yhat4 = savgol_filter(zc_y, window_length=7, polyorder=4)  # best fit so far

# Compare smoothed and original
# dy holds, per point, the absolute gap between the original value and yhat4
dy = [abs(original - smoothed) for original, smoothed in zip(zc_y, yhat4)]

# A point is kept (with its original y value) when its gap is below the cutoff
noiseless_y = [y_val for y_val, gap in zip(zc_y, dy) if gap < cutoff]
noiseless_x = [x_val for x_val, gap in zip(zc_x, dy) if gap < cutoff]
    
# Graph data: loaded points, smoothed curve, retained points, cutoff line and
# the per-point deviation that the cutoff is applied to
trace_noisy = graphs.Scatter(
                            x = zc_x,
                            y = zc_y,
                            mode = 'markers',
                            name = 'ZC- noisy'
                        )
trace_smooth = graphs.Scatter(
                            x = zc_x,
                            y = yhat4,
                            name = 'ZC- smoothed'
                        )
trace_noiseless = graphs.Scatter(
                            x = noiseless_x,
                            y = noiseless_y,
                            mode = 'markers',
                            name = 'ZC- noiseless'
                         )
# In this cell dy is abs(zc_y[i] - yhat4[i]), i.e. the distance of each point
# from the smoothed curve, not a derivative. The legend label is corrected so
# the plot says what is actually being compared against the cutoff.
trace_dy = graphs.Scatter(
                            x = zc_x,
                            y = dy,
                            name = 'Deviation from smoothed'
                         )
# Horizontal line at the cutoff value, drawn from the first to the last x
trace_cutoff = graphs.Scatter(
                            x = [zc_x[0], zc_x[-1]],
                            y = [cutoff, cutoff],
                            name = 'Cutoff'
                         )
trace = [trace_noisy, trace_smooth, trace_noiseless, trace_cutoff, trace_dy]
py.iplot(trace, filename='P7132033_37_2')

In [48]:
# Approach 3: Smooth the graph and compare the smoothed and unsmoothed curves. Go by chunks and take the average difference between the smoothed and unsmoothed values.
# TODO: eliminate holes in the graph

import csv, numpy
import plotly.plotly as py
import plotly.graph_objs as graphs
from scipy import interpolate
from scipy.signal import savgol_filter

# Magic numbers
dy_cutoff = 100   # threshold on dy used by the hole-smoothing step
cutoff = 2000     # largest average |original - smoothed| a pulse may have
avg_d = 3000      # largest distance between consecutive points in one group
pulse_size = 30   # a group must span more than this many points to be kept

# Read the CSV into a list of string rows, skipping the first (header) line
filename = 'P7132033_37'
with open('../data/{}.csv'.format(filename), 'r') as f:
    reader = csv.reader(f)
    next(reader)
    zc_str = [row for row in reader]

# Convert every row to floats (no rows are filtered out in this approach)
zc_x = [float(x_str) for x_str, y_str in zc_str]
zc_y = [float(y_str) for x_str, y_str in zc_str]
    
# Get dy: dy[k] is the absolute difference between point k and point k - 1.
# The first point has no predecessor, so dy[0] is 0 rather than a jump from a
# non-existent zero sample.
n_points = len(zc_y)
dy = [0.0] * min(n_points, 1)
for k in range(1, n_points):
    dy.append(abs(zc_y[k] - zc_y[k - 1]))


def _is_small_step(k):
    """Return True when k is a valid index and dy[k] is below dy_cutoff.

    Out-of-range indices count as "not small", so the look-behind and
    look-ahead below never wrap to the other end of the list and never raise
    IndexError on the last samples.
    """
    return 0 <= k < n_points and dy[k] < dy_cutoff


# Smooth holes
# A point reached by a large step (dy[i] > dy_cutoff) is replaced by the mean
# of one neighbour on each side. The neighbour is the nearest of the one or
# two adjacent points that was itself reached by a small step. If no such
# neighbour exists on either side, the point is left unchanged.
# dy is not recomputed after a point is replaced.
for i in range(1, n_points):
    if dy[i] <= dy_cutoff:
        continue

    if _is_small_step(i - 1):
        left = i - 1
    elif _is_small_step(i - 2):
        left = i - 2
    else:
        continue

    if _is_small_step(i + 1):
        right = i + 1
    elif _is_small_step(i + 2):
        right = i + 2
    else:
        continue

    zc_y[i] = (zc_y[left] + zc_y[right])/2

# Smooth graph - Savitzky-Golay filter, tried with three window/order combinations
yhat2 = savgol_filter(zc_y, window_length=27, polyorder=2)
yhat3 = savgol_filter(zc_y, window_length=17, polyorder=3)
yhat4 = savgol_filter(zc_y, window_length=17, polyorder=4)  # best fit so far

# Compare smoothed and original
# Walk through the points, group consecutive points that lie close together,
# and keep a group as a "pulse" when it is long enough and, on average, close
# enough to the smoothed curve. pulses collects the x positions that mark the
# start and end of every kept pulse.
n_points = len(zc_x)
noiseless_y = list()
noiseless_x = list()
pulses = list()
dy = list()  # reset here; not populated by this step


def _close_to_previous(k):
    """Return True when point k is within avg_d of point k - 1 in (x, y) space.

    Point 0 has no predecessor and is treated as close, so the scan never
    indexes zc_x[-1] / zc_x[-2] and silently compares against the end of the data.
    """
    if k == 0:
        return True
    step = numpy.sqrt((zc_x[k] - zc_x[k - 1])**2 + (zc_y[k] - zc_y[k - 1])**2)
    return step <= avg_d


i = 0
while i < n_points:
    # The scan starts one point before i, clamped so it cannot go negative
    j = max(i - 1, 0)
    average = 0

    # Find closely grouped points and clump them together
    while j < n_points and _close_to_previous(j):

        # Variance between smooth graph and original
        average += abs(zc_y[j] - yhat4[j])
        j += 1

    # Filter out pulses that are too small or too noisy
    # NOTE(review): for i >= 1 the sum above also includes point i - 1 while
    # the divisor is j - i; left unchanged - confirm whether that is intended.
    if j - i > pulse_size and average / (j - i) <= cutoff:

        # Add pulse lines
        pulses.append(zc_x[i])

        # Build noiseless graph
        noiseless_y.extend(zc_y[i:j])
        noiseless_x.extend(zc_x[i:j])
        i = j

        # Add pulse lines: the first point after the pulse, or the last point
        # when the pulse runs to the end of the data (zc_x[i] would be out of range)
        pulses.append(zc_x[min(i, n_points - 1)])
    i += 1
    
# Graph data: loaded points (after hole smoothing), smoothed curve and kept pulses
trace_noisy = graphs.Scatter(x=zc_x, y=zc_y, mode='markers', name='ZC- noisy')
trace_smooth = graphs.Scatter(x=zc_x, y=yhat4, name='ZC- smoothed')
trace_noiseless = graphs.Scatter(x=noiseless_x, y=noiseless_y, mode='markers', name='ZC- noiseless')

# One thin dark-red vertical line, from y = 0 to y = 100000, per pulse boundary
shapes = [
    {
        'type': 'line',
        'xref': 'x',
        'yref': 'y',
        'x0': pulse_x,
        'y0': 0,
        'x1': pulse_x,
        'y1': 100000,
        'line': {'color': 'rgb(139, 0, 0)', 'width': 1},
    }
    for pulse_x in pulses
]

layout = graphs.Layout(shapes=shapes)
trace = [trace_noisy, trace_smooth, trace_noiseless]
figure = {'data': trace, 'layout': layout}
py.iplot(figure, filename=filename)
