# Filtering helps reveal the most important features of the data

In [1]:
import numpy as np
import pandas as pd
import os

## Median Filter

### Reference: https://gist.github.com/bhawkins/3535131

In [2]:
def medfilt(x, k):
    """Apply a length-k median filter to a 1D array x.

    Boundaries are extended by repeating the endpoints, so the output has
    the same length as the input.

    Parameters
    ----------
    x : array_like
        One-dimensional input signal. Anything ``np.asarray`` accepts
        (ndarray, list, tuple) is allowed.
    k : int
        Window length. Must be odd so the window is centred on each sample.

    Returns
    -------
    numpy.ndarray
        Median of each length-``k`` window, one value per input sample.
        An empty input yields an empty float array.

    Raises
    ------
    AssertionError
        If ``k`` is even or ``x`` is not one-dimensional.
    """
    x = np.asarray(x)
    assert k % 2 == 1, "Median filter length must be odd."
    assert x.ndim == 1, "Input must be one-dimensional."
    if x.size == 0:
        # Nothing to filter, and there is no endpoint to repeat.
        return np.empty(0, dtype=float)
    half = (k - 1) // 2
    n = len(x)
    # Repeat the first/last sample `half` times on each side so that every
    # input sample has a full window around it.
    padded = np.pad(x, half, mode="edge")
    # Column i holds the signal shifted by (i - half) samples; row r is
    # therefore the complete window centred on sample r.
    windows = np.stack([padded[i:i + n] for i in range(k)], axis=1)
    return np.median(windows, axis=1)

## Despiking

### Reference: https://stackoverflow.com/questions/37556487/remove-spikes-from-signal-in-python

In [3]:
def despike(yi, th=1.e-8):
    """Remove the spike around the maximum of a 1D array.

    The spike is centred on the maximum of ``yi``. Its extent is found by
    walking left and right from the maximum until the absolute difference
    between neighbouring points drops below ``th``. The samples inside that
    extent are replaced by values of a quadratic fitted to the data on
    either side of the spike.

    Parameters
    ----------
    yi : array_like
        One-dimensional input signal. It is copied, not modified.
    th : float, optional
        Threshold on the absolute difference between neighbouring points;
        a difference below ``th`` marks the edge of the spike.

    Returns
    -------
    numpy.ndarray
        Copy of ``yi`` with the spike replaced. The copy is returned
        unaltered when the input is empty, when no edge below ``th`` is
        found on both sides of the maximum, or when no samples are
        available outside the spike to fit against.
    """
    y = np.copy(yi)  # work on a copy so the caller's array is left untouched
    n = len(y)
    if n == 0:
        return y
    x = np.arange(n)
    peak = int(np.argmax(y))
    step = np.abs(np.diff(y))

    # step[:peak] holds the differences to the left of the peak. Slicing it
    # explicitly (instead of step[peak-1::-1]) avoids wrapping around to the
    # end of the array when the peak sits at index 0.
    flat_left = np.flatnonzero(step[:peak][::-1] < th)
    flat_right = np.flatnonzero(step[peak:] < th)
    if flat_left.size == 0 or flat_right.size == 0:
        # No spike edge on one of the sides: return the unaltered copy.
        return y
    left = peak - 1 - int(flat_left[0])
    right = peak + int(flat_right[0]) + 1

    # Widen very narrow spikes by one sample on each side.
    if (right - left) <= 3:
        left = max(left - 1, 0)
        right = min(right + 1, n)

    # For the fit, use an area twice as wide as the spike, clamped to the
    # array bounds so negative indices cannot wrap around.
    margin = int(round((right - left) / 2.))
    fit_lo = max(left - margin, 0)
    fit_hi = min(right + margin, n)

    # Make a gap at the spike area.
    xgapped = np.concatenate((x[fit_lo:left], x[right:fit_hi]))
    ygapped = np.concatenate((y[fit_lo:left], y[right:fit_hi]))
    if xgapped.size == 0:
        return y

    # Quadratic fit of the gapped array; lower the degree when fewer than
    # three support points are available.
    degree = min(2, xgapped.size - 1)
    poly = np.poly1d(np.polyfit(xgapped, ygapped, degree))
    y[left:right] = poly(x[left:right])
    return y

## Clipping, Median Filter, Despike

In [4]:
def log_filter(filename, a_min=0, a_max=40, k=7):
    """Filter a log file three ways and save the results as CSV.

    The values are loaded with ``np.loadtxt`` and each filter is applied
    independently to the raw data (they are not chained):

    * ``Clipping``      -- ``np.clip`` to the range ``[a_min, a_max]``
    * ``Median_Filter`` -- ``medfilt`` with window length ``k``
    * ``Despike``       -- ``despike`` with its default threshold

    The table is written to
    ``result/<filename without extension>_Filtered.csv``; the output
    directory is created if it does not exist.

    Parameters
    ----------
    filename : str
        Path of the text file to load.
    a_min, a_max : scalar, optional
        Lower and upper clipping bounds.
    k : int, optional
        Median filter window length (must be odd).

    Returns
    -------
    pandas.DataFrame
        One column per filter, one row per input sample.
    """
    col = ['Clipping', 'Median_Filter', 'Despike']
    # ndmin=1 keeps a single-value file one-dimensional, as medfilt requires.
    a = np.loadtxt(filename, ndmin=1)
    filtered = np.column_stack((
        np.clip(a, a_min, a_max),
        medfilt(a, k),
        despike(a),
    ))
    df = pd.DataFrame(filtered, columns=col)
    out_path = 'result/' + os.path.splitext(filename)[0] + '_Filtered.csv'
    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    df.to_csv(out_path, index=False)
    return df

In [5]:
# Input files to process, one per well.
well_name = [
    '01_SHRIMPLIN_GR.txt',
    '02_SHANKLE_GR.txt',
    '03_LUKE_G_U_GR.txt',
    '04_CROSS_H_CATTLE_GR.txt',
    '05_NOLAN_GR.txt',
    '06_NEWBY_GR.txt',
    '07_CHURCHMAN_BIBLE_GR.txt',
    '08_Recruit_F9_GR.txt',
]

In [6]:
# Run the filters on every listed file, in order; each call writes one CSV.
for name in well_name:
    log_filter(name)