In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Directory (relative to the working directory) holding the input Excel
# workbooks; used by readfile() and listed by the batch-plot loops.
BASE_DIR = "newdata/data_excel_finalmatrix_modified/"

In [None]:
def readfile(filename):
    """Read one Excel workbook from ``BASE_DIR`` with ``pd.read_excel``.

    Parameters
    ----------
    filename : str
        File name relative to ``BASE_DIR`` (e.g. ``"shashi5.xlsx"``).

    Returns
    -------
    Whatever ``pd.read_excel`` returns for that path.
    """
    # os.path.join instead of string concatenation: the result no longer
    # depends on BASE_DIR happening to end with a path separator.
    return pd.read_excel(os.path.join(BASE_DIR, filename))

In [None]:
def filter_data(data):
    """Keep the (column 1, last column) pair of every row whose column 1 is not 0.

    Parameters
    ----------
    data : array-like
        Anything ``np.array`` turns into a 2-D table (e.g. a DataFrame).

    Returns
    -------
    numpy.ndarray of shape ``(n, 2)``: column 0 is the input's column 1,
    column 1 is the input's last column. ``n`` is 0 when no row qualifies.
    """
    rows = np.array(data)
    kept = [(row[1], row[-1]) for row in rows if row[1] != 0]
    # np.array([]) is 1-D; the reshape guarantees an (n, 2) result even when
    # nothing survives, so callers can always slice [:, 0] / [:, 1].
    return np.array(kept).reshape(-1, 2)

In [None]:
def plot_data(data,saveplace=None,ylim_min=None):
    """Scatter-plot a two-column array, optionally saving the figure.

    Column 1 of ``data`` goes on the x-axis and column 0 on the y-axis.

    Parameters
    ----------
    data : 2-D numpy array with at least two columns.
    saveplace : str or None
        If given, the figure is written there with ``plt.savefig``.
    ylim_min : number or None
        Second argument to ``plt.ylim``; defaults to 0. The first argument is
        always 60, so with the default the y-axis runs from 60 (bottom) to 0 (top).
    """
    fig = plt.figure(figsize=(20,12))
    plt.scatter(data[:,1],data[:,0])
    plt.ylim(60, 0 if ylim_min is None else ylim_min)
    if saveplace is not None:
        plt.savefig(saveplace)
    plt.show()
    # Release the figure explicitly: this function is called in loops over
    # many files, and unclosed figures accumulate on backends that do not
    # close them on show().
    plt.close(fig)

In [None]:
# Quick look at one workbook: load it, keep the rows filter_data retains,
# and scatter-plot the resulting pairs.
plot_data(filter_data(readfile("shashi5.xlsx")))

In [None]:
# Smoothing kernels keyed by kernel length; each value is a list of candidate
# weight vectors. The single 5-tap kernel here has weights summing to 1.
GAUSSIAN_KERNELS = {5 : [np.array([5,15,60,15,5])/100]}

In [None]:
def apply_filter(data,kernel,kernel_len):
    """Replace column 0 of ``data`` by a sliding weighted sum.

    With ``half = int(kernel_len / 2)``, every row that has ``half`` rows on
    both sides gets ``sum(window * kernel)`` over the centred window of
    ``2 * half + 1`` column-0 values (the kernel is applied as written, it is
    not flipped). Column 1 is carried over unchanged. The first and last
    ``half`` rows are copied through untouched.

    Parameters
    ----------
    data : 2-D numpy array; column 0 is filtered, column 1 is kept.
    kernel : 1-D array of ``2 * half + 1`` weights.
    kernel_len : int, nominal kernel length.

    Returns
    -------
    numpy.ndarray with exactly one row per input row. Inputs with fewer than
    ``2 * half`` rows have no interior and are returned as an unchanged copy.
    """
    # Named ``half`` rather than ``pd`` so the pandas alias is not shadowed.
    half = int(kernel_len/2)
    count = len(data)
    if count == 0 or count < 2 * half:
        # Too short for separate head and tail borders: copying both would
        # duplicate rows (or index past the end), so pass the data through.
        return np.array(data)
    newdata = [data[i] for i in range(half)]
    for i in range(half, count - half):
        window = data[i-half:i+half+1,0]
        newdata.append((np.sum(window*kernel), data[i,1]))
    newdata.extend(data[i] for i in range(count - half, count))
    return np.array(newdata)

In [None]:
# Load the sample workbook from BASE_DIR.
data = readfile("shashi5.xlsx")

In [None]:
# Reduce to (column 1, last column) pairs with non-zero column 1.
# NOTE: this rebinds `data`, so re-running only this cell fails on the
# already-filtered two-column array's contents being filtered again.
data = filter_data(data)

In [None]:
# Apply the 5-tap kernel from GAUSSIAN_KERNELS to column 0 of the pairs.
filtered_data = apply_filter(data,GAUSSIAN_KERNELS[5][0],5)

In [None]:
# Plot the kernel-filtered series.
plot_data(filtered_data)

In [None]:
# Batch export: for every kernel in GAUSSIAN_KERNELS and every file in
# BASE_DIR, save a plot of the filtered pairs and of their kernel-filtered
# version under filter_plots/<kernel_len>_<kernel_index>/.
# NOTE: `data` and `gaussian_data` are rebound on every iteration, so cells
# below see the values left by the last file processed here.
if not os.path.exists("filter_plots"):
    os.mkdir("filter_plots")
for filter_len, filters in GAUSSIAN_KERNELS.items():
    for i, fil in enumerate(filters):
        foldername = os.path.join("filter_plots", "{}_{}".format(filter_len,i))
        if not os.path.exists(foldername):
            os.mkdir(foldername)
        for file in os.listdir(BASE_DIR):
            # splitext strips whatever extension the file has, instead of
            # assuming a five-character ".xlsx" suffix.
            stem = os.path.splitext(file)[0]
            try:
                data = readfile(file)
                data = filter_data(data)
                plot_data(data,os.path.join(foldername, stem + "_original.png"))
                gaussian_data = apply_filter(data,fil,filter_len)
                plot_data(gaussian_data,os.path.join(foldername, stem + "_gaussian.png"))
            except Exception as exc:
                # Best effort: keep going with the other files, but report
                # why this one failed. Catching Exception (not a bare except)
                # leaves KeyboardInterrupt able to stop the loop.
                print("{}: {!r}".format(file, exc))

In [None]:
# 5-tap kernel whose weights sum to 0, keyed by kernel length.
# Unlike GAUSSIAN_KERNELS, each value is a single array rather than a list.
LAPLACIAN_FILTER = {5 : np.array([-1,-1,4,-1,-1])}

In [None]:
# Apply the 5-tap LAPLACIAN_FILTER kernel to column 0 of `data`.
# NOTE(review): when cells run top to bottom, `data` is whatever the batch
# loop above last assigned, not necessarily shashi5.xlsx — confirm intent.
laplacian_data = apply_filter(data,LAPLACIAN_FILTER[5],5)

In [None]:
# Plot the LAPLACIAN_FILTER output with the default 60..0 y-range.
plot_data(laplacian_data)

In [None]:
def conservative_smoothing(data,kernel_len):
    """Clamp each column-0 value into the range spanned by its neighbours.

    With ``half = int(kernel_len / 2)``, every row that has ``half`` rows on
    both sides is compared with those ``2 * half`` neighbouring column-0
    values (taken from the input, not from already-smoothed rows): a value
    below their minimum becomes the minimum, a value above their maximum
    becomes the maximum, anything else is kept. Column 1 is carried over
    unchanged and the first/last ``half`` rows are copied through.

    Parameters
    ----------
    data : 2-D numpy array; column 0 is smoothed, column 1 is kept.
    kernel_len : int, window length including the centre sample.

    Returns
    -------
    numpy.ndarray with exactly one row per input row. When the window has no
    neighbours (``half == 0``) or the input has fewer than ``2 * half`` rows,
    an unchanged copy of ``data`` is returned.
    """
    # Named ``half`` rather than ``pd`` so the pandas alias is not shadowed.
    half = int(kernel_len/2)
    count = len(data)
    if half == 0 or count < 2 * half:
        # No neighbours to clamp against, or too short for separate head and
        # tail borders (copying both would duplicate rows).
        return np.array(data)
    newdata = [data[i] for i in range(half)]
    for i in range(half, count - half):
        neighbors = np.concatenate((data[i-half:i,0], data[i+1:i+1+half,0]))
        low = np.min(neighbors)
        high = np.max(neighbors)
        centre = data[i,0]
        if centre < low:
            centre = low
        elif centre > high:
            centre = high
        newdata.append((centre, data[i,1]))
    newdata.extend(data[i] for i in range(count - half, count))
    return np.array(newdata)

In [None]:
# Conservative smoothing with a 5-sample window (2 neighbours on each side).
# NOTE(review): `data` is whatever the earlier batch loop last assigned when
# the notebook is run top to bottom — confirm that is the intended input.
conservative_data = conservative_smoothing(data,5)

In [None]:
# Batch export: for every kernel in GAUSSIAN_KERNELS and every file in
# BASE_DIR, save a plot of the conservatively smoothed pairs (window of 5)
# and of that result after the kernel, under
# filter_plots/<kernel_len>_<kernel_index>/.
# NOTE: `data`, `conservative_data` and `gaussian_data` are rebound on every
# iteration, so cells below see the values left by the last file processed.
if not os.path.exists("filter_plots"):
    os.mkdir("filter_plots")
for filter_len, filters in GAUSSIAN_KERNELS.items():
    for i, fil in enumerate(filters):
        foldername = os.path.join("filter_plots", "{}_{}".format(filter_len,i))
        if not os.path.exists(foldername):
            os.mkdir(foldername)
        for file in os.listdir(BASE_DIR):
            # splitext strips whatever extension the file has, instead of
            # assuming a five-character ".xlsx" suffix.
            stem = os.path.splitext(file)[0]
            try:
                data = readfile(file)
                data = filter_data(data)
                conservative_data = conservative_smoothing(data,5)
                plot_data(conservative_data,os.path.join(foldername, stem + "_conservative.png"))
                gaussian_data = apply_filter(conservative_data,fil,filter_len)
                plot_data(gaussian_data,os.path.join(foldername, stem + "_conservative_gaussian.png"))
            except Exception as exc:
                # Best effort: keep going with the other files, but report
                # why this one failed. Catching Exception (not a bare except)
                # leaves KeyboardInterrupt able to stop the loop.
                print("{}: {!r}".format(file, exc))

In [None]:
# Plot the current `data`.
# NOTE(review): after the batch loop above this is the last file it
# processed, not necessarily shashi5.xlsx — confirm intent.
plot_data(data)

In [None]:
# Conservative smoothing followed by the 5-tap GAUSSIAN_KERNELS kernel.
gaussian_conservative = apply_filter(conservative_data,GAUSSIAN_KERNELS[5][0],5)

In [None]:
# Plot the conservative + kernel-smoothed series.
plot_data(gaussian_conservative)

In [None]:
# First-derivative (central-difference style) kernels keyed by kernel length;
# each value is a list of candidate kernels. Every kernel is antisymmetric and
# sums to 0, so a constant signal maps to 0. The second 3-tap kernel was
# [-2, 0, -2], which is symmetric and does not sum to 0; its last weight is
# corrected to +2.
FIRST_DERIVATIVE = {3 : [np.array([-1,0,1]),np.array([-2,0,2])], 5 : [np.array([-1,-2,0,2,1])]}

In [None]:
# Conservative smoothing followed by the 5-tap GAUSSIAN_KERNELS kernel.
# NOTE(review): same expression as `gaussian_conservative` in the earlier cell.
conservative_gauss = apply_filter(conservative_data,GAUSSIAN_KERNELS[5][0],5)

In [None]:
# Plot the conservatively smoothed series (before the kernel).
plot_data(conservative_data)

In [None]:
# Plot the conservative + kernel-smoothed series for comparison.
plot_data(conservative_gauss)

In [None]:
# 3-tap [-1, 0, 1] difference kernel applied to the conservatively smoothed series.
first_der = apply_filter(conservative_data,FIRST_DERIVATIVE[3][0],3)

In [None]:
# y-range widened to 60..-60 because the difference kernel can give negative values.
plot_data(first_der,ylim_min=-60)

In [None]:
# 3-tap [-1, 0, 1] difference kernel applied to the conservative + kernel-smoothed series.
# NOTE(review): despite the "_raw" name, the input is `conservative_gauss`,
# i.e. the more heavily smoothed series — confirm the naming.
first_der_raw = apply_filter(conservative_gauss,FIRST_DERIVATIVE[3][0],3)

In [None]:
# Plot with the y-range widened to 60..-60.
plot_data(first_der_raw,ylim_min=-60)

In [None]:
# 5-tap [-1, -2, 0, 2, 1] difference kernel applied to the conservative + kernel-smoothed series.
first_der_5_g = apply_filter(conservative_gauss,FIRST_DERIVATIVE[5][0],5)

In [None]:
# Plot with the y-range widened to 60..-60.
plot_data(first_der_5_g,ylim_min=-60)

In [None]:
# 5-tap [-1, -2, 0, 2, 1] difference kernel applied to the conservatively smoothed series.
first_der_5 = apply_filter(conservative_data,FIRST_DERIVATIVE[5][0],5)

In [None]:
# Plot with the y-range widened to 60..-60.
plot_data(first_der_5,ylim_min=-60)

In [None]:
# Re-plot the conservatively smoothed series for reference next to the derivative plots.
plot_data(conservative_data)

In [None]:
# Re-plot the current `data` for reference.
# NOTE(review): `data` was last rebound inside the batch loop, so this is the
# last file processed there — confirm that is the series intended here.
plot_data(data)