# Introduction
This is a simple notebook that demonstrates filtering for denoising.  
If you find this notebook useful, please upvote 👍.

# Import

In [None]:
import os
import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

In [None]:
# Notebook-wide matplotlib defaults: 12x8 figures, grid off unless a plot enables it.
mpl.rcParams.update({
    'figure.figsize': (12, 8),
    'axes.grid': False,
})

# Dataset

In [None]:
# Lookup table used below to map Asset_ID to Asset_Name.
asset_details_csv = '../input/g-research-crypto-forecasting/asset_details.csv'
asset_df = pd.read_csv(asset_details_csv)
asset_df.info()
# Bare last expression: show the frame as the cell output.
asset_df

In [None]:
# Training data; the functions below filter it by Asset_ID.
train_csv = '../input/g-research-crypto-forecasting/train.csv'
train_df = pd.read_csv(train_csv)
train_df.info()
# Only preview the first rows as the cell output.
train_df.head()

# Plot time series

In [None]:
def plot_time_series(
    asset_id: int,
    asset_value: str,
    start: int = 0,
    end: "int | None" = None,
    visualise: bool = True,
    drop_na=True,
) -> "tuple[np.ndarray, np.ndarray]":
    """Extract one column of one asset from train_df (train.csv) and optionally plot it.

    Rows of the global ``train_df`` whose ``Asset_ID`` equals ``asset_id`` are
    selected and re-indexed from 0. When ``drop_na`` is true, rows in which
    ``asset_value`` is missing are removed *before* slicing, so ``start``/``end``
    count the remaining rows while the returned ``t`` keeps the row numbers
    from before the drop.

    Args:
        asset_id (int): Asset_ID; looked up in the global ``asset_df`` for the plot title.
        asset_value (str): Column to extract, e.g. Count, Open, High, Low, Close,
            Volume, VWAP, Target.
        start (int): Positional start of the slice (inclusive).
        end (int | None): Positional end of the slice (exclusive). ``None``
            (the default) means ``len(train_df)``, evaluated at call time.
        visualise (bool): If true, the graph is plotted.
        drop_na (bool): If true, drop rows whose ``asset_value`` is NaN before slicing.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(t, y)`` where ``t`` is the row index of
        the slice and ``y`` the corresponding ``asset_value`` values.

    Raises:
        IndexError: If ``asset_id`` does not occur in ``asset_df``.
    """
    if end is None:
        # Resolve the default per call rather than in the signature: a default of
        # len(train_df) in the signature is frozen when the function is defined
        # and requires train_df to exist at that point.
        end = len(train_df)

    asset_name = asset_df.loc[asset_df["Asset_ID"] == asset_id, "Asset_Name"].values[0]
    asset_rows = train_df[train_df["Asset_ID"] == asset_id].reset_index(drop=True)

    if drop_na:
        asset_rows = asset_rows.dropna(subset=[asset_value])

    window = asset_rows.iloc[start:end]
    # np.array(...) copies, so callers may modify t / y without touching the frame.
    t = np.array(window.index)
    y = np.array(window[asset_value])

    if visualise:
        fig, ax = plt.subplots()
        ax.plot(t, y)
        ax.tick_params(axis="both", labelsize=15)
        ax.set_title(f"{asset_value} of {asset_name}\n(start={start},end={end})", fontsize=30)
        ax.set_xlabel("Time", fontsize=20)
        ax.set_ylabel(f"{asset_value}", fontsize=20)
        plt.show()

    return t, y

In [None]:
# Whole Target series of Asset_ID 0 (default start/end).
t, y = plot_time_series(
    asset_id=0,
    asset_value="Target",
    visualise=True,
)

In [None]:
# Zoom in on the first 1000 rows of the same series.
t, y = plot_time_series(
    asset_id=0,
    asset_value="Target",
    start=0,
    end=1000,
)

# Check for NaN values
The above data looks good.  
But let's check whether it contains any NaN values.  

In [None]:
def check_nan(asset_id,asset_value,start,end):
    """Print whether a column of one asset has missing values in rows ``start:end``.

    The rows of the global ``train_df`` belonging to ``asset_id`` are re-indexed
    from 0 and sliced positionally; no rows are dropped beforehand. The asset's
    name is looked up in the global ``asset_df`` for the message.

    Args:
        asset_id: Asset_ID to inspect.
        asset_value: Name of the column to check.
        start: Positional start of the slice (inclusive).
        end: Positional end of the slice (exclusive).

    Returns:
        None. The result is only printed.
    """
    asset_name = asset_df.loc[asset_df["Asset_ID"] == asset_id, "Asset_Name"].values[0]
    per_asset_df = train_df.loc[train_df["Asset_ID"] == asset_id].reset_index(drop=True)
    missing_mask = per_asset_df.iloc[start:end][asset_value].isnull()

    # Clean case first; the missing-value count is only needed otherwise.
    if not missing_mask.values.any():
        print(f"Wow!\nThis data(from {start} to {end} for {asset_name}) is very clean(no missing values)!\nUnbelievable!")
        return

    null_num = missing_mask.sum()
    print(f"Unfortunately from {start} to {end}, there are {null_num} missing value(s) for {asset_name}...")
    

In [None]:
# Same asset and window as the plot above.
check_nan(
    asset_id=0,
    asset_value="Target",
    start=0,
    end=1000,
)

In [None]:
# Repeat the check for Asset_ID 2.
check_nan(
    asset_id=2,
    asset_value="Target",
    start=0,
    end=1000,
)

In [None]:
# Rebinds t and y to Asset_ID 2; the FFT and filter cells below use these.
t, y = plot_time_series(
    asset_id=2,
    asset_value="Target",
    start=0,
    end=1000,
)

# FFT

In [my last notebook](https://www.kaggle.com/osamurai/fft-analysis-tutorial), I performed an FFT analysis.  
So let's try it again on this dataset.

In [None]:
def plot_fft(y:np.ndarray, t:np.ndarray, fs:int, sample_num:int, value_name:str):
    """Compute the FFT of a series and plot its single-sided amplitude spectrum.

    Args:
        y (np.ndarray): time series values.
        t (np.ndarray): time axis; only its length is used, to build the
            frequency bins.
        fs (int): sampling frequency [Hz].
        sample_num (int): number of samples. It scales the amplitude and selects
            the first half of the spectrum, so it is expected to equal the
            length of ``y`` and ``t``; this is not checked here.
        value_name (str): name shown in the plot title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    sp = np.fft.fft(y)
    freq = np.fft.fftfreq(t.shape[-1], d=1.0/fs)

    # Dividing by N/2 turns FFT magnitudes into single-sided amplitudes.
    amplitude = np.abs(sp/(sample_num/2))
    half = int(sample_num/2)

    fig, ax = plt.subplots(figsize=(12,8))
    # Start at index 1 to leave out the zero-frequency (DC) bin; stop at `half`
    # to plot only the positive-frequency half of the spectrum.
    ax.plot(freq[1:half], amplitude[1:half])
    ax.set_title(f"FFT of {value_name}", fontsize=30)
    ax.set_xlabel("Frequency [Hz]")
    ax.set_ylabel("Amplitude")
    ax.grid()
    plt.show()

In [None]:
# Sample count passed to plot_fft; the filter cell also derives dt and numtaps from it.
n = 1_000

In [None]:
# Spectrum of the series currently held in (t, y).
plot_fft(
    y=y,
    t=t,
    fs=1000,
    sample_num=n,
    value_name="Bitcoin cash",
)

There is some noise.  
But if filtering (in this case, low pass filtering) is applied, we can get denoised data.  

For this, scipy is used.  

In [None]:
from scipy import signal
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.buttord.html

# Compare several low-pass filter designs on the series held in (t, y).
# The series is treated as if it were sampled at n Hz; all frequencies below
# are relative to that nominal rate.
dt = 1/n          # nominal sampling interval [s]
fn = 1/(2*dt)     # Nyquist frequency [Hz]

# parameter setting
fp = 10                          # passband frequency[Hz]
fs = 50                        # stopband frequency[Hz]
gpass = 1                       # The maximum loss in the passband[dB]
gstop = 40                      # The minimum attenuation in the stopband [dB]
# normalization: scipy expects band edges as a fraction of the Nyquist frequency
Wp = fp/fn
Ws = fs/fn

# The IIR designs are applied with filtfilt (forward and backward pass), so
# their outputs need no delay compensation when plotted against t.

# Butterworth filter
N, Wn = signal.buttord(Wp, Ws, gpass, gstop)
b1, a1 = signal.butter(N, Wn, "low")
y1 = signal.filtfilt(b1, a1, y)

# Chebyshev filter Ⅰ
N, Wn = signal.cheb1ord(Wp, Ws, gpass, gstop)
b2, a2 = signal.cheby1(N, gpass, Wn, "low")
y2 = signal.filtfilt(b2, a2, y)

# Chebyshev filter Ⅱ
N, Wn = signal.cheb2ord(Wp, Ws, gpass, gstop)
b3, a3 = signal.cheby2(N, gstop, Wn, "low")
y3 = signal.filtfilt(b3, a3, y)

# Elliptic filter
N, Wn = signal.ellipord(Wp, Ws, gpass, gstop)
b4, a4 = signal.ellip(N, gpass, gstop, Wn, "low")
y4 = signal.filtfilt(b4, a4, y)

# Bessel filter
# There is no order-selection helper used here, so the order is fixed by hand.
# NOTE(review): the cutoff passed is Ws (the stopband edge), not Wp — confirm
# that this is intended.
N = 4
b5, a5 = signal.bessel(N, Ws, "low")
y5 = signal.filtfilt(b5, a5, y)

# FIR filter
# NOTE(review): numtaps equals n, so if y holds n samples the filter is as long
# as the signal and the lfilter output is mostly start-up transient — consider
# a much shorter filter.
a6 = 1
numtaps = n
b6 = signal.firwin(numtaps, Wp, window="hann")
y6 = signal.lfilter(b6, a6, y)
# lfilter is a single causal pass, so the linear-phase FIR output lags the
# input by (numtaps-1)/2 samples.
delay_samples = (numtaps-1)/2
delay = delay_samples*dt         # the same lag in nominal seconds (kept for compatibility)

# Plot
# t holds row indices (samples), not seconds, so the FIR trace is shifted by the
# lag in samples. Subtracting `delay` (seconds) would move it by only ~0.5 of a
# sample and leave it misaligned with the other curves.
fig, ax = plt.subplots()
ax.plot(t, y, "b")
ax.plot(t, y1, "r", linewidth=2, label="butter")
ax.plot(t, y2, "g", linewidth=2, label="cheby1")
ax.plot(t, y3, "c", linewidth=2, label="cheby2")
ax.plot(t, y4, "m", linewidth=2, label="ellip")
ax.plot(t, y5, "k", linewidth=2, label="bessel")
ax.plot(t-delay_samples, y6, "y", linewidth=2, label="fir")
ax.legend(loc="upper right")
ax.set_xlabel("Time [s]")
ax.set_ylabel("Amplitude")
plt.show()

[Bessel filter](https://en.wikipedia.org/wiki/Bessel_filter) looks good.  
Let's plot again.  

In [None]:
# Raw series against the Bessel-filtered output only.
fig, ax = plt.subplots()
ax.plot(t, y, "b")
ax.plot(t, y5, "k", linewidth=2, label="bessel")
ax.legend(loc="upper right")
ax.set_xlabel("Time [s]")
ax.set_ylabel("Amplitude")
plt.show()

That's it!
Again, if you find this notebook useful, please upvote 👍.