In [1]:
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

from numpy.fft import rfft, irfft, rfftfreq, irfftn

# Fast Fourier Transform

The aim of this notebook is to apply an FFT-based (Fast Fourier Transform) low-pass filter to the partner_i datasets that were preprocessed without resampling.

## Dataset

The datasets used here were prepared without resampling. The methods used to fill missing values and create new features are documented in the feature engineering directory, inside this project's notebooks directory.

In [2]:
# Load the three no-resampling / fillna variants of the partner_i CPIIE40
# dataset from the interim data directory. The files differ only in the
# prefix of their name, so they are read in one pass, in the same order as
# the names on the left-hand side.
df_r3d_and_r7d, df_r3d_only, df_no_r3d_r7d = (
    pd.read_csv(
        "../../../data/interim/"
        + prefix
        + "dados-historicos-partner_i-cement-CPIIE40.csv"
    )
    for prefix in (
        "r3d_and_r7d-no-resampling-with-fillna-",
        "r3d_only-no-resampling-with-fillna-",
        "no-r3d-r7d-no-resampling-with-fillna-",
    )
)

In [3]:
def convert_df_cols_to_float_type(df, float_cols):
    """Cast the selected columns of ``df`` to ``float`` and return ``df``.

    The frame is modified in place: the columns listed in ``float_cols`` are
    overwritten with their float-typed versions, and the very same object
    is handed back to the caller. Columns not listed are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be converted.
    float_cols : list-like of column labels
        Columns to cast to ``float``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    as_float = df[float_cols].astype(float)
    df[float_cols] = as_float
    return df

In [4]:
def pass_ft(s, threshold=2e4, duration=2e-3):
    """Low-pass filter a 1-D signal by zeroing its high-frequency FFT bins.

    The signal is transformed with a real FFT, every bin whose frequency is
    strictly above ``threshold`` is set to zero, and the result is
    transformed back to a signal of the original length.

    The ``len(s)`` samples are treated as evenly spread over ``duration``,
    so bin ``k`` sits at frequency ``k / duration``. With the defaults
    (``duration=2e-3``, ``threshold=2e4``) bins are 500 apart and bins above
    roughly ``k = 40`` are removed, independently of the signal length.

    Parameters
    ----------
    s : 1-D sequence of numbers (list, ``numpy`` array, ``pandas`` Series)
        Signal to filter. Must not be empty.
    threshold : float, default 2e4
        Highest frequency to keep; bins with a greater frequency are zeroed.
    duration : float, default 2e-3
        Total span attributed to the signal, used only to scale the
        frequency axis against which ``threshold`` is compared.

    Returns
    -------
    numpy.ndarray
        Filtered signal with ``len(s)`` samples.

    Raises
    ------
    ValueError
        If ``s`` is empty.
    """
    n_samples = len(s)
    if n_samples == 0:
        # rfft would raise ValueError as well; fail early with a clearer message.
        raise ValueError("pass_ft requires a non-empty signal")

    fourier = rfft(s)
    # Sample spacing is duration / n_samples, which makes the bin frequencies
    # depend on ``duration`` only and not on the number of samples.
    frequencies = rfftfreq(n_samples, d=duration / n_samples)
    fourier[frequencies > threshold] = 0

    # Pass n explicitly so odd-length signals come back with their original length.
    return irfft(fourier, n=n_samples)

In [5]:
# Every column except the first one is cast to float. The first column is
# skipped (presumably the "Data/Hora" timestamp -- confirm against the CSVs).
# The column lists are captured here, before any "_ft" column is added, and
# are reused later to drop the original features.
float_cols_1, float_cols_2, float_cols_3 = (
    frame.columns[1:]
    for frame in (df_r3d_and_r7d, df_r3d_only, df_no_r3d_r7d)
)

df_r3d_and_r7d = convert_df_cols_to_float_type(df_r3d_and_r7d, float_cols_1)
df_r3d_only = convert_df_cols_to_float_type(df_r3d_only, float_cols_2)
df_no_r3d_r7d = convert_df_cols_to_float_type(df_no_r3d_r7d, float_cols_3)

## Apply FFT and keep all vars

In [6]:
# Add a filtered "<column>_ft" companion for every column except the R28D
# column and the "Data/Hora" column. The frames are modified in place.
# NOTE: the column list is taken from the frame's current columns, so
# running this cell a second time without reloading the data would also
# filter the "_ft" columns and create "<column>_ft_ft" ones.
for frame in (df_r3d_and_r7d, df_r3d_only, df_no_r3d_r7d):
    source_columns = frame.columns.drop(["R28D", "Data/Hora"])
    for name in source_columns:
        frame[f"{name!s}_ft"] = pass_ft(frame[name])

In [7]:
# Write the "all-vars-kept" variant of each frame to the interim data
# directory, without the index column.
for frame, prefix in (
    (
        df_r3d_and_r7d,
        "r3d_and_r7d-no-resampling-with-fillna-and-fft-all-vars-kept-",
    ),
    (
        df_r3d_only,
        "r3d_only-no-resampling-with-fillna-and-fft-all-vars-kept-",
    ),
    (
        df_no_r3d_r7d,
        "no-r3d-r7d-no-resampling-with-fillna-and-fft-all-vars-kept-",
    ),
):
    frame.to_csv(
        "../../../data/interim/"
        + prefix
        + "dados-historicos-partner_i-cement-CPIIE40.csv",
        index=False,
    )

## Apply FFT and keep only transformed vars

In [8]:
# Remove the original feature columns captured in float_cols_* (all of them
# except R28D), which leaves the first column, R28D and whatever was added
# afterwards (the "_ft" columns). The names are rebound to the reduced
# frames, so this cell cannot be re-run without rebuilding them first.
df_r3d_and_r7d, df_r3d_only, df_no_r3d_r7d = (
    frame.drop(columns=original_cols.drop("R28D"))
    for frame, original_cols in (
        (df_r3d_and_r7d, float_cols_1),
        (df_r3d_only, float_cols_2),
        (df_no_r3d_r7d, float_cols_3),
    )
)

In [9]:
# Write the "fft-vars-only" variant of each frame to the interim data
# directory, without the index column.
for frame, prefix in (
    (
        df_r3d_and_r7d,
        "r3d_and_r7d-no-resampling-with-fillna-and-fft-vars-only-",
    ),
    (
        df_r3d_only,
        "r3d_only-no-resampling-with-fillna-and-fft-vars-only-",
    ),
    (
        df_no_r3d_r7d,
        "no-r3d-r7d-no-resampling-with-fillna-and-fft-vars-only-",
    ),
):
    frame.to_csv(
        "../../../data/interim/"
        + prefix
        + "dados-historicos-partner_i-cement-CPIIE40.csv",
        index=False,
    )