### Motivation

This notebook demonstrates how to use the toolkit's pre-processing functions (`impute_missing_data` and `normalize`) on small example datasets.

### Imports

**Adds the project root to `sys.path` so that the `ThreeWToolkit` package can be imported from this notebook. For demonstration purposes only.**

In [None]:
import sys
import os
import pandas as pd
import numpy as np

# Add the project root (two levels above the current working directory) to
# sys.path so that project packages can be imported from this notebook.
# The membership check keeps the cell idempotent: re-running it does not
# append the same entry to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../../'))
if project_root not in sys.path:
    sys.path.append(project_root)

**Required imports**

In [None]:
from ThreeWToolkit.preprocessing import (
    impute_missing_data,
    normalize
)

### How to

##### Impute missing data

Basic usage with the `mean` strategy

In [None]:
# Toy frame: "a" and "b" each contain one NaN; "c" holds strings.
df = pd.DataFrame(
    {
        "a": [1.0, np.nan, 3.0],
        "b": [np.nan, 2.0, 2.0],
        "c": ["x", "y", "z"],
    }
)

# Impute only the listed columns using the "mean" strategy.
imputed_df = impute_missing_data(
    data=df,
    strategy="mean",
    columns=["a", "b"],
)

# Display the first rows (head() defaults to 5).
imputed_df.head()

Unnamed: 0,a,b,c
0,1.0,2.0,x
1,2.0,2.0,y
2,3.0,2.0,z


Basic usage with the `median` strategy

In [None]:
# Toy frame: "a" and "b" each contain one NaN; "c" holds strings.
df = pd.DataFrame(
    {
        "a": [1.0, np.nan, 3.0],
        "b": [np.nan, 2.0, 4.0],
        "c": ["x", "y", "z"],
    }
)

# Impute only the listed columns using the "median" strategy.
imputed_df = impute_missing_data(
    data=df,
    strategy="median",
    columns=["a", "b"],
)

# Display the first rows (head() defaults to 5).
imputed_df.head()

Unnamed: 0,a,b,c
0,1.0,3.0,x
1,2.0,2.0,y
2,3.0,4.0,z


Basic usage with the `constant` strategy and a custom fill value

In [None]:
# Toy frame: "a" and "b" each contain one NaN; "c" holds strings.
df = pd.DataFrame(
    {
        "a": [1.0, np.nan, 3.0],
        "b": [np.nan, 2.0, 4.0],
        "c": ["x", "y", "z"],
    }
)

# Impute only the listed columns using the "constant" strategy, passing
# -1.0 as the fill value.
imputed_df = impute_missing_data(
    data=df,
    strategy="constant",
    columns=["a", "b"],
    fill_value=-1.0,
)

# Display the first rows (head() defaults to 5).
imputed_df.head()

Unnamed: 0,a,b,c
0,1.0,-1.0,x
1,-1.0,2.0,y
2,3.0,4.0,z


________

##### Normalize

Basic usage with DataFrame

In [None]:
# Two-row frame with integer columns; column "y" is all zeros.
df = pd.DataFrame(
    {
        "x": [3, 4],
        "y": [0, 0],
    }
)

# L2 normalization of the frame.
# NOTE(review): axis=1 presumably normalizes each row independently
# (scikit-learn convention) — confirm against the toolkit's documentation.
normed = normalize(
    X=df,
    norm="l2",
    axis=1,
)

normed

Basic usage with a pandas `Series`

In [None]:
# Two-element float Series named "s".
s = pd.Series(
    [3.0, 4.0],
    name="s",
)

# L2 normalization of the Series, requested along axis 0.
normed = normalize(
    X=s,
    norm="l2",
    axis=0,
)

normed

________