In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, FunctionTransformer
import pandas as pd
from pathlib import Path
import librosa
from custom_transformers import LibrosaTransformer, ElementwiseSummaryStats
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Load the preprocessed training frame and split it into labels and raw inputs.
# NOTE(review): unpickling executes arbitrary code, so only load files that this
# project's own preprocessing step produced.
train_fpath = Path("../data/processed/train_data.pkl").resolve()
train_df = pd.read_pickle(train_fpath)

# `y` holds the "target" column, `X` the "audio" column of the same frame.
y, X = train_df["target"], train_df["audio"]

In [None]:
# Feature-extraction pipeline: an "mfcc" step followed by a "summary_stats" step.
# NOTE(review): presumably the first step computes 13 MFCCs per clip via librosa
# and the second reduces them along axis 1 -- confirm in custom_transformers.
mfcc_transformer = LibrosaTransformer("mfcc", n_mfcc=13)
summary_stats = ElementwiseSummaryStats(desc_kw_args={"axis": 1})

pipeline_steps = [
    ("mfcc", mfcc_transformer),
    ("summary_stats", summary_stats),
]
pipe = Pipeline(steps=pipeline_steps)

In [None]:
# Fit the two-step feature pipeline on the raw "audio" column and keep the
# transformed features for the PCA cells below.
X_trans = pipe.fit_transform(X)

In [None]:
# Show the distinct level-0 column labels of the transformed features.
# NOTE(review): presumably one label per summary statistic (a "min" label is
# selected further down) -- confirm against ElementwiseSummaryStats.
X_trans.columns.get_level_values(0).unique()

In [None]:
# A DataFrame is mutable and therefore unhashable, so using `X_trans` itself as
# a dict key raises `TypeError: unhashable type` and stops Restart & Run All.
# Key the mapping by a hashable string (the variable's name) instead.
d = {"X_trans": "apple"}

In [None]:
# PCA over the "min" block of the transformed features.
pca = PCA()  # default arguments: no explicit cap on the number of components

# Select everything under the "min" column label, then fit the PCA and project
# the same rows onto its components in a single call.
tmp_x = X_trans["min"]
x_pca = pca.fit_transform(tmp_x)

In [None]:
# Display the share of total variance explained by each fitted principal
# component, in component order.
pca.explained_variance_ratio_

In [None]:
# Scree plot: per-component explained variance (bars) with the running total
# drawn on top (points joined by a line).
explained = pca.explained_variance_ratio_
component_ids = list(range(1, len(explained) + 1))  # 1-based component labels

fig, ax = plt.subplots()

sns.barplot(x=component_ids, y=explained, ax=ax)
# barplot treats x as categorical and draws the bars at positions 0..n-1.
# A numeric lineplot with x = 1..n would land one slot to the right of its bar,
# so draw the cumulative curve with pointplot, which uses the same categorical
# positions as the bars.
sns.pointplot(x=component_ids, y=explained.cumsum(), color="black", ax=ax)

ax.set_xlabel("Principal Component")
ax.set_ylabel("Explained variance ratio")
ax.set_title("Explained variance per component (bars) and cumulative (line)");

In [None]:
# `pca` was fitted on the "min" block of `X_trans`, not on the raw audio held in
# `X`, so it can only project data from that same feature space. Transform the
# fitted feature block and preview the first rows instead of dumping the array.
pca.transform(X_trans["min"])[:5]

In [None]:
# Scatter of the samples in the plane of the first two principal components,
# coloured by target label.
# Columns of `x_pca` are zero-indexed: column 0 is PC1 and column 1 is PC2.
# The previous indices (1, 2) plotted PC2 against PC3.
fig, ax = plt.subplots()
sns.scatterplot(x=x_pca[:, 0], y=x_pca[:, 1], hue=y, ax=ax)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.set_title("Samples projected onto the first two principal components");