# WTS Pipeline Integration
Exploratory notebook for working on birdnet embeddings

### Imports and Setup

In [86]:
import pandas as pd
import os
import numpy as np
from annotation_post_processing import *

In [87]:
embeddingColumns = [str(i) for i in range(420)]
columnNames = ["START", "END"] + embeddingColumns
path = './input/cosmos_embeddings/'

In [88]:
import pandas as pd
import os

def generate_embeddings_from_file(path, filename):
    with open(path + filename, 'r') as f:
        data = f.read()
    with open(path + filename, 'w') as f:
        f.write(",".join(data.split("\t")))
    file_df = pd.read_csv(path + filename, names = columnNames)
    file_df["IN FILE"] = filename[:filename.index(".birdnet")] + ".wav"
    file_df["FILE SPECIES"] = " ".join(filename.split(" ")[2:4])
    return file_df

def generate_embeddings(path):
    df = pd.DataFrame()
    for filename in os.listdir(path):
        try:
            df = pd.concat([df, generate_embeddings_from_file(path, filename)], ignore_index = True)
            print("Done with " + filename)
        except Exception as e:
            print("Something went wrong with: " + filename)
    df["PATH"] = path
    columns = df.columns.tolist()
    columns = columns[-2:] + columns[:-2]
    df = df[columns]
    df = df.sort_values(["IN FILE", "START"], ascending = True)
    df = df.reset_index(drop = True)
    return df

In [89]:
embeddings_df = generate_embeddings(path)
embeddings_df

Done with XC63636 - Southern Nightingale-Wren - Microcerculus marginatus.birdnet.embeddings.txt
Done with XC84810 - Rufous-collared Sparrow - Zonotrichia capensis subtorquata.birdnet.embeddings.txt
Done with XC64386 - Buff-throated Woodcreeper - Xiphorhynchus guttatus.birdnet.embeddings.txt
Done with XC98265 - Yellow-olive Flatbill - Tolmomyias sulphurescens confusus.birdnet.embeddings.txt
Done with XC601007 - Rufous-collared Sparrow - Zonotrichia capensis.birdnet.embeddings.txt
Done with XC484117 - Buff-throated Woodcreeper - Xiphorhynchus guttatus.birdnet.embeddings.txt
Done with XC699683 - Yellow-olive Flatbill - Tolmomyias sulphurescens.birdnet.embeddings.txt
Done with XC301004 - Rufous-collared Sparrow - Zonotrichia capensis.birdnet.embeddings.txt
Done with XC104217 - Slate-throated Whitestart - Myioborus miniatus.birdnet.embeddings.txt
Done with XC609552 - White-throated Toucan - Ramphastos tucanus.birdnet.embeddings.txt
Done with XC128171 - Rufous-collared Sparrow - Zonotrichia 

Unnamed: 0,FILE SPECIES,PATH,START,END,0,1,2,3,4,5,...,411,412,413,414,415,416,417,418,419,IN FILE
0,Southern Nightingale-Wren,./input/cosmos_embeddings/,0.0,3.0,0.516491,0.650946,0.447532,0.008416,0.976974,0.210846,...,0.764997,0.224773,0.307177,0.618153,0.572281,0.898645,0.424682,0.784120,0.582382,XC100027 - Southern Nightingale-Wren - Microce...
1,Southern Nightingale-Wren,./input/cosmos_embeddings/,3.0,6.0,0.332206,0.504845,0.053778,0.049535,0.893746,0.282089,...,0.414043,0.599959,0.683068,0.572789,0.827336,0.678146,0.956513,0.310204,0.560730,XC100027 - Southern Nightingale-Wren - Microce...
2,Southern Nightingale-Wren,./input/cosmos_embeddings/,6.0,9.0,0.508598,0.422192,0.175864,0.046417,1.243934,0.341751,...,0.344001,0.751044,0.310172,1.276110,0.756434,0.482373,0.850690,0.280638,1.044262,XC100027 - Southern Nightingale-Wren - Microce...
3,Southern Nightingale-Wren,./input/cosmos_embeddings/,9.0,12.0,0.834292,0.763748,0.289305,0.254106,0.966129,0.475406,...,0.837616,0.776913,0.834856,1.046136,1.034076,0.706145,0.880231,0.625652,0.480209,XC100027 - Southern Nightingale-Wren - Microce...
4,Southern Nightingale-Wren,./input/cosmos_embeddings/,12.0,15.0,0.775968,0.195914,0.237553,0.111716,1.452784,0.297751,...,0.739770,1.074138,0.476351,1.474918,1.059757,0.871452,1.035708,0.440124,0.510227,XC100027 - Southern Nightingale-Wren - Microce...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39953,Blue-grey Tanager,./input/cosmos_embeddings/,0.0,3.0,1.055252,1.559146,1.135013,1.333304,0.308816,0.951709,...,0.277709,0.623836,0.808401,0.348465,0.968598,0.382283,2.083897,0.000000,0.639748,XC9881 - Blue-grey Tanager - Thraupis episcopu...
39954,Blue-grey Tanager,./input/cosmos_embeddings/,3.0,6.0,1.132612,0.653347,1.251634,0.573085,0.714166,1.560998,...,0.457551,0.526104,0.413813,0.551209,0.755623,0.408722,0.310266,0.451188,0.295859,XC9881 - Blue-grey Tanager - Thraupis episcopu...
39955,Blue-grey Tanager,./input/cosmos_embeddings/,6.0,9.0,1.082742,0.205875,0.709146,0.203989,0.907365,0.756531,...,0.156727,0.067946,0.902726,0.399330,0.752100,0.304018,1.156913,0.044347,1.153144,XC9881 - Blue-grey Tanager - Thraupis episcopu...
39956,Blue-grey Tanager,./input/cosmos_embeddings/,9.0,12.0,1.338971,0.901556,1.706767,0.925638,0.152560,1.294218,...,0.189816,0.188148,0.987711,0.478155,0.284822,0.135123,0.540614,0.374931,0.047007,XC9881 - Blue-grey Tanager - Thraupis episcopu...


In [90]:
automated_dfs:list[pd.DataFrame] = []
automated_dfs.append(pd.read_csv("./cosmos_annotations/automated_cosmos_tweety_to_file.csv"))
automated_dfs.append(pd.read_csv("./cosmos_annotations/COSMOS_BirdNET-Lite_Labels_05Conf.csv"))
automated_dfs.append(pd.read_csv("./cosmos_annotations/COSMOS_BirdNET-Lite_Labels_100.csv"))
automated_dfs.append(pd.read_csv("./cosmos_annotations/COSMOS_BirdNET-Lite-Filename_Labels_05Conf.csv"))
automated_dfs.append(pd.read_csv("./cosmos_annotations/COSMOS_Microfaune-Filename_Labels_100.csv"))
print(automated_dfs)

[       Unnamed: 0    OFFSET  DURATION  \
0               0  1.883721  0.116279   
1               1  3.976744  0.046512   
2               2  5.976744  0.046512   
3               3  6.023256  0.046512   
4               4  6.069767  0.116279   
...           ...       ...       ...   
55642       55642  1.511628  0.325581   
55643       55643  1.860465  0.139535   
55644       55644  5.976744  0.372093   
55645       55645  6.627907  0.255814   
55646       55646  0.046512  0.255814   

                                         FOLDER  \
0      C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
1      C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
2      C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
3      C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
4      C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
...                                         ...   
55642  C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
55643  C:/Users/Siloux/Desktop/E4E/Cosmos_data/   
55644  C:/Users/Siloux/Desktop/E4E/Cosmos_data/

### Filtering Embeddings with HDBSCAN

In [91]:
%matplotlib inline
import hdbscan
from hdbscan import HDBSCAN
from hdbscan.prediction import approximate_predict
import pickle
import seaborn as sns
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from sklearn.preprocessing import LabelEncoder

plt.style.use("ggplot")

In [92]:
hdbscan_results = pd.DataFrame(columns=["FILE SPECIES", "PATH"] + columnNames + ["IN FILE", "LABELS"])
i = 0
for species in embeddings_df["FILE SPECIES"].unique():
    species_result = pd.read_csv(f"./ClusteringModels/species_specific/{species}.csv").drop(["Unnamed: 0"], axis=1)
    # Method 1: Simply filters out what was labeled as noise in recording
    # filter = species_result[species_result["LABELS"] != -1]
    
    # Method 2: Filters out noise and creates the filter by checking the mode of embedding labels (essentially to see most frequent bird, which should be dominant bird in the recording)
    species_result = species_result[species_result["LABELS"] != -1]
    max_num = species_result["LABELS"].mode().tolist()[0] #picking the lower number (for consistency)
    filter = species_result[species_result["LABELS"] != max_num]
    hdbscan_results = pd.concat([hdbscan_results, filter], axis=0)
    i += 1
    print(f"Done {i}")

hdbscan_results

Done 1
Done 2
Done 3
Done 4
Done 5
Done 6
Done 7
Done 8
Done 9
Done 10


Unnamed: 0,FILE SPECIES,PATH,START,END,0,1,2,3,4,5,...,412,413,414,415,416,417,418,419,IN FILE,LABELS
0,Southern Nightingale-Wren,./input/cosmos_embeddings/,0.0,3.0,0.516491,0.650946,0.447532,0.008416,0.976974,0.210846,...,0.224773,0.307177,0.618153,0.572281,0.898645,0.424682,0.784120,0.582382,XC100027 - Southern Nightingale-Wren - Microce...,47
1,Southern Nightingale-Wren,./input/cosmos_embeddings/,3.0,6.0,0.332206,0.504845,0.053778,0.049535,0.893746,0.282089,...,0.599959,0.683068,0.572789,0.827336,0.678146,0.956513,0.310204,0.560730,XC100027 - Southern Nightingale-Wren - Microce...,47
2,Southern Nightingale-Wren,./input/cosmos_embeddings/,6.0,9.0,0.508598,0.422192,0.175864,0.046417,1.243934,0.341751,...,0.751044,0.310172,1.276110,0.756434,0.482373,0.850690,0.280638,1.044262,XC100027 - Southern Nightingale-Wren - Microce...,47
6,Southern Nightingale-Wren,./input/cosmos_embeddings/,18.0,21.0,0.661588,0.314132,0.361304,0.134765,1.526731,0.372206,...,0.831590,0.772766,1.721585,0.614672,0.585939,0.881569,0.448760,0.296858,XC100027 - Southern Nightingale-Wren - Microce...,47
7,Southern Nightingale-Wren,./input/cosmos_embeddings/,21.0,24.0,0.425082,0.373314,0.049487,0.008416,1.328719,0.157014,...,0.734619,0.416437,1.193141,0.866552,0.429371,0.811494,0.625381,0.485380,XC100027 - Southern Nightingale-Wren - Microce...,47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1256,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,24.0,27.0,0.610505,0.296693,0.483295,0.056628,0.113244,0.644921,...,0.412486,0.428114,0.249396,0.622904,1.125753,1.450461,0.069599,0.470338,XC753 - Scaly-breasted Woodpecker - Celeus gra...,8
1257,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,27.0,30.0,0.958089,0.305177,0.192699,0.749554,0.307977,0.361235,...,0.524962,0.435501,0.373535,0.517083,1.236541,1.000128,0.444875,0.620538,XC753 - Scaly-breasted Woodpecker - Celeus gra...,8
1258,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,30.0,33.0,0.877227,0.319196,0.393915,0.568002,0.372513,0.564511,...,0.388511,0.470560,0.735972,1.169871,0.819529,1.088604,0.176100,0.463833,XC753 - Scaly-breasted Woodpecker - Celeus gra...,8
1268,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,6.0,9.0,0.424716,0.328351,0.436900,0.155210,0.263310,0.343720,...,0.280424,0.334078,0.201588,0.644591,0.568780,0.835398,0.395326,0.166977,XC87450 - Scaly-breasted Woodpecker - Celeus g...,29


In [93]:
filtered_embeddings = hdbscan_results
print("Created filter")

filtered_embeddings

Created filter


Unnamed: 0,FILE SPECIES,PATH,START,END,0,1,2,3,4,5,...,412,413,414,415,416,417,418,419,IN FILE,LABELS
0,Southern Nightingale-Wren,./input/cosmos_embeddings/,0.0,3.0,0.516491,0.650946,0.447532,0.008416,0.976974,0.210846,...,0.224773,0.307177,0.618153,0.572281,0.898645,0.424682,0.784120,0.582382,XC100027 - Southern Nightingale-Wren - Microce...,47
1,Southern Nightingale-Wren,./input/cosmos_embeddings/,3.0,6.0,0.332206,0.504845,0.053778,0.049535,0.893746,0.282089,...,0.599959,0.683068,0.572789,0.827336,0.678146,0.956513,0.310204,0.560730,XC100027 - Southern Nightingale-Wren - Microce...,47
2,Southern Nightingale-Wren,./input/cosmos_embeddings/,6.0,9.0,0.508598,0.422192,0.175864,0.046417,1.243934,0.341751,...,0.751044,0.310172,1.276110,0.756434,0.482373,0.850690,0.280638,1.044262,XC100027 - Southern Nightingale-Wren - Microce...,47
6,Southern Nightingale-Wren,./input/cosmos_embeddings/,18.0,21.0,0.661588,0.314132,0.361304,0.134765,1.526731,0.372206,...,0.831590,0.772766,1.721585,0.614672,0.585939,0.881569,0.448760,0.296858,XC100027 - Southern Nightingale-Wren - Microce...,47
7,Southern Nightingale-Wren,./input/cosmos_embeddings/,21.0,24.0,0.425082,0.373314,0.049487,0.008416,1.328719,0.157014,...,0.734619,0.416437,1.193141,0.866552,0.429371,0.811494,0.625381,0.485380,XC100027 - Southern Nightingale-Wren - Microce...,47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1256,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,24.0,27.0,0.610505,0.296693,0.483295,0.056628,0.113244,0.644921,...,0.412486,0.428114,0.249396,0.622904,1.125753,1.450461,0.069599,0.470338,XC753 - Scaly-breasted Woodpecker - Celeus gra...,8
1257,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,27.0,30.0,0.958089,0.305177,0.192699,0.749554,0.307977,0.361235,...,0.524962,0.435501,0.373535,0.517083,1.236541,1.000128,0.444875,0.620538,XC753 - Scaly-breasted Woodpecker - Celeus gra...,8
1258,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,30.0,33.0,0.877227,0.319196,0.393915,0.568002,0.372513,0.564511,...,0.388511,0.470560,0.735972,1.169871,0.819529,1.088604,0.176100,0.463833,XC753 - Scaly-breasted Woodpecker - Celeus gra...,8
1268,Scaly-breasted Woodpecker,./input/cosmos_embeddings/,6.0,9.0,0.424716,0.328351,0.436900,0.155210,0.263310,0.343720,...,0.280424,0.334078,0.201588,0.644591,0.568780,0.835398,0.395326,0.166977,XC87450 - Scaly-breasted Woodpecker - Celeus g...,29


### Applying Mask to WTS Pipeline

In [94]:
count1 = 0
def split_annotations(df: pd.DataFrame):
    all_split_ann = pd.DataFrame(columns = df.columns)
    for i in range(df.shape[0]):
        x = df.iloc[i]
        startsends = np.linspace(3.0 * (int(x["OFFSET"] / 3)), 3.0 * (int((x["OFFSET"] + x["DURATION"])/ 3) + 1), int((x["OFFSET"] + x["DURATION"])/ 3) - int(x["OFFSET"] / 3) + 2)
        starts = startsends[:-1]
        starts[0] = x["OFFSET"]
        ends = startsends[1:]
        ends[-1] = x["OFFSET"] + x["DURATION"]
        split_ann = pd.DataFrame(columns = x.index)
        for i in range(len(starts)):
            new_x = pd.DataFrame(x.copy()).T
            new_x["OFFSET"] = starts[i]
            new_x["DURATION"] = ends[i] - starts[i]
            if np.isclose(new_x["DURATION"], 0):
                continue
            split_ann = pd.concat([split_ann, new_x])
        all_split_ann = pd.concat([all_split_ann, split_ann])
        global count1
        count1 += 1
        print(f"Completed {count1} annotations")
    return all_split_ann.reset_index(drop = True)

count2 = 0
def create_annotation_filter(x: pd.Series, filter: pd.DataFrame) -> pd.DataFrame:
    filter_x = filter[filter["IN FILE"].str.startswith(x["IN FILE"].split(".mp3")[0])]
    starts = filter_x["START"].to_numpy()
    ends = filter_x["END"].to_numpy()
    close_starts = np.isclose(starts, x["OFFSET"]).sum()
    close_ends = np.isclose(ends, x["OFFSET"] + x["DURATION"]).sum()
    middle1 = starts < x["OFFSET"]
    middle2 = ends > x["OFFSET"] + x["DURATION"]
    middle = (middle1*middle2).sum()
    if (close_starts + close_ends + middle) > 0:
        x["FILTERED"] = True
    else:
        x["FILTERED"] = False
    global count2
    count2 += 1
    print(f"Completed {count2} annotations")
    return x

In [95]:
automated_dfs_split = [split_annotations(df) for df in automated_dfs]
print(automated_dfs_split)

Completed 1 annotations
Completed 2 annotations
Completed 3 annotations
Completed 4 annotations
Completed 5 annotations
Completed 6 annotations
Completed 7 annotations
Completed 8 annotations
Completed 9 annotations
Completed 10 annotations
Completed 11 annotations
Completed 12 annotations
Completed 13 annotations
Completed 14 annotations
Completed 15 annotations
Completed 16 annotations
Completed 17 annotations
Completed 18 annotations
Completed 19 annotations
Completed 20 annotations
Completed 21 annotations
Completed 22 annotations
Completed 23 annotations
Completed 24 annotations
Completed 25 annotations
Completed 26 annotations
Completed 27 annotations
Completed 28 annotations
Completed 29 annotations
Completed 30 annotations
Completed 31 annotations
Completed 32 annotations
Completed 33 annotations
Completed 34 annotations
Completed 35 annotations
Completed 36 annotations
Completed 37 annotations
Completed 38 annotations
Completed 39 annotations
Completed 40 annotations
Completed

In [96]:
automated_dfs_filtered = [df.apply(lambda x: create_annotation_filter(x, filtered_embeddings), axis = 1) for df in automated_dfs_split]
automated_dfs_filtered = [df[~df["FILTERED"]] for df in automated_dfs_filtered]
print(automated_dfs_filtered)

Completed 1 annotations
Completed 2 annotations
Completed 3 annotations
Completed 4 annotations
Completed 5 annotations
Completed 6 annotations
Completed 7 annotations
Completed 8 annotations
Completed 9 annotations
Completed 10 annotations
Completed 11 annotations
Completed 12 annotations
Completed 13 annotations
Completed 14 annotations
Completed 15 annotations
Completed 16 annotations
Completed 17 annotations
Completed 18 annotations
Completed 19 annotations
Completed 20 annotations
Completed 21 annotations
Completed 22 annotations
Completed 23 annotations
Completed 24 annotations
Completed 25 annotations
Completed 26 annotations
Completed 27 annotations
Completed 28 annotations
Completed 29 annotations
Completed 30 annotations
Completed 31 annotations
Completed 32 annotations
Completed 33 annotations
Completed 34 annotations
Completed 35 annotations
Completed 36 annotations
Completed 37 annotations
Completed 38 annotations
Completed 39 annotations
Completed 40 annotations
Completed

In [None]:
print([df.shape[0] for df in automated_dfs])
print([df.shape[0] for df in automated_dfs_split])
print([df.shape[0] for df in automated_dfs_filtered])

#### Conclusion
The above shows that the filtering worked, in that it reduced the total number of annotations. The next section will show the statistics.

### Statistics

In [None]:
from statistics import *

In [None]:
manual_df = pd.read_csv("cosmos_annotations/cosmos_labeled_data_files_added.csv")
manual_df["IN FILE"] = manual_df["IN FILE"].apply(lambda x: " ".join(x.split("_")))
manual_df["FOLDER"] = "./cosmos_annotations/"
manual_df

In [None]:
import warnings
warnings.filterwarnings("ignore")
clip_stats_original = [clip_statistics(df, manual_df, "general") for df in automated_dfs]
clip_stats_filtered = [clip_statistics(df, manual_df, "general") for df in automated_dfs_filtered]

In [None]:
class_stats_original = [class_statistics(stats) for stats in clip_stats_original]
class_stats_filtered = [class_statistics(stats) for stats in clip_stats_filtered]

In [None]:
class_stats_original[0]

In [None]:
class_stats_filtered[0]

In [None]:
all_class_stats = [pd.concat([class_stats_original[i].assign(MODEL = "original"), class_stats_filtered[i].assign(MODEL = "filtered")]) for i in range(len(class_stats_original))]
all_class_stats = [df[df["MANUAL ID"] != "Lipaugus vociferans"] for df in all_class_stats]
all_class_stats[0]

In [None]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

# Plotting each model separately
figure, axes = plt.subplots(1, len(class_stats_original), figsize = (30, 5), sharex = False, sharey = True)

x = 0
model_list = ["Tweety to File", "BirdNET-Lite Labels 05Conf", "BirdNET-Lite Labels 100", "BirdNET-Lite to Filename", "Microfaune to Filename"]

for model in model_list:
    plot = sns.barplot(ax = axes[x], data = all_class_stats[x], x = "MANUAL ID", y = "PRECISION", hue = "MODEL")
    for label in plot.get_xticklabels():
        label.set_rotation(90)
    plot.set(title = model)
    x += 1

In [None]:
figure, axes = plt.subplots(1, len(class_stats_original), figsize = (30, 5), sharex = False, sharey = True)

x = 0

for model in model_list:
    plot = sns.barplot(ax = axes[x], data = all_class_stats[x], x = "MANUAL ID", y = "RECALL", hue = "MODEL")
    for label in plot.get_xticklabels():
        label.set_rotation(90)
    plot.set(title = model)
    x += 1

### Visualizations

In [None]:
%matplotlib inline
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from sklearn.preprocessing import LabelEncoder

plt.style.use("ggplot")

#### Initial Embeddings

In [None]:
# 2D Visualization for initial embeddings
figure, axes = plt.subplots(3, 4, figsize = (30, 28), sharex = False)

x = 0
y = 0

pca_2d_list:list[PCA] = []

for species in embeddings_df["FILE SPECIES"].unique():
    pca_2d = PCA(n_components = 2, random_state = 42)
    pca_columns_2d = pd.DataFrame(pca_2d.fit_transform(embeddings_df[embeddings_df["FILE SPECIES"] == species][embeddingColumns].dropna())).T
    scatter_plot = sns.scatterplot(ax = axes[x, y], x = pca_columns_2d.iloc[0], y = pca_columns_2d.iloc[1])
    if y < 3:
        y += 1
    else:
        y = 0
        x += 1
    scatter_plot.set(title = species)
    pca_2d_list += [pca_2d]

figure.suptitle("PCA Visualization 2D (Original Embeddings)", size=60)

In [None]:
# 3D Visualization for initial embeddings
figure, axes = plt.subplots(3, 4, figsize = (30, 28), subplot_kw=dict(projection='3d'))

x = 0
y = 0

pca_3d_list:list[PCA] = []

for species in embeddings_df["FILE SPECIES"].unique():
    pca_3d = PCA(n_components = 3, random_state = 42)
    pca_columns_3d = pd.DataFrame(pca_3d.fit_transform(embeddings_df[embeddings_df["FILE SPECIES"] == species][embeddingColumns].dropna())).T
    scatter_plot = axes[x, y].scatter(pca_columns_3d.iloc[0], pca_columns_3d.iloc[1], pca_columns_3d.iloc[2])
    axes[x, y].title.set_text(species)
    if y < 3:
        y += 1
    else:
        y = 0
        x += 1
    pca_3d_list += [pca_3d]

figure.suptitle("PCA Visualization 3D (Original Embeddings)", size=60)

#### Filtered Out Embeddings

In [None]:
# 2D Visualization for filtered embeddings
figure, axes = plt.subplots(3, 4, figsize = (30, 28), sharex = False)

x = 0
y = 0

i = 0
for species in embeddings_df["FILE SPECIES"].unique():
    pca_columns_2d = pd.DataFrame(pca_2d_list[i].transform(filtered_embeddings[filtered_embeddings["FILE SPECIES"] == species][embeddingColumns].dropna())).T
    scatter_plot = sns.scatterplot(ax = axes[x, y], x = pca_columns_2d.iloc[0], y = pca_columns_2d.iloc[1])
    if y < 3:
        y += 1
    else:
        y = 0
        x += 1
    i += 1
    scatter_plot.set(title = species)

figure.suptitle("PCA Visualization 2D (Filtered Embeddings)", size=60)

In [None]:
# 3D Visualization for filtered embeddings
figure, axes = plt.subplots(3, 4, figsize = (30, 28), sharex = False, subplot_kw=dict(projection='3d'))

x = 0
y = 0

i = 0
for species in embeddings_df["FILE SPECIES"].unique():
    pca_columns_3d = pd.DataFrame(pca_3d_list[i].transform(filtered_embeddings[filtered_embeddings["FILE SPECIES"] == species][embeddingColumns].dropna())).T
    scatter_plot = axes[x, y].scatter(pca_columns_3d.iloc[0], pca_columns_3d.iloc[1], pca_columns_3d.iloc[1])
    axes[x, y].title.set_text(species)
    if y < 3:
        y += 1
    else:
        y = 0
        x += 1
    i += 1

figure.suptitle("PCA Visualization 3D (Filtered Embeddings)", size=60)