In [1]:
from birdclef.utils import get_spark
from IPython.display import Image, display

spark = get_spark()
display(spark)

# read straight from the bucket
df_meta = spark.read.csv(
    "gs://dsgt-clef-birdclef-2024/data/raw/birdclef-2023/train_metadata.csv"
)
df_meta.printSchema()
df_meta.show(vertical=True, n=1, truncate=100)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/03/23 22:17:46 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/03/23 22:17:47 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).


                                                                                

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: string (nullable = true)
 |-- _c4: string (nullable = true)
 |-- _c5: string (nullable = true)
 |-- _c6: string (nullable = true)
 |-- _c7: string (nullable = true)
 |-- _c8: string (nullable = true)
 |-- _c9: string (nullable = true)
 |-- _c10: string (nullable = true)
 |-- _c11: string (nullable = true)

-RECORD 0----------------
 _c0  | primary_label    
 _c1  | secondary_labels 
 _c2  | type             
 _c3  | latitude         
 _c4  | longitude        
 _c5  | scientific_name  
 _c6  | common_name      
 _c7  | author           
 _c8  | license          
 _c9  | rating           
 _c10 | url              
 _c11 | filename         
only showing top 1 row



In [2]:
df_meta.show(n=5)

+-------------+----------------+--------+--------+---------+------------------+--------------------+-------------+--------------------+------+--------------------+--------------------+
|          _c0|             _c1|     _c2|     _c3|      _c4|               _c5|                 _c6|          _c7|                 _c8|   _c9|                _c10|                _c11|
+-------------+----------------+--------+--------+---------+------------------+--------------------+-------------+--------------------+------+--------------------+--------------------+
|primary_label|secondary_labels|    type|latitude|longitude|   scientific_name|         common_name|       author|             license|rating|                 url|            filename|
|      abethr1|              []|['song']|  4.3906|  38.2788|Turdus tephronotus|African Bare-eyed...|Rolf A. de By|Creative Commons ...|   4.0|https://www.xeno-...|abethr1/XC128013.ogg|
|      abethr1|              []|['call']| -2.9524|  38.2921|Turdus tephrono

In [2]:
import os
from pathlib import Path

in_root = Path("data/raw/birdclef-2023/train_audio")
out_dir = Path("data/encodec_embeddings")

os.makedirs(out_dir, exist_ok=True)

out_path = out_dir / "embeddings.parquet"

In [3]:
from encodec import EncodecModel
import torchaudio

bandwidth = 3.0
model = EncodecModel.encodec_model_24khz()
model.set_target_bandwidth(bandwidth)



In [4]:
import torch
from encodec.utils import convert_audio

def compress(file):
    # Load and pre-process the audio waveform
    wav, sr = torchaudio.load(in_root / file)
    wav = convert_audio(wav, sr, model.sample_rate, model.channels)
    wav = wav.unsqueeze(0)

    # Extract discrete codes from EnCodec
    with torch.no_grad():
        encoded_frames = model.encode(wav)
    codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1)
    
    return codes[0].flatten().numpy()

In [5]:
compress("eaywag1/XC639588.ogg").shape

(9740,)

In [6]:
df_meta_pd = df_meta.toPandas()
df_meta_pd.columns = df_meta_pd.iloc[0]
df_meta_pd = df_meta_pd.drop(df_meta_pd.index[0])
df_meta_pd

                                                                                

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
1,abethr1,[],['song'],4.3906,38.2788,Turdus tephronotus,African Bare-eyed Thrush,Rolf A. de By,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/128013,abethr1/XC128013.ogg
2,abethr1,[],['call'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363501,abethr1/XC363501.ogg
3,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363502,abethr1/XC363502.ogg
4,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/363503,abethr1/XC363503.ogg
5,abethr1,[],"['call', 'song']",-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,4.5,https://www.xeno-canto.org/363504,abethr1/XC363504.ogg
...,...,...,...,...,...,...,...,...,...,...,...,...
16937,yewgre1,[],[''],-1.2502,29.7971,Eurillas latirostris,Yellow-whiskered Greenbul,András Schmidt,Creative Commons Attribution-NonCommercial-Sha...,3.0,https://xeno-canto.org/703472,yewgre1/XC703472.ogg
16938,yewgre1,[],[''],-1.2489,29.7923,Eurillas latirostris,Yellow-whiskered Greenbul,András Schmidt,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://xeno-canto.org/703485,yewgre1/XC703485.ogg
16939,yewgre1,[],[''],-1.2433,29.7844,Eurillas latirostris,Yellow-whiskered Greenbul,András Schmidt,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://xeno-canto.org/704433,yewgre1/XC704433.ogg
16940,yewgre1,[],[''],0.0452,36.3699,Eurillas latirostris,Yellow-whiskered Greenbul,Lars Lachmann,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://xeno-canto.org/752974,yewgre1/XC752974.ogg


In [8]:
df_meta_head = df_meta_pd.head(5)
df_meta_head

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
1,abethr1,[],['song'],4.3906,38.2788,Turdus tephronotus,African Bare-eyed Thrush,Rolf A. de By,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/128013,abethr1/XC128013.ogg
2,abethr1,[],['call'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363501,abethr1/XC363501.ogg
3,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363502,abethr1/XC363502.ogg
4,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/363503,abethr1/XC363503.ogg
5,abethr1,[],"['call', 'song']",-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,4.5,https://www.xeno-canto.org/363504,abethr1/XC363504.ogg


In [9]:
df_meta_head.loc[:, "embeddings"] = df_meta_head.loc[:, "filename"].apply(compress)
df_meta_head

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_meta_head.loc[:, "embeddings"] = df_meta_head.loc[:, "filename"].apply(compress)


Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename,embeddings
1,abethr1,[],['song'],4.3906,38.2788,Turdus tephronotus,African Bare-eyed Thrush,Rolf A. de By,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/128013,abethr1/XC128013.ogg,"[865, 835, 821, 151, 798, 798, 176, 176, 176, ..."
2,abethr1,[],['call'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363501,abethr1/XC363501.ogg,"[62, 62, 62, 62, 408, 408, 835, 835, 835, 835,..."
3,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363502,abethr1/XC363502.ogg,"[62, 62, 62, 62, 408, 408, 408, 408, 408, 408,..."
4,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/363503,abethr1/XC363503.ogg,"[62, 1019, 432, 1017, 999, 724, 228, 724, 430,..."
5,abethr1,[],"['call', 'song']",-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,4.5,https://www.xeno-canto.org/363504,abethr1/XC363504.ogg,"[62, 62, 62, 62, 408, 408, 408, 408, 408, 408,..."


In [10]:
df_meta_head.loc[1, 'embeddings'].shape

(13688,)

In [11]:
test_path = out_dir / "test.parquet"
df_meta_head.to_parquet(test_path)

In [12]:
from tqdm import tqdm

tqdm.pandas()
df_meta_pd.loc[:, "embeddings"] = df_meta_pd.loc[:, "filename"].progress_apply(compress)

  0%|          | 0/16941 [00:00<?, ?it/s]

  0%|          | 42/16941 [02:13<14:52:46,  3.17s/it]


KeyboardInterrupt: 

In [13]:
df_meta_head.loc[:, "embeddings"] = df_meta_head.loc[:, "filename"].progress_apply(compress)

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:16<00:00,  3.31s/it]


In [14]:
df_meta_head.loc[:, "embeddings"] = df_meta_head.loc[:, "filename"].apply(compress)

In [7]:
torch.cuda.is_available()

True

## Test 1s concatenation

In [35]:
import librosa

file = "whitenoise.wav"
length = librosa.get_duration(path=(in_root / file))
emb_size = compress(file).shape[0] / 4
print(length, emb_size, emb_size / length)

10.000022675736961 751.0 75.09982970560158
