In [1]:
import os 
import pandas as pd
import numpy as np

import IPython.display as ipd

import librosa
import librosa.display
import matplotlib.pyplot as plt

import processing
import event_manager
import epoching

# Notebook display settings: show long cell contents and up to 100 rows.
# Full option names are used because abbreviated names are resolved by
# pattern matching and can become ambiguous in future pandas releases.
pd.set_option('display.max_colwidth', 400)
pd.set_option('display.max_rows', 100)

In [2]:
# Location of the recordings, relative to this notebook.
audio_folder = '../../data/ICBHI_final_database'

# Ask the project helper for the recordings found in that folder and
# report how many there are.
list_audio_files = processing.get_list_recording(audio_folder)
n_recordings = len(list_audio_files)
print('Found {} recording files'.format(n_recordings))

Found 920 recording files


In [3]:
# Analysis parameters.
n_fft = 256        # not referenced by the cells shown in this notebook
win_len = 2        # sliding-window length, in seconds
win_shift = 0.250  # hop between consecutive window starts, in seconds

# Recording to analyse. Only one assignment is kept active so it is obvious
# which file is loaded; swap the comment marker to switch recordings.
#audio_file = '222_1b1_Pr_sc_Meditron'
audio_file = '223_1b1_Lr_sc_Meditron'

audio_subject = '107_2b3_Ar_mc_AKGC417L' #crackle example
#audio_subject = '124_1b1_Pl_sc_Litt3200'


In [4]:
import model_prepare
# Load the selected recording, then run the project preprocessing step,
# which returns the processed signal together with a label frame.
# NOTE(review): lf/hf look like band-pass cut-offs in Hz and fs like the
# target sampling rate; confirm against processing.preprocess_data.
data_file = processing.load_file_from_recording_name(audio_folder, audio_file)
data_filtered, df_label = processing.preprocess_data(
    data_file,
    lf=120,
    hf=5000,
    fs=22050,
    annotations='event',
)

In [5]:
# Let the project helper complete the event frame, then add the length of
# every event as end minus start.
df_label = event_manager.fill_gap_event_frame(df_label, data_filtered)
df_label['duration'] = df_label['end'].sub(df_label['start'])

In [6]:
def lambda_count_label(row):
    """Count how often each distinct value occurs in ``row``.

    Parameters
    ----------
    row : array-like
        Values to tally (anything accepted by ``np.unique``).

    Returns
    -------
    dict
        Maps every distinct value to its number of occurrences.
    """
    values, occurrences = np.unique(row, return_counts=True)
    return {value: count for value, count in zip(values, occurrences)}

# Work on a copy so that the name `data_filtered` keeps referring to the
# object returned by the preprocessing step.
# NOTE(review): whether .copy() is shallow or deep depends on the type
# returned by processing.preprocess_data; confirm before mutating `data`.
data = data_filtered.copy()

In [7]:
# Start times of the sliding windows: one every `win_shift`, stopping before
# a window of length `win_len` would run past the last time stamp.
sample_times = data['time']
times = np.arange(0, sample_times.max() - win_len, win_shift)

# One row per window with its [start, end) bounds.
df_segment = pd.DataFrame(columns=['start', 'end'])
df_segment['start'] = times
df_segment['end'] = df_segment['start'] + win_len

# For every window, collect the labels of the samples whose time stamp
# falls inside [start, end), keyed by the window start.
data_dict = {}
window_bounds = zip(df_segment['start'].to_numpy(), df_segment['end'].to_numpy())
for window_start, window_end in window_bounds:
    in_window = np.where((sample_times >= window_start) & (sample_times < window_end))
    data_dict[window_start] = data['label'][in_window]


In [8]:
# Per-class decision thresholds, indexed by the integer class label.
# A window is flagged for a class when that class's share of the window's
# samples is >= the threshold; None means any non-zero share is enough.
threshold_class = [
    1,     # class 0: the whole window must carry this label
    None,  # class 1: any non-zero share
    0.25,  # class 2: at least a quarter of the window
]

In [9]:
# Turn the {window start: labels} mapping into a two-column frame.
label_counts = pd.Series(data_dict).reset_index()
label_counts.columns = ['start', 'label']

# Expand the per-window label tallies into one column per class; classes
# absent from a window get a count of 0.
per_class = label_counts['label'].apply(lambda_count_label).apply(pd.Series)
label_counts = pd.concat([label_counts[['start']], per_class], axis=1).fillna(0)

# Total number of samples in each window (sum over every class column).
label_counts['total'] = label_counts.iloc[:, 1:].sum(axis=1)

# Attach the counts to the window table, matching on the window start.
data_dict = label_counts
df_segment = df_segment.merge(data_dict, on='start', how='left')

In [10]:
# Every column between the first two (start, end) and the last one (total)
# is a per-class count; give those columns a 'label_' prefix.
class_columns = df_segment.columns[2:-1].tolist()
renamed_columns = []
for col in df_segment.columns:
    renamed_columns.append('label_' + str(col) if col in class_columns else col)
df_segment.columns = renamed_columns

In [11]:
# Collect the per-class columns by their 'label_' prefix.
# The isinstance guard keeps this cell re-runnable once integer-named
# columns have been added to df_segment (a bare `'label' in col` raises
# TypeError on an int), and the prefix test avoids picking up unrelated
# columns such as 'tot_label' or 'label'.
class_columns = [
    col for col in df_segment.columns
    if isinstance(col, str) and col.startswith('label_')
]
print(class_columns)

['label_0', 'label_2']


In [12]:
# Convert the per-class sample counts into fractions of each window, then
# discard the helper column holding the window totals.
window_totals = df_segment['total']
class_fractions = df_segment[class_columns].div(window_totals, axis=0)
df_segment[class_columns] = class_fractions
df_segment = df_segment.drop(columns=['total'])

In [13]:
# Display the window table with the per-class fractions (label_* columns).
df_segment

Unnamed: 0,start,end,label_0,label_2
0,0.00,2.00,1.000000,0.000000
1,0.25,2.25,1.000000,0.000000
2,0.50,2.50,0.952948,0.047052
3,0.75,2.75,0.827937,0.172063
4,1.00,3.00,0.702948,0.297052
...,...,...,...,...
120,30.00,32.00,0.633696,0.366304
121,30.25,32.25,0.633696,0.366304
122,30.50,32.50,0.633696,0.366304
123,30.75,32.75,0.633696,0.366304


In [14]:
# Display the names of the per-class fraction columns.
class_columns

['label_0', 'label_2']

In [15]:
# Turn each per-class fraction column ('label_<k>') into a 0/1 flag stored
# in an integer-named column <k>.
for class_ in class_columns:
    label_ = int(class_.split('_')[1])
    threshold = threshold_class[label_]
    # Compare against None explicitly: a plain truthiness test would treat
    # a configured threshold of 0 as "no threshold".
    if threshold is not None:
        # Flag the window when the class share reaches its threshold.
        df_segment[label_] = np.where(df_segment[class_] >= threshold, 1, 0)
    else:
        # No threshold configured: any non-zero share flags the window.
        df_segment[label_] = np.where(df_segment[class_] != 0, 1, 0)

In [16]:
# Display the window table with the thresholded 0/1 class flags added.
df_segment

Unnamed: 0,start,end,label_0,label_2,0,2
0,0.00,2.00,1.000000,0.000000,1,0
1,0.25,2.25,1.000000,0.000000,1,0
2,0.50,2.50,0.952948,0.047052,0,0
3,0.75,2.75,0.827937,0.172063,0,0
4,1.00,3.00,0.702948,0.297052,0,1
...,...,...,...,...,...,...
120,30.00,32.00,0.633696,0.366304,0,1
121,30.25,32.25,0.633696,0.366304,0,1
122,30.50,32.50,0.633696,0.366304,0,1
123,30.75,32.75,0.633696,0.366304,0,1


In [17]:
# The 0/1 flag columns are the ones whose name is a plain int; collect them
# in column order.
class_columns = []
for col in df_segment.columns:
    if type(col) is int:
        class_columns.append(col)
print(class_columns)


[0, 2]


In [18]:
# Number of class flags raised for each window.
df_segment['tot_label'] = df_segment[class_columns].sum(axis=1)

# Keep only the windows flagged for exactly one class. The explicit .copy()
# makes the filtered frame independent of the original, so the column
# assignment below no longer triggers SettingWithCopyWarning.
df_segment = df_segment[df_segment['tot_label'] == 1].copy()

# With a single flag set per row, idxmax returns the name of that flag
# column, i.e. the window's class label.
df_segment['label'] = df_segment[class_columns].idxmax(axis=1)

# Display only: the re-indexed frame is shown but not assigned back, so
# df_segment keeps its original row index.
df_segment.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_segment['label'] = df_segment[class_columns].idxmax(axis=1)


Unnamed: 0,start,end,label_0,label_2,0,2,tot_label,label
0,0.0,2.0,1.0,0.0,1,0,1,0
1,0.25,2.25,1.0,0.0,1,0,1,0
2,1.0,3.0,0.702948,0.297052,0,1,1,2
3,1.25,3.25,0.617143,0.382857,0,1,1,2
4,1.5,3.5,0.617143,0.382857,0,1,1,2
5,1.75,3.75,0.617143,0.382857,0,1,1,2
6,2.0,4.0,0.617143,0.382857,0,1,1,2
7,2.25,4.25,0.617143,0.382857,0,1,1,2
8,2.5,4.5,0.664195,0.335805,0,1,1,2
9,3.75,5.75,0.677392,0.322608,0,1,1,2


In [19]:
# Display the windows whose assigned label is 2.
df_segment.loc[df_segment['label'] == 2]

Unnamed: 0,start,end,label_0,label_2,0,2,tot_label,label
4,1.0,3.0,0.702948,0.297052,0,1,1,2
5,1.25,3.25,0.617143,0.382857,0,1,1,2
6,1.5,3.5,0.617143,0.382857,0,1,1,2
7,1.75,3.75,0.617143,0.382857,0,1,1,2
8,2.0,4.0,0.617143,0.382857,0,1,1,2
9,2.25,4.25,0.617143,0.382857,0,1,1,2
10,2.5,4.5,0.664195,0.335805,0,1,1,2
15,3.75,5.75,0.677392,0.322608,0,1,1,2
16,4.0,6.0,0.610045,0.389955,0,1,1,2
17,4.25,6.25,0.610045,0.389955,0,1,1,2


In [25]:
# Display the 'fs' entry of the preprocessed recording (presumably the
# sampling rate in Hz; it matches the fs passed to preprocess_data).
data_filtered['fs']

22050

In [24]:
# Sanity check: number of samples whose time stamp lies in [30.75, 32.75),
# the bounds of the last window listed in df_segment.
sample_times = data_filtered['time']
in_last_window = (sample_times >= 30.75) & (sample_times < 32.75)
idx_ = np.where(in_last_window)[0]
len(idx_)

44100

In [27]:
# Sanity check: number of samples whose time stamp lies in
# [30.954841, 31.687472).
# NOTE(review): these bounds look like one annotated event from df_label;
# confirm where they come from.
sample_times = data_filtered['time']
in_event = (sample_times >= 30.954841) & (sample_times < 31.687472)
idx_ = np.where(in_event)[0]
len(idx_)

16154

In [28]:
# Cross-check: samples in the sub-interval (16154) divided by samples in the
# whole 30.75-32.75 window (44100) reproduces the label_2 fraction that
# df_segment reports for that window.
16154/44100

0.36630385487528344