In [0]:
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd
import librosa
import librosa.display
import os
from os import path
import io
!pip3 install soundfile
!pip3 install librosa
import soundfile as sf
import librosa.feature
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler
from collections import Counter

In [0]:
# Install the ipython-autotime extension into the runtime (no version is pinned).
!pip install ipython-autotime

# Enable the extension; presumably the source of the `time: ...` lines shown
# under executed cells.
%load_ext autotime

In [0]:
# Root folder holding the pickled models/scalers and the results CSV. The trailing
# slash matters: file names are appended to it by plain string concatenation.
BASE_PATH = '/content/drive/My Drive/DCASE 2020 Challenge/TAU Urban Acoustic Scenes 2020 3Class/'

# Carregando modelos

In [0]:
# Pickled classifier loaded into `model_transp_resto` (used by is_transportation).
transp_resto_file = BASE_PATH + 'linearsvc_transp_resto_alvaro.pkl'
# Pickled classifier loaded into `model_io` (used by is_indoor).
io_file = BASE_PATH + 'linearsvc_indoor_outdoor.pkl'

In [0]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model_transp_resto = joblib.load(transp_resto_file)
# Bare last expression: shows the loaded estimator as the cell output.
model_transp_resto

In [0]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model_io = joblib.load(io_file)
# Bare last expression: shows the loaded estimator as the cell output.
model_io

# Carregando Dataset de avaliação

In [0]:
# File names of the clips to classify (iterated by the classification loop).
# The path is built from BASE_PATH so the dataset root is configured in one place;
# it resolves to the same location as the previously hard-coded string.
# NOTE(review): joblib.load unpickles the file; load it only from a trusted source.
names = joblib.load(BASE_PATH + 'test_names.pkl')

# Importando o sinal de um áudio

In [0]:
def get_signal(filename, DIR_AUDIO):
    """Read an audio file and return it both raw and standardised.

    Parameters
    ----------
    filename : str
        File name (or relative path) of the clip inside ``DIR_AUDIO``.
    DIR_AUDIO : str
        Directory that contains the audio files.

    Returns
    -------
    list
        ``[signal, signal_]`` where ``signal`` is the array returned by
        ``soundfile.read`` and ``signal_`` is the same data shifted to zero
        mean and scaled to unit standard deviation. Mean and standard
        deviation are computed over the whole array (all channels together).
    """
    # The sample rate reported by soundfile is not used by this function.
    signal, _ = sf.read(os.path.join(DIR_AUDIO, filename))
    centered = signal - np.mean(signal)
    spread = np.std(centered)
    # A constant (e.g. completely silent) clip has zero spread; dividing by it
    # would fill the result with NaN, so such a clip is returned centred only.
    signal_ = centered / spread if spread > 0 else centered
    return [signal, signal_]

In [0]:
def is_indoor(signal):
    """Decide whether a clip is an indoor scene by majority vote over its segments.

    The clip is turned into one feature row per segment by
    ``get_features_indoor_outdoor``, the rows are scaled with the scaler stored
    in ``scaler_io.pkl`` and classified by ``model_io``; the most frequent
    predicted label wins.

    Parameters
    ----------
    signal : array-like
        Audio samples handed to ``get_features_indoor_outdoor``.

    Returns
    -------
    bool
        True when the winning label is ``'indoor'``.
    """
    segment_features = get_features_indoor_outdoor(signal)

    # The scaler is unpickled once and kept on the function object: this function
    # is called once per audio file, and re-reading the pickle every time is
    # wasted I/O. Re-running the cell that defines the function resets the cache.
    scaler = getattr(is_indoor, '_scaler', None)
    if scaler is None:
        # NOTE(review): joblib.load unpickles the file; trusted sources only.
        scaler = joblib.load(BASE_PATH + 'scaler_io.pkl')
        is_indoor._scaler = scaler

    X = scaler.transform(segment_features.values)
    votes = Counter(model_io.predict(X))
    # On a tie, max() keeps the first maximal key in the Counter's insertion
    # order, i.e. the tied label that was predicted earliest in the clip.
    final_prediction = max(votes, key=votes.get)
    return final_prediction == 'indoor'

In [0]:
def is_transportation(signal):
    """Tell whether a clip is classified as a transportation scene.

    The clip is summarised into a single feature vector by
    ``get_features_transp_model``, scaled with the scaler stored in
    ``scaler_transp_alvaro.pkl`` and classified by ``model_transp_resto``.

    Parameters
    ----------
    signal : array-like
        Audio samples handed to ``get_features_transp_model``.

    Returns
    -------
    bool
        True when the model predicts ``'yes_transp'``.
    """
    # One feature vector per clip, reshaped into a single-row matrix for sklearn.
    features = get_features_transp_model(signal).reshape(1, -1)

    # The scaler is unpickled once and kept on the function object: this function
    # is called once per audio file, and re-reading the pickle every time is
    # wasted I/O. Re-running the cell that defines the function resets the cache.
    scaler = getattr(is_transportation, '_scaler', None)
    if scaler is None:
        # NOTE(review): joblib.load unpickles the file; trusted sources only.
        scaler = joblib.load(BASE_PATH + 'scaler_transp_alvaro.pkl')
        is_transportation._scaler = scaler

    predicted = model_transp_resto.predict(scaler.transform(features))
    return predicted[0] == 'yes_transp'

In [0]:
def get_centroid(y, sr):
    """Return librosa's spectral centroid of ``y``.

    ``sr == 0`` is a sentinel: the ``sr`` argument is then not forwarded, so
    librosa applies its own default sample rate. Any other value is passed on.
    """
    sr_kwargs = {} if sr == 0 else {'sr': sr}
    return librosa.feature.spectral_centroid(y=y, **sr_kwargs)
def get_flatness(y):
    """Return librosa's spectral flatness of ``y`` (library defaults)."""
    flatness = librosa.feature.spectral_flatness(y=y)
    return flatness
def get_rms(y):
    """Return librosa's RMS feature of ``y`` (library defaults)."""
    rms = librosa.feature.rms(y=y)
    return rms
def get_mfccs(y, sr):
    """Return the first five MFCCs of ``y`` computed by librosa at sample rate ``sr``."""
    mfcc_kwargs = {'y': y, 'sr': sr, 'n_mfcc': 5}
    return librosa.feature.mfcc(**mfcc_kwargs)

In [0]:
def get_features_transp_model(signal) -> np.ndarray:
    """Summarise a whole clip into the feature vector of the transportation model.

    Parameters
    ----------
    signal : array-like
        Audio samples. For a 2-D array the first column is used (assumed to be
        ``(n_samples, n_channels)`` as returned by ``soundfile.read`` — TODO
        confirm); a 1-D array is used as is.

    Returns
    -------
    numpy.ndarray
        1-D float64 vector laid out as: mean/std of RMS, mean/std of spectral
        flatness, mean/std of spectral centroid, then the mean of each MFCC row
        followed by the standard deviation of each MFCC row.
    """
    SAMPLERATE = 48000
    samples = np.asarray(signal)
    if samples.ndim > 1:
        samples = samples[:, 0]

    # sr=0 makes get_centroid fall back to librosa's default sample rate, whereas
    # the MFCCs below use SAMPLERATE.
    # NOTE(review): presumably this mirrors how the model was trained — confirm
    # before "fixing" the mismatch, since it changes the feature values.
    c = get_centroid(samples, 0)
    f = get_flatness(samples)
    rms = get_rms(samples)
    mfccs = get_mfccs(samples, SAMPLERATE)

    mfccs_mean = [np.mean(row) for row in mfccs]
    mfccs_std = [np.std(row) for row in mfccs]
    stats = [np.mean(rms), np.std(rms), np.mean(f), np.std(f), np.mean(c), np.std(c)]

    # The previous implementation stacked an empty list in front of the values,
    # which silently promoted the result to float64; the explicit cast keeps the
    # dtype (and therefore the scaler's arithmetic) exactly the same.
    return np.hstack([stats, mfccs_mean, mfccs_std]).astype(np.float64)

In [0]:
def get_features_indoor_outdoor(signal, samplerate=48000, parts=10) -> pd.DataFrame:
    """Build one feature row per segment of a clip for the indoor/outdoor model.

    Parameters
    ----------
    signal : array-like
        Audio samples. For a 2-D array the first column is used (assumed to be
        ``(n_samples, n_channels)`` as returned by ``soundfile.read`` — TODO
        confirm); a 1-D array is used as is.
    samplerate : int, optional
        Sample rate handed to the centroid and MFCC extractors. The function
        used to read an undefined global ``SAMPLERATE`` (NameError on a fresh
        kernel); the default matches the 48000 used by
        ``get_features_transp_model``.
        NOTE(review): confirm this is the rate the indoor/outdoor model was
        trained with.
    parts : int, optional
        Number of roughly equal segments the clip is split into (default 10).

    Returns
    -------
    pandas.DataFrame
        ``parts`` rows with default integer index and columns; each row holds
        mean/std of spectral centroid, mean/std of spectral flatness, mean/std
        of RMS, then the mean of each MFCC row followed by the standard
        deviation of each MFCC row.
    """
    samples = np.asarray(signal)
    if samples.ndim > 1:
        samples = samples[:, 0]

    rows = []
    for segment in np.array_split(samples, parts):
        c = get_centroid(segment, samplerate)
        f = get_flatness(segment)
        rms = get_rms(segment)
        mfccs = get_mfccs(segment, samplerate)

        mfccs_mean = [np.mean(row) for row in mfccs]
        mfccs_std = [np.std(row) for row in mfccs]
        stats = [np.mean(c), np.std(c), np.mean(f), np.std(f), np.mean(rms), np.std(rms)]

        # float64 is what the earlier zero-initialised accumulator produced; the
        # explicit cast keeps the values fed to the scaler unchanged.
        rows.append(np.hstack([stats, mfccs_mean, mfccs_std]).astype(np.float64))

    # Collect the rows and build the frame once, instead of growing an array
    # from a dummy zero row that then has to be dropped again.
    return pd.DataFrame(data=np.vstack(rows))

In [0]:
# Number of file names loaded from test_names.pkl.
len(names)

In [0]:
# Displays the complete collection of file names as the cell output.
names

In [0]:
# Random sample of 100 file names. np.random.choice draws with replacement by
# default and no seed is set here, so the sample can contain repeats and changes
# between runs.
# NOTE(review): `names_` is not referenced by any other visible cell; the
# classification loop iterates over the full `names`.
names_ = np.random.choice(names,100)

In [0]:
# Classify every clip in `names` and record one row per clip in teste_out.csv.
# Decision cascade: transportation model first; if it says no, the indoor/outdoor
# model decides between 'indoor' and 'outdoor'.
AUDIO_DIR = BASE_PATH + 'soundfiles/TAU-urban-acoustic-scenes-2020-3class-development/'
OUTPUT_CSV = BASE_PATH + 'teste_out.csv'
OUTPUT_HEADER = ['filename', 'scene_label', 'indoor', 'outdoor', 'transportation']

# Read the results file once (not on every iteration). If it already exists, the
# files it lists are skipped, so an interrupted run can be resumed and a re-run
# does not append duplicate rows. Otherwise it is created with its header row.
if os.path.exists(OUTPUT_CSV):
    already_done = set(pd.read_csv(OUTPUT_CSV)['filename'])
else:
    pd.DataFrame(columns=OUTPUT_HEADER).to_csv(OUTPUT_CSV, mode='w', header=True, index=False)
    already_done = set()

for filename in names:
    if filename in already_done:
        continue
    # `signal` is the raw audio, `signal_` the standardised copy.
    signal, signal_ = get_signal(filename, AUDIO_DIR)
    if is_transportation(signal):
        record = [filename, 'transportation', 0, 0, 1]
    elif is_indoor(signal_):
        record = [filename, 'indoor', 1, 0, 0]
    else:
        record = [filename, 'outdoor', 0, 1, 0]
    # Append immediately so finished work survives an interrupted session.
    row = pd.DataFrame(data=[record])
    row.to_csv(OUTPUT_CSV, mode='a', header=False, index=False)
    already_done.add(filename)

In [0]:
# Disabled one-off initialisation, kept as a string literal so it does not run:
# it would create teste_out.csv containing only the header row. Because it opens
# the file with mode='w', running it would overwrite any results already stored.
'''header = ['filename','scene_label','indoor','outdoor','transportation']
output_df = pd.DataFrame(columns=header)
output_df.to_csv(BASE_PATH + 'teste_out.csv', mode='w', header=True, index=False)'''

In [0]:
# Display the predictions stored in teste_out.csv.
pd.read_csv(BASE_PATH + 'teste_out.csv')

In [0]:
# Predictions read back from teste_out.csv; `a` is reused by the following cells.
a = pd.read_csv(BASE_PATH + 'teste_out.csv')
# (rows, columns) of the predictions whose 'outdoor' flag is 1.
a[a['outdoor'] == 1].values.shape

In [0]:
# (rows, columns) of the predictions whose 'indoor' flag is 1.
a[a['indoor'] == 1].values.shape

In [0]:
# (rows, columns) of the predictions whose 'transportation' flag is 1.
a[a['transportation'] == 1].values.shape

In [0]:
# Dataset root (written without a trailing slash here, unlike BASE_PATH) and the
# folder that holds the audio files.
DIR = '/content/drive/My Drive/DCASE 2020 Challenge/TAU Urban Acoustic Scenes 2020 3Class' 
DIR_AUDIO = DIR + '/soundfiles/TAU-urban-acoustic-scenes-2020-3class-development/'
# Metadata table; later cells rely on its 'filename' and 'scene_label' columns.
df_meta = pd.read_csv(f'{DIR}/meta.csv')
# Serialises the frame to CSV text in memory and parses it again.
# NOTE(review): looks redundant right after read_csv — confirm before removing.
df_meta = pd.read_csv(io.StringIO(df_meta.to_csv(index=False)))
# (rows, columns) of the metadata table.
df_meta.values.shape

In [0]:
# Metadata rows whose 'filename' appears in `names`, i.e. the labelled
# counterpart of the clips that were classified.
teste_labeld = df_meta[df_meta['filename'].isin(names)]
# (rows, columns) of that subset.
teste_labeld.values.shape

In [0]:
# Inner join of metadata labels (left) and predictions (right) on 'filename'.
# Both frames carry a 'scene_label' column, so pandas suffixes them:
# scene_label_x comes from `teste_labeld`, scene_label_y from the predictions `a`.
mergedStuff = pd.merge(teste_labeld, a, on=['filename'], how='inner')
mergedStuff

In [0]:
# (rows, columns) of the merged rows where the metadata label equals the predicted one.
(mergedStuff[mergedStuff['scene_label_x'] == mergedStuff['scene_label_y']]).values.shape

In [0]:
# Number of merged rows whose metadata label (scene_label_x) equals the predicted
# label (scene_label_y): summing the boolean comparison counts the matches.
count1 = int((mergedStuff['scene_label_x'] == mergedStuff['scene_label_y']).sum())

In [21]:
# Number of labelled rows in the evaluation subset.
count2 = len(teste_labeld)

time: 1.18 ms


In [22]:
# Share of the labelled rows whose predicted scene label matches the metadata
# label (count1 matches out of count2 rows).
count1/count2

0.7344734473447345

time: 2.25 ms
