In [1]:
import pandas as pd
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Read the development table; the first CSV column becomes the row index.
df_dev = pd.read_csv(filepath_or_buffer='development.csv', index_col=0)

In [3]:
def load_file(path):
    """Load one audio file, trim it, and report its length.

    The file is decoded with ``librosa.load`` using that function's default
    arguments, then passed through ``librosa.effects.trim`` with
    ``top_db=10`` and ``hop_length=10``. The second value returned by
    ``trim`` is discarded.

    Args:
        path: location of the audio file, forwarded to ``librosa.load``.

    Returns:
        A tuple ``(signal, sample_rate, duration)`` where ``signal`` is the
        trimmed waveform, ``sample_rate`` is the rate returned by
        ``librosa.load`` and ``duration`` is ``librosa.get_duration`` of the
        trimmed waveform at that rate.
    """
    raw_signal, sample_rate = librosa.load(path)
    trimmed_signal, _ = librosa.effects.trim(y=raw_signal, top_db=10, hop_length=10)
    return trimmed_signal, sample_rate, librosa.get_duration(y=trimmed_signal, sr=sample_rate)

In [4]:
df_dev = df_dev[['path', 'action', 'object']]

In [5]:
df_dev[['info', 'sample_rate', 'duration']] = df_dev.apply(lambda r: load_file(r['path']), axis=1, result_type='expand')

In [6]:
df_dev.head()

Unnamed: 0_level_0,path,action,object,info,sample_rate,duration
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/0a312...,change language,none,"[0.00911877, 0.016520254, -0.004310082, -0.015...",22050,0.968254
1,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/0ee42...,activate,music,"[0.00027365616, 0.0010685086, 0.0017168283, 0....",22050,0.542404
2,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/1d9f3...,deactivate,lights,"[-0.0009428626, -0.0009029041, -0.00086400076,...",22050,1.070748
3,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/269fc...,increase,volume,"[-3.236027e-07, -4.1036277e-09, 4.031165e-07, ...",22050,0.13424
4,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/5bbda...,increase,volume,"[-5.562561e-07, 7.866835e-07, -1.804527e-07, -...",22050,1.377778


In [7]:
# Longest trimmed clip, used to decide the outlier threshold.
df_dev.loc[:, 'duration'].max()

6.834920634920635

In [12]:
def remove_outliers(df, max_duration=3):
    """Drop, in place, every row whose 'duration' reaches a threshold.

    Args:
        df: DataFrame with a numeric 'duration' column. It is modified in
            place. Rows are removed by index label, so labels are expected
            to be unique.
        max_duration: rows with ``duration >= max_duration`` are removed.
            Defaults to 3, the value previously hard-coded here.

    Returns:
        The same DataFrame object, after the rows have been removed.
    """
    print("Old Shape: ", df.shape)

    # Resolve the offending rows to index LABELS. np.where yields integer
    # positions, which only coincide with labels on a default 0..n-1 index;
    # DataFrame.drop always interprets its argument as labels.
    too_long = df.index[df['duration'] >= max_duration]
    df.drop(too_long, inplace=True)

    print("New Shape: ", df.shape)

    return df

In [13]:
# Discard the overly long recordings before computing the padding length.
df_dev = remove_outliers(df=df_dev)

Old Shape:  (9854, 6)
New Shape:  (9827, 6)


In [14]:
def add_padding(info, upper_bound):
    """Return ``info`` brought to exactly ``upper_bound`` samples.

    Shorter signals are right-padded with float32 zeros. Longer signals are
    clipped to their first ``upper_bound`` samples instead of failing, so a
    bound computed on one dataset can be applied to another.

    Args:
        info: 1-D sequence of samples.
        upper_bound: target length in samples.

    Returns:
        A new 1-D numpy array of length ``upper_bound``.
    """
    clipped = info[:upper_bound]
    # max(..., 0) keeps np.zeros from raising on a negative size when the
    # input is already at least upper_bound samples long.
    pad = np.zeros(max(upper_bound - len(clipped), 0), dtype=np.float32)
    return np.concatenate([clipped, pad], 0)

In [15]:
info_length = [ len(info) for info in df_dev['info'] ]
max_length = max(info_length)

In [16]:
df_dev['info'] = df_dev.apply(lambda r: add_padding(r['info'], max_length), axis=1)

In [17]:
df_dev.head()

Unnamed: 0_level_0,path,action,object,info,sample_rate,duration
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/0a312...,change language,none,"[0.00911877, 0.016520254, -0.004310082, -0.015...",22050,0.968254
1,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/0ee42...,activate,music,"[0.00027365616, 0.0010685086, 0.0017168283, 0....",22050,0.542404
2,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/1d9f3...,deactivate,lights,"[-0.0009428626, -0.0009029041, -0.00086400076,...",22050,1.070748
3,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/269fc...,increase,volume,"[-3.236027e-07, -4.1036277e-09, 4.031165e-07, ...",22050,0.13424
4,dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/5bbda...,increase,volume,"[-5.562561e-07, 7.866835e-07, -1.804527e-07, -...",22050,1.377778


In [18]:
df_dev['mfcc'] = df_dev.apply(lambda r: librosa.feature.mfcc(y=r['info'], sr=22050), axis=1)

In [19]:
df_dev.drop(['path', 'info', 'sample_rate', 'duration'], axis=1, inplace=True)

In [20]:
df_dev.head()

Unnamed: 0_level_0,action,object,mfcc
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,change language,none,"[[-391.36572, -334.59875, -329.9948, -338.0268..."
1,activate,music,"[[-456.5393, -398.14557, -397.90173, -400.3576..."
2,deactivate,lights,"[[-608.87067, -579.16266, -592.0531, -617.7442..."
3,increase,volume,"[[-399.90738, -159.36209, -60.798008, -44.2711..."
4,increase,volume,"[[-586.8521, -509.29446, -504.02423, -524.8576..."


In [21]:
def cal_mean_std_blocks(info, n, m):
    """Summarise a 2-D array by the mean and std of each rectangular block.

    The array is tiled with blocks of ``n`` rows by ``m`` columns. Blocks are
    visited one band of rows at a time, left to right within each band.
    Blocks on the bottom or right edge are smaller when the array shape is
    not a multiple of the block size.

    Args:
        info: 2-D numpy array.
        n: block height in rows; must be a positive integer.
        m: block width in columns; must be a positive integer.

    Returns:
        ``(means, stds)``: two lists with one entry per block, in visiting
        order. Both are empty when either dimension of ``info`` is zero.

    Raises:
        ValueError: if ``n`` or ``m`` is not positive. A non-positive step
            can never reach the end of the array.
    """
    if n <= 0 or m <= 0:
        raise ValueError(f"block size must be positive, got n={n}, m={m}")

    h, v = info.shape

    means = []
    stds = []

    for i in range(0, h, n):
        for j in range(0, v, m):
            # Slicing clips at the array edge, so trailing blocks shrink
            # automatically without explicit bound checks.
            block = info[i:i + n, j:j + m]
            means.append(np.mean(block))
            stds.append(np.std(block))

    return means, stds

In [22]:
# Shape of the first row's MFCC matrix.
df_dev['mfcc'].iloc[0].shape

(20, 130)

In [23]:
df_dev[['bls_mean', 'bls_std']] = df_dev.apply(lambda r: cal_mean_std_blocks(r['mfcc'], 1, 10), axis=1, result_type='expand')

In [25]:
mean_cols = [f'mean_block{i}' for i in range(20*13)]
means = pd.DataFrame(df_dev.bls_mean.tolist(), index=df_dev.index, columns=mean_cols)
df_dev = pd.concat((df_dev, means), axis=1)

In [26]:
std_cols = [f'std_block{i}' for i in range(20*13)]
stds = pd.DataFrame(df_dev.bls_std.tolist(), index=df_dev.index, columns=std_cols)
df_dev = pd.concat((df_dev, stds), axis=1)

In [27]:
df_dev.drop(['bls_mean', 'bls_std','mfcc'], axis=1, inplace=True)

In [28]:
df_dev.head()

Unnamed: 0_level_0,action,object,mean_block0,mean_block1,mean_block2,mean_block3,mean_block4,mean_block5,mean_block6,mean_block7,...,std_block250,std_block251,std_block252,std_block253,std_block254,std_block255,std_block256,std_block257,std_block258,std_block259
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,change language,none,-356.203796,-388.766907,-343.028473,-411.987061,-634.455688,-759.75354,-759.75354,-759.75354,...,3.987195,1.201935,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,activate,music,-410.082947,-394.757629,-633.704102,-773.689026,-773.689026,-773.689026,-773.689026,-773.689026,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,deactivate,lights,-623.106384,-563.920349,-579.079712,-577.224548,-576.19165,-852.390991,-852.390991,-852.390991,...,5.709394,5.630404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,increase,volume,-316.254639,-717.285522,-717.285522,-717.285522,-717.285522,-717.285522,-717.285522,-717.285522,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,increase,volume,-550.202698,-583.736755,-531.322693,-547.182495,-681.888916,-645.443481,-792.129395,-806.178833,...,3.472549,3.296805,3.561493,2.887089,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
y = df_dev[['action', 'object']]
X = df_dev.drop(['action', 'object'], axis=1)

In [30]:
y_concat = y['action']+y['object']

In [44]:
pca_scaler = PCA(n_components=50).fit(X)

In [45]:
X_PCA = pca_scaler.transform(X)

In [46]:
scaler = StandardScaler().fit(X_PCA)

In [47]:
X_transformed = scaler.transform(X_PCA)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y_concat, test_size=0.2, shuffle=True, stratify=y_concat)

In [49]:
# Baseline: SVC with default hyper-parameters, scored on the held-out split.
svc = SVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6642929806714141

<h3> Tuning SVM </h3>

In [50]:
# Tuned variant: larger C and class weighting; scored on the same split.
svc = SVC(C=4, kernel='rbf', gamma='scale', class_weight='balanced')
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6993896236012207

<h3> Submission </h3>

In [8]:
df_test = pd.read_csv('evaluation.csv', index_col=0)

In [9]:
df_test[['info', 'sample_rate', 'duration']] = df_test.apply(lambda r: load_file(r['path']), axis=1, result_type='expand')

In [10]:
df_test = df_test[['info', 'duration']]

In [11]:
df_test['duration'].max()

2.727437641723356

In [51]:
df_test['info'] = df_test.apply(lambda r: add_padding(r['info'], max_length), axis=1)

In [52]:
df_test['mfcc'] = df_test.apply(lambda r: librosa.feature.mfcc(y=r['info'], sr=22050), axis=1)

In [53]:
df_test.iloc[0]['mfcc'].shape

(20, 130)

In [54]:
df_test[['bls_mean', 'bls_std']] = df_test.apply(lambda r: cal_mean_std_blocks(r['mfcc'], 1, 10), axis=1, result_type='expand')

In [56]:
mean_cols = [f'mean_block{i}' for i in range(260)]
means = pd.DataFrame(df_test.bls_mean.tolist(), index=df_test.index, columns=mean_cols)
df_test = pd.concat((df_test, means), axis=1)

In [57]:
std_cols = [f'std_block{i}' for i in range(260)]
stds = pd.DataFrame(df_test.bls_std.tolist(), index=df_test.index, columns=std_cols)
df_test = pd.concat((df_test, stds), axis=1)

In [58]:
df_test.drop(['info', 'duration', 'mfcc', 'bls_mean', 'bls_std'], axis=1, inplace=True)

In [59]:
df_test.head()

Unnamed: 0_level_0,mean_block0,mean_block1,mean_block2,mean_block3,mean_block4,mean_block5,mean_block6,mean_block7,mean_block8,mean_block9,...,std_block250,std_block251,std_block252,std_block253,std_block254,std_block255,std_block256,std_block257,std_block258,std_block259
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-342.294006,-457.904236,-356.043518,-481.956207,-617.120605,-617.120605,-617.120605,-617.120605,-617.120605,-617.120605,...,5.788821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-258.892914,-249.393021,-278.44812,-247.98999,-398.591553,-562.826172,-562.826172,-562.826172,-562.826172,-562.826172,...,4.964605,4.33042,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-337.391937,-327.355164,-323.460052,-367.22348,-699.065796,-699.065796,-699.065796,-699.065796,-699.065796,-699.065796,...,2.827217,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-306.400818,-288.933197,-616.560425,-616.560425,-616.560425,-616.560425,-616.560425,-616.560425,-616.560425,-616.560425,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-242.72525,-268.238464,-155.275299,-422.023773,-610.489685,-610.489685,-610.489685,-610.489685,-610.489685,-610.489685,...,4.088653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
X_val = pca_scaler.transform(df_test)

In [61]:
X_val_transformed = scaler.transform(X_val)

In [62]:
y_pred_val = svc.predict(X_val_transformed)

In [63]:
df_test['Predicted'] = y_pred_val

In [64]:
df_test = df_test['Predicted']

In [66]:
df_test.to_csv('submit.csv')