In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt 
import numpy as np
from pathlib import Path
import random
import pandas as pd
from statistics import mean

In [3]:
# Load the precomputed accent feature table from JSON (the path is relative to
# the notebook's working directory). The schema is only known from the rendered
# output below — presumably one row per audio clip; confirm against the
# feature-extraction step that produced this file.
data = pd.read_json('../4_accent_features_8k.json')

In [4]:
# Display the loaded frame (pandas elides the middle rows in the rendered output).
data

Unnamed: 0,lable,rms,zrc,sb,sc,mfcc
0,bangla,"[0.0013022722, 0.0017205017000000002, 0.001578...","[0.0712890625, 0.1396484375, 0.142578125, 0.13...","[795.9049247021, 844.9831554604, 890.971914505...","[801.0601386723, 851.0365678407, 978.604145012...","[[-443.1603393555, -422.7786865234, -423.59481..."
1,malayalam,"[0.0019007573, 0.0026969092, 0.0024940472, 0.0...","[0.0712890625, 0.1474609375, 0.1591796875, 0.1...","[751.0645191972, 761.4017819413, 771.918327785...","[782.1835638066, 861.6459650473, 854.403650816...","[[-428.3286437988, -393.4295654297, -394.17318..."
2,malayalam,"[0.0019902054000000002, 0.0023496253000000003,...","[0.06054687500000001, 0.138671875, 0.155273437...","[835.2683419914, 869.5909474385, 862.921266800...","[799.9404110335, 870.736104638, 901.550001952,...","[[-437.4763183594, -409.4303588867, -412.71850..."
3,odiya,"[0.0018824536, 0.0026095212, 0.002663172500000...","[0.06640625, 0.1396484375, 0.1376953125, 0.132...","[803.6664493043, 793.4924641575, 801.737168895...","[854.5985286383, 832.038648165, 830.6994559488...","[[-434.8092041016, -401.2186584473, -400.85339..."
4,bangla,"[0.00199213, 0.0028055811, 0.0028021345, 0.002...","[0.08984375, 0.17578125, 0.1552734375, 0.15429...","[853.9057514039, 815.3463371834, 875.239832466...","[971.5535952292, 916.9148191011, 966.702269077...","[[-399.463104248, -372.8232116699, -373.531829..."
...,...,...,...,...,...,...
6178,telugu,"[0.0014494726, 0.055635470900000004, 0.0765934...","[0.068359375, 0.1337890625, 0.099609375, 0.050...","[820.9125489137, 539.7206969134, 494.040615590...","[859.4978397385, 747.4675212448, 673.467429045...","[[-332.5925598145, -184.8822174072, -165.35314..."
6179,odiya,"[0.0021822276, 0.0029424643, 0.0027601738, 0.0...","[0.0791015625, 0.1455078125, 0.126953125, 0.14...","[875.3101201428, 802.6967022014, 788.821870706...","[958.2656348969, 846.3804996667, 836.464415526...","[[-411.5061035156, -379.0005493164, -384.05307..."
6180,malayalam,"[0.0018016612, 0.0022565140000000003, 0.002092...","[0.0390625, 0.091796875, 0.1083984375, 0.12402...","[840.8434179348, 874.0352141954, 849.199936017...","[854.2836269544, 847.1916806041, 860.230710413...","[[-448.7952880859, -421.9195251465, -422.11801..."
6181,bangla,"[0.0013226597000000001, 0.0016902991, 0.001800...","[0.0810546875, 0.169921875, 0.162109375, 0.155...","[799.3445132141, 838.1066019972, 842.924535838...","[876.3167666297, 949.672900248, 913.8801781294...","[[-453.154876709, -426.0098266602, -424.723693..."


In [7]:
def get_min_avg_max_std(column, data=None):
    """Collapse a column of per-row sequences into four summary-statistic columns.

    Each cell of ``data[column]`` is expected to hold a non-empty sequence of
    numbers. The column is removed from ``data`` IN PLACE and replaced by
    ``min_<column>``, ``avg_<column>``, ``max_<column>`` and ``std_<column>``.

    Because the source column is dropped, calling this twice for the same
    column raises ``KeyError`` on the second call.

    Parameters
    ----------
    column : str
        Name of the column whose cells are sequences of numbers.
    data : pandas.DataFrame, optional
        Frame to modify. When omitted, the notebook-level ``data`` variable is
        looked up at call time, so re-loading ``data`` in an earlier cell takes
        effect without re-running this definition.

    Returns
    -------
    tuple of four lists
        ``(minimums, means, maximums, standard_deviations)``, one entry per row.

    Raises
    ------
    KeyError
        If ``column`` is not in ``data``, or no global ``data`` exists when the
        ``data`` argument is omitted.
    """
    if data is None:
        # Late binding: a ``data=data`` default would capture whichever frame
        # existed when the def cell was executed, which may be stale.
        data = globals()['data']

    rows = list(data[column])
    row_data_min = [min(row) for row in rows]
    row_data_avg = [mean(row) for row in rows]
    row_data_max = [max(row) for row in rows]
    row_data_std = [np.std(row) for row in rows]  # population std (numpy default ddof=0)

    # Swap the raw sequences for their scalar summaries on the same frame.
    data.drop(columns=column, inplace=True)
    data[f'min_{column}'] = row_data_min
    data[f'avg_{column}'] = row_data_avg
    data[f'max_{column}'] = row_data_max
    data[f'std_{column}'] = row_data_std

    return row_data_min, row_data_avg, row_data_max, row_data_std

In [8]:
# Replace each sequence-valued feature column with its min/avg/max/std summary.
# The helper drops the source column, so this cell cannot be re-run without
# reloading `data` first.
for column in ('rms', 'zrc', 'sb'):
    get_min_avg_max_std(column)
# The final call is bound to `_` so the returned lists are not echoed as cell output.
_ = get_min_avg_max_std('sc')

([463.5663127393,
  435.9961683982,
  437.1410683729,
  401.0647835283,
  465.4743039734,
  447.2219205587,
  419.31292643,
  419.2639200757,
  540.9676733034,
  517.4999006037,
  441.1098083197,
  393.4340938438,
  431.8168578374,
  390.2115309882,
  422.9633280607,
  405.1460207483,
  447.3319043392,
  334.6207920761,
  363.621174838,
  401.861799784,
  446.8884999623,
  441.9901601873,
  388.1193211904,
  373.6320407851,
  393.3710688825,
  371.3696783668,
  426.2312568898,
  380.4979076644,
  451.7766768351,
  416.7909648198,
  428.9126360382,
  382.7467588378,
  369.7079217421,
  411.5138180276,
  310.9990004448,
  330.879210828,
  385.7835107266,
  438.6806725904,
  338.6312411243,
  416.7446572041,
  439.9346645097,
  358.9760652445,
  433.1263993071,
  369.566812741,
  354.0539060222,
  386.2087230428,
  543.7933343006,
  420.3667759905,
  372.9106516218,
  410.0446004151,
  417.0566255659,
  402.2898114266,
  632.9526362217,
  494.9321360682,
  519.6679781949,
  381.4106427165

In [9]:
# Write the compact (summary-statistics) table WITHOUT the nested 'mfcc' lists.
# DataFrame.drop returns a new frame rather than modifying `data`, so its result
# has to be used: the previous version discarded it and wrote 'mfcc' to the file
# anyway. `data` itself is deliberately left untouched, because the next cell
# still drops 'mfcc' from it.
compact_features = data.drop(columns='mfcc')
compact_features.to_json('../4_accents_compact_features_8k.json')

In [10]:
# Model inputs: every column except the target ('lable') and the raw 'mfcc' lists.
features = data.drop(columns=['lable', 'mfcc'])
features

Unnamed: 0,min_rms,avg_rms,max_rms,std_rms,min_zrc,avg_zrc,max_zrc,std_zrc,min_sb,avg_sb,max_sb,std_sb,min_sc,avg_sc,max_sc,std_sc
0,0.001302,0.085629,0.274967,0.087080,0.050781,0.176424,0.395508,0.073873,387.438403,803.934163,1168.027720,195.080457,463.566313,1003.997707,1942.338477,278.503024
1,0.001901,0.054131,0.191938,0.066031,0.050781,0.134560,0.245117,0.036232,378.685831,711.221839,1009.749549,139.905201,435.996168,800.498312,1349.526164,146.110740
2,0.001958,0.019840,0.125847,0.026418,0.047852,0.130952,0.367188,0.061415,477.778595,797.687435,1198.976339,165.281454,437.141068,844.235767,1682.045204,254.135464
3,0.001882,0.015328,0.066335,0.018860,0.046875,0.129330,0.250000,0.034370,363.824719,755.377248,980.044779,124.983393,401.064784,810.301636,1293.554625,155.625001
4,0.001992,0.054969,0.196143,0.062170,0.062500,0.142479,0.388672,0.052279,273.543067,741.103758,1300.716837,191.097219,465.474304,858.034120,2026.514792,265.196941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6178,0.001449,0.049301,0.126867,0.040507,0.041992,0.102352,0.268555,0.052828,316.337024,661.713862,1150.784442,180.930009,381.637938,686.748140,1597.983110,245.980583
6179,0.001966,0.020496,0.082519,0.023464,0.045898,0.139918,0.383789,0.056097,417.888161,791.410364,1253.398421,161.876691,423.111493,890.967987,1865.436616,280.222432
6180,0.001485,0.031932,0.126697,0.042253,0.039062,0.150211,0.463867,0.072414,374.246925,836.059971,1186.026940,109.493555,454.293864,964.311985,2156.357695,264.458575
6181,0.001323,0.059751,0.194759,0.072462,0.062500,0.164551,0.542969,0.082551,384.930257,785.124942,1147.130713,161.064346,381.207811,938.532349,2132.159282,301.114733


In [11]:
# Target vector: the 'lable' column copied into a standalone NumPy array.
lables = data['lable'].to_numpy(copy=True)

lables

array(['bangla', 'malayalam', 'malayalam', ..., 'malayalam', 'bangla',
       'telugu'], dtype=object)

In [12]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
# (train_test_split is already imported in the notebook's first cell.)
train_features, test_features, train_labels, test_labels = train_test_split(
    features,
    lables,
    test_size=0.25,
    random_state=42,
)

In [13]:
# Sanity-check the split: shapes of the train/test feature frames, then the train/test label arrays.
tuple(part.shape for part in (train_features, test_features, train_labels, test_labels))

((4637, 16), (1546, 16), (4637,), (1546,))

In [16]:
# Sweep the learning rate of a gradient-boosting classifier and print the test
# accuracy plus a per-class report for every setting.
#
# subsample=0.3 fits each tree on a random 30% of the training rows, and
# validation_fraction / n_iter_no_change set aside a random 10% for early
# stopping. Both are random draws, so random_state is pinned; without it the
# scores change from run to run and the settings cannot be compared fairly.
#
# NOTE(review): the settings are compared on the held-out test split, so the
# printed scores are optimistic as a basis for picking lr — consider a
# validation split or cross-validation for the selection itself.
# NOTE: `model_gbm` is rebound on every pass, so after the loop it holds the
# model for the LAST rate in lr_list (0.1); later cells evaluate that model.
separator = '---------------------------------------------------------------------------------------------------------'
lr_list = [0.05, 0.2, 0.25, 0.3, 0.4, 0.5, 0.1]
for lr in lr_list:
    model_gbm = GradientBoostingClassifier(n_estimators=1000,
                                           learning_rate=lr,
                                           max_depth=4,
                                           subsample=0.3,
                                           validation_fraction=0.1,
                                           n_iter_no_change=20,
                                           max_features='log2',
                                           random_state=42,
                                           verbose=1)
    model_gbm.fit(train_features, train_labels)
    # Predict once and reuse the result for the per-class report.
    test_predictions = model_gbm.predict(test_features)
    print(separator)
    print('lr =', lr)
    print(model_gbm.score(test_features, test_labels))
    print(separator)
    print(classification_report(test_labels, test_predictions))
    print(separator)
    

      Iter       Train Loss      OOB Improve   Remaining Time 
         1           1.2356           0.0694            1.06m
         2           1.1649           0.0674            1.02m
         3           1.1241           0.0531           59.64s
         4           1.0702           0.0464           59.69s
         5           1.0167           0.0474            1.01m
         6           0.9532           0.0429           59.49s
         7           0.9342           0.0361           59.00s
         8           0.8930           0.0338            1.01m
         9           0.8547           0.0320            1.01m
        10           0.8341           0.0275           59.87s
        20           0.6150           0.0151            1.01m
        30           0.4977           0.0095            1.02m
        40           0.4104           0.0052           59.85s
        50           0.3461           0.0028           59.10s
        60           0.2998           0.0026           59.34s
       

         4           0.4511           0.0558           57.17s
         5           0.4052           0.0513           57.63s
         6           0.3690           0.0305           58.10s
         7           0.3359          -0.0238           58.19s
         8           0.3016        -785.3702           59.43s
         9        1746.5396           0.0170           59.87s
        10          88.4939         -47.9183           59.57s
        20         111.2999           0.0017           57.24s
        30         572.3567           0.0003           55.88s
        40         572.5003          -0.0003           55.41s
        50          88.3846     -111475.0300           55.05s
---------------------------------------------------------------------------------------------------------
lr = 0.4
0.8725743855109961
---------------------------------------------------------------------------------------------------------
              precision    recall  f1-score   support

      bangla       0.91

In [17]:
# Confusion matrix for the model left over from the sweep
# (rows: true class, columns: predicted class).
final_predictions = model_gbm.predict(test_features)
print(confusion_matrix(test_labels, final_predictions))

[[334   9   0  17]
 [  8 557  14  22]
 [  0  18 172   0]
 [ 14  25   1 355]]


In [None]:
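# Confusion matrix pasted for comparison — presumably from a different run or
# configuration (TODO confirm which). Kept as a comment so that executing this
# cell no longer raises a SyntaxError:
# [[322  14   0  24]
#  [ 16 507  33  45]
#  [  4  37 146   3]
#  [ 29  49   8 309]]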

In [18]:
# Show the true test labels next to the model's predictions for a quick visual comparison.
test_labels, model_gbm.predict(test_features)

(array(['bangla', 'malayalam', 'bangla', ..., 'bangla', 'telugu', 'telugu'],
       dtype=object),
 array(['bangla', 'malayalam', 'bangla', ..., 'telugu', 'telugu', 'telugu'],
       dtype=object))