Import

In [1]:
from classification.datasets import Dataset
from classification.utils.audio_student import Feature_vector_DS
from sklearn.model_selection import StratifiedKFold,train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from classification.utils.utils import accuracy
import numpy as np
import os, threading
import matplotlib.pyplot as plt
from time import time

Initialisation

In [2]:
### TO RUN
dataset = Dataset()
classnames = dataset.list_classes()
### TO RUN
PATH = "data/feature_freq/"  # where to save the features matrices
model_dir = "data/models/"  # where to save the models
os.makedirs(PATH, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

In [3]:
myds = Feature_vector_DS(dataset, Nft=512, nmel=20, duration=950, shift_pct=0.0, fs=10900)
train_pct = 0.7
featveclen = len(myds["fire", 0])  # number of items in a feature vector
nitems = len(myds)  # number of sounds in the dataset
naudio = dataset.naudio  # number of audio files in each class
nclass = dataset.nclass  # number of classes
nlearn = round(naudio * train_pct)  # number of sounds among naudio for training

data_aug_factor = 1
class_ids_aug = np.repeat(classnames, naudio * data_aug_factor)

"Compute the matrixed dataset, this takes some seconds, but you can then reload it by commenting this loop and decommenting the np.load below"
X = np.zeros((data_aug_factor * nclass * naudio, featveclen))
for s in range(data_aug_factor):
    for class_idx, classname in enumerate(classnames):
        for idx in range(naudio):
            featvec = myds[classname, idx]
            X[s * nclass * naudio + class_idx * naudio + idx, :] = featvec
np.save(PATH + "feature_matrix_2D.npy", X)

class_ids_aug = np.repeat(classnames, naudio * data_aug_factor)
y = class_ids_aug.copy()

fonction pour thread

In [4]:
debut=500
fin=int(20e3)
pas=100
n_splits = 5
length = (fin-debut)//pas
t=0.0464

abscisse = np.zeros(length)
result = np.zeros(length)
std = np.zeros(length)


def fun(begin,end, step=pas, force=False, PCA_enable=False, NPCA=8):
    model_knn = RandomForestClassifier(150)
    print(step)
    pos_initial = (begin-debut)//step
    print(f"th : {begin}, {pos_initial}")

    t1 = time()
    if PCA_enable:
        pca = PCA(n_components=NPCA,whiten=True)
    for i in range(begin,end,step):
        #Creation/chargement des feature vector
        try:
            X = np.load(PATH+str(i)+"feature_matrix_2D.npy")
        except:
            myds_th = Feature_vector_DS(dataset, Nft=int(t*i), nmel=20, duration=5000, shift_pct=0.0,fs=i)
            featveclen=len(myds_th['fire', 0])
            X = np.zeros((data_aug_factor * nclass * naudio, featveclen))
            for s in range(data_aug_factor):
                for class_idx, classname in enumerate(classnames):
                    for idx in range(naudio):
                        featvec = myds_th[classname, idx]
                        X[s * nclass * naudio + class_idx * naudio + idx, :] = featvec
            np.save(PATH +str(i)+ "feature_matrix_2D.npy", X)
    
        #classification
        acc = []
        for _ in range(n_splits):
            
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)  # random_state=1

            mean = np.mean(X_train, axis=0)
            X_train_mean=X_train-mean
            X_learn_normalised = X_train_mean/ np.linalg.norm(X_train_mean, axis=1, keepdims=True)

            mean = np.mean(X_test, axis=0)
            X_val_mean=X_test-mean
            X_val_normalised = X_val_mean/ np.linalg.norm(X_val_mean, axis=1, keepdims=True)

            if PCA_enable:
                
                X_learn_normalised = pca.fit_transform(X_learn_normalised)
                X_val_normalised = pca.transform(X_val_normalised)

            if(len(X_learn_normalised[0])!=len(X_val_normalised[0])):
                raise(f"ERrOR {len(X_learn_normalised[0])}, {len(X_val_normalised[0])}")
            
            model_knn.fit(X_learn_normalised, y_train)
            prediction_knn = model_knn.predict(X_val_normalised)
            acc.append(accuracy(prediction_knn, y_test))


        k = pos_initial+(i-begin)//step
        result[k] = np.mean(acc)
        abscisse[k] = i
    t2=time()

    print(f"Fun\tfrom {begin} to {end} time ellapsed {t2-t1:.2f}")


Threading gogogo

In [None]:
number_of_thread = 4
threads = []

space = (fin-debut)//number_of_thread
commencement=debut
for i in range(number_of_thread):
    print(f"thread lauchn between {commencement+i*space-(commencement+i*space)%pas} and {commencement+(i+1)*space-(commencement+(i+1)*space)%pas+1}")
    thread = threading.Thread(target=fun, args=(commencement+i*space, commencement+(i+1)*space,pas))
    thread.start()
    threads.append(thread)

# Attendre que tous les threads se terminent
for thread in threads:
    thread.join()

#Plot
    
plt.plot(abscisse,result)
plt.show()


thread lauchn between 500 and 5301
100
th : 500, 0
thread lauchn between 5300 and 10201
100
th : 5375, 48
thread lauchn between 10200 and 15101
100
th : 10250, 97
thread lauchn between 15100 and 20001
100
th : 15125, 146


In [None]:
number_of_thread = 5
threads = []

space = (fin-debut)//number_of_thread
commencement=debut
for i in range(number_of_thread):
    print(f"thread lauchn between {commencement+i*space-(commencement+i*space)%pas} and {commencement+(i+1)*space-(commencement+(i+1)*space)%pas+1}")
    thread = threading.Thread(target=fun, args=(commencement+i*space-(commencement+i*space)%pas, commencement+(i+1)*space-(commencement+(i+1)*space)%pas+1,pas, False,True))
    thread.start()
    threads.append(thread)

# Attendre que tous les threads se terminent
for thread in threads:
    thread.join()

#Plot
    
plt.plot(abscisse,result)
plt.show()

In [None]:
couleur=["green","purple","yellow","orange"]
for i in range(5000,1000,1000):

    lim = np.where(abscisse==i)[0][0]
    p,m = np.polyfit(abscisse[:lim],result[:lim],1)
    p1,m1 = np.polyfit(abscisse[lim:],result[lim:],1)
    print(p1)
    plt.plot(abscisse,result)
    plt.plot(abscisse[:lim],p*abscisse[:lim]+m,color=couleur[(i-5000)//1000])
    plt.plot(abscisse[lim:],p1*abscisse[lim:]+m1,color=couleur[(i-5000)//1000])
    plt.show()

