In [1]:
import os
import threading
import socket
import time
import pickle
from libs.TCP import TCP_sender
from libs.TCP import TCP_receiver
from collections import defaultdict
from sklearn.mixture import GaussianMixture

from python_speech_features import *
import numpy as np
from scipy.io import wavfile as wav
import threading
import os
import sounddevice as sd
from libs.interface import ModelInterface

In [2]:
def calculate_nfft(samplerate, winlen):
    """Return the FFT size for one analysis window.

    The result is the smallest power of two that is greater than or equal to
    the window length expressed in samples (``winlen * samplerate``); it is
    never smaller than 1.

    :param samplerate: sampling rate of the signal in Hz.
    :param winlen: length of the analysis window in seconds.
    :returns: FFT size as an int.
    """
    samples_per_window = winlen * samplerate
    size = 1
    # Keep doubling until the window fits.
    while size < samples_per_window:
        size <<= 1
    return size

def preprocess(energy, feat):
    """Drop low-energy frames and return the log of the remaining features.

    The threshold is 1.5 times the mean energy of the leading frames (at most
    the first 100).  The mean is taken over the number of frames actually
    available, so clips shorter than 100 frames no longer get an artificially
    low threshold.

    :param energy: 1-D sequence with one energy value per frame.
    :param feat: 2-D array whose row ``i`` holds the features of frame ``i``.
    :returns: array with one row ``log(feat[i])`` per kept frame, or an empty
        1-D array when no frame is kept (callers test ``len(result) == 0``).
    """
    energy = np.asarray(energy)
    if energy.size == 0:
        return np.array([])

    head = energy[:100]
    # Built-in sum keeps the arithmetic identical to the historical
    # ``1.5 * sum(energy[:100]) / 100`` whenever 100 frames are available.
    bound = 1.5 * sum(head) / len(head)

    kept = np.flatnonzero(energy >= bound)
    if kept.size == 0:
        return np.array([])
    return np.log(np.asarray(feat)[kept, :])

def log_text(string):
    """Print a log line prefixed with ``> ML: `` and append it to ``log.txt``.

    ``log.txt`` is opened relative to the current working directory.

    :param string: message text (without prefix or trailing newline).
    """
    line = "> ML: " + string
    print(line)
    with open("log.txt", "a") as log_file:
        log_file.write(line + "\n")

def load_list(path_and_file):
    """Read the Python literal stored on the first line of a text file.

    The file is expected to have been written by ``save_list`` (``str(list)``
    on one line).  Only the first line is read.

    The line is parsed with ``ast.literal_eval`` rather than ``eval`` so that a
    tampered file can only yield plain literals and can never execute code.

    :param path_and_file: path of the file to read.
    :returns: the parsed object (normally a list).
    :raises ValueError, SyntaxError: if the line is not a valid Python literal.
    """
    import ast

    with open(path_and_file, "r") as f:
        first_line = f.readline()
    return ast.literal_eval(first_line.strip())

def save_list(path_and_file, input_list):
    """Overwrite a text file with the ``str()`` representation of a list.

    :param path_and_file: path of the file to (re)create.
    :param input_list: object to store; written as ``str(input_list)`` with no
        trailing newline.
    """
    serialized = str(input_list)
    with open(path_and_file, "w") as out_file:
        out_file.write(serialized)

def mkdir(path, directory):
    """Create ``path + directory`` unless it already is a directory.

    The two arguments are joined by plain string concatenation, so ``path``
    must already end with a separator.

    :param path: parent location, including the trailing separator.
    :param directory: name of the directory to create.
    """
    target = path + directory
    # isdir() is False both for a missing path and for a non-directory.
    if not os.path.isdir(target):
        os.mkdir(target)

def listdir(path):
    """List the entries of ``path`` without Jupyter's checkpoint folder.

    :param path: directory to list.
    :returns: list of entry names as returned by ``os.listdir``, with
        ``.ipynb_checkpoints`` left out.
    """
    return [entry for entry in os.listdir(path) if entry != '.ipynb_checkpoints']

class ML_server:
    """Speaker-identification server driven by short text commands over TCP.

    All data lives under ``<path>ML/``: ``train_data/p<N>/`` holds the
    recordings of person N, ``test_data/`` the recordings to classify and
    ``model/`` the pickled list of per-person Gaussian mixture models together
    with ``trained_list.txt``.  ``path`` is concatenated directly with these
    names, so it has to end with a path separator (e.g. ``./``).

    State flags (recomputed by ``update``):
        voice -- number of person folders that contain at least one file
        train -- key into ``train_states``
        test  -- key into ``test_states``
        pid   -- person id that new training recordings are stored under
    """

    # Audio / model parameters shared by training and testing.
    SAMPLE_RATE = 44100      # sampling rate assumed for every recording (Hz)
    WINDOW_LENGTH = 0.025    # analysis window used to size the FFT (seconds)
    GMM_COMPONENTS = 20      # mixture components per person
    GMM_MAX_ITER = 1000      # EM iteration limit
    VOTE_THRESHOLD = 0.6     # share of test files a person needs to win

    def __init__(self, path):
        """Store the data root; no file or network access happens here."""
        self.path = path
        self.ip = None
        self.port = None
        self.socket = None
        self.quit = False
        self.train = 0
        self.test = 0
        self.voice = 0
        self.pid = -1
        self.train_states = {
            0: 'No train data',
            1: 'Is training',
            2: 'Train possible',
            3: 'Already trained'
        }
        self.test_states = {
            0: 'No model',
            1: 'Is Training',
            2: 'Test possible'
        }

    def update_pid(self):
        """Return the first id ``k >= 1`` whose ``train_data/p<k>/`` folder is
        missing or empty."""
        k = 1
        while True:
            tpath = self.path + 'ML/train_data/p{}/'.format(k)
            if os.path.exists(tpath) and len(listdir(tpath)) > 0:
                k += 1
            else:
                return k

    # TCP related functions ----------------------------------------------
    def set_ip(self, ip):
        """Set the address to bind to; when left unset ``init`` resolves the
        local host name instead."""
        self.ip = ip

    def set_port(self, port):
        """Set the port attribute (``init`` overwrites it with the value read
        from ``ML_start.txt``)."""
        self.port = port

    def get_socket(self):
        """Return the listening socket created by ``init`` (``None`` before)."""
        return self.socket

    # server operation function ------------------------------------------
    def init(self):
        """Validate the data root, read ``ML_start.txt`` and start listening.

        ``ML_start.txt`` must contain, on its first three lines, the port, the
        list of train keywords and the list of test keywords (both as Python
        list literals).  The keyword lists are copied to
        ``train_keywords.txt`` / ``test_keywords.txt`` and the ``train_data``,
        ``test_data`` and ``model`` folders are created if missing.

        :returns: True when the socket is bound and listening, False otherwise
            (the reason is logged).
        """
        import ast

        ml_dir = self.path + 'ML/'

        # check if it is valid ML path
        if not os.path.isdir(self.path):
            log_text("(init) Not existing ML path")
            return False

        if not os.path.isdir(ml_dir):
            log_text("(init) Not existing ML folder")
            return False

        # start from a fresh log file inside the ML folder
        if os.path.exists(ml_dir + "log.txt"):
            os.remove(ml_dir + "log.txt")

        # read ML_start.txt file
        if not os.path.exists(ml_dir + "ML_start.txt"):
            log_text("(init) No ML_start.txt file")
            return False

        with open(ml_dir + "ML_start.txt", "r") as f:
            port = f.readline().rstrip("\n")
            train_keywords = f.readline().rstrip("\n")
            test_keywords = f.readline().rstrip("\n")

        # check train keywords
        # literal_eval instead of eval: the file content must never be able to
        # execute code, only describe a list literal.
        try:
            train_keywords = ast.literal_eval(train_keywords)
            if len(train_keywords) == 0:
                log_text("(init) No train keywords in the list.")
                return False
        except Exception:
            log_text("(init) Exception for train keywords occurred.")
            return False

        # check test keywords
        try:
            test_keywords = ast.literal_eval(test_keywords)
            if len(test_keywords) == 0:
                log_text("(init) No test keywords in the list.")
                return False
        except Exception:
            log_text("(init) Exception for test keywords occurred.")
            return False

        # set basic ML folder
        mkdir(ml_dir, "train_data/")
        mkdir(ml_dir, "test_data/")
        mkdir(ml_dir, "model/")

        # set keywords
        save_list(ml_dir + "train_keywords.txt", train_keywords)
        save_list(ml_dir + "test_keywords.txt", test_keywords)

        # set socket
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self.ip is None:
                self.ip = socket.gethostbyname(socket.gethostname())
            self.port = int(port)
            print((self.ip, self.port))
            s.bind((self.ip, self.port))
            s.listen(5)
        except Exception:
            # Covers a non-numeric port, name resolution and bind failures.
            log_text("(init) Exception for binding occurred.")
            s.close()
            return False

        log_text("(init) ML server start with ip: {} and port {}".format(self.ip, self.port))
        self.socket = s
        self.quit = False
        self.train = 0
        self.test = 0
        self.voice = 0
        self.pid = self.update_pid()
        return True

    def end(self):
        """Close the listening socket (if one was created) and log the stop."""
        if self.socket is not None:
            try:
                self.socket.close()
            except OSError:
                # Best effort: the server is shutting down anyway.
                pass
        log_text("(end) ML server stoped.")

    def isquit(self):
        """Return True once a ``quit`` command has been received."""
        return self.quit

    # ML server funtioncality --------------------------------------------------------------
    def run(self, command, cs):
        """Execute one client command and return the text reply.

        ``command`` has the form ``name`` or ``name+argument``.  Known names:
        ``update``, ``connect``, ``trainrecv+<title>``, ``testrecv+<title>``,
        ``new``, ``end``, ``trainkey``, ``testkey``, ``train``, ``test`` and
        ``quit``.

        :param command: command string received from the client.
        :param cs: client socket; handed to ``Recv`` for the two receive
            commands, unused otherwise.
        :returns: reply string.  ``"recv"`` means a background thread now owns
            ``cs`` and will close it; ``""`` means there is nothing to send.
        """
        command_list = command.split('+')
        action = command_list[0]

        if action == 'update':
            log_text("(Update) Update request.")
            self.update()
            # status code: <voice count><train state><test state>
            return str(self.voice*100 + self.train*10 + self.test)
        elif action == 'connect':
            log_text("(Connect) Connection checked.")
            return "Connected"
        elif action in ('trainrecv', 'testrecv'):
            istrain = action == 'trainrecv'
            if istrain:
                log_text("(Recv) Receive train voice data")
            else:
                log_text("(Recv) Receive test voice data")
            # The title comes from the network and ends up in a file path:
            # keep only its last path component and refuse an empty one.
            title = os.path.basename(command_list[1]) if len(command_list) > 1 else ""
            if not title:
                log_text("(Recv) Missing or invalid title; request ignored.")
                return ""
            threading.Thread(target=self.Recv, args=(istrain, title, cs, )).start()
            return "recv"
        elif action == 'new':
            log_text("(New) Allocate new person ID on train voice data.")
            return str(self.pid)
        elif action == 'end':
            log_text("(End) Allocate person ID on train voice data is done.")
            # Move on to the next person and prepare that person's folder.
            self.pid += 1
            mkdir(self.path + 'ML/train_data/', 'p{}'.format(self.pid))
            return ""
        elif action == 'trainkey':
            log_text("(Key) Send train keywords")
            return '+'.join(load_list(self.path + "ML/train_keywords.txt"))
        elif action == 'testkey':
            log_text("(Key) Send test keywords")
            return '+'.join(load_list(self.path + "ML/test_keywords.txt"))
        elif action == 'train':
            log_text("(Train) Train request.")
            self.Train()
            return ""
        elif action == 'test':
            log_text("(Test) Test request.")
            return self.Test()
        elif action == 'quit':
            log_text("(Quit) Quit request.")
            self.quit = True
            return ""
        return ""

    def update(self):
        """Recompute ``voice``, ``train`` and ``test`` from the files on disk.

        ``train`` and ``test`` are left alone while a training run is in
        progress (``train == 1``).
        """
        train_root = self.path + 'ML/train_data/'
        trained_file = self.path + 'ML/model/trained_list.txt'

        self.voice = 0
        for person in listdir(train_root):
            if len(listdir(train_root + '{}/'.format(person))) > 0:
                self.voice += 1

        if self.train == 1:
            return

        if os.path.exists(self.path + 'ML/model/model'):
            self.test = 2
        else:
            self.test = 0

        if self.voice == 0:
            self.train = 0
        elif (os.path.exists(trained_file)
              and set(load_list(trained_file)) == set(listdir(train_root))):
            # The model was trained on exactly the folders present now.
            self.train = 3
        else:
            self.train = 2

    def Recv(self, istrain, title, cs):
        """Receive one recording from ``cs`` and store it as ``<title>.npy``.

        Training recordings go to ``train_data/p<pid>/``, test recordings to
        ``test_data/``.  Runs on a worker thread started by ``run``; the client
        socket is always closed on exit, also when the transfer fails.

        :param istrain: True for training data, False for test data.
        :param title: file name (without extension) chosen by the client.
        :param cs: connected client socket to read from.
        """
        print(cs)
        try:
            if istrain:
                person_dir = self.path + 'ML/train_data/p{}/'.format(self.pid)
                # The 'new' command does not create the folder, so make sure
                # it exists before the receiver writes into it.
                os.makedirs(person_dir, exist_ok=True)
                target = person_dir + '{}.npy'.format(title)
            else:
                target = self.path + 'ML/test_data/{}.npy'.format(title)
            TCP_receiver(cs).run(target)
        except Exception as exc:
            log_text("(Recv) Receiving '{}' failed: {}".format(title, exc))
        finally:
            cs.close()

    def Train(self):
        """Start training in a background thread if training is possible.

        Does nothing (apart from logging) when there is no training data or a
        training run is already active.  Otherwise the previous model files are
        removed, ``train``/``test`` are set to 1 and ``_train_model`` is started
        on a new thread.
        """
        self.update()
        if self.train in (0, 1):
            log_text("(Train) Train impoddible. Reason: " + self.train_states[self.train])
            return

        self.train = 1
        self.test = 1
        for stale in ('ML/model/model', 'ML/model/trained_list.txt'):
            stale_path = self.path + stale
            if os.path.exists(stale_path):
                os.remove(stale_path)

        threading.Thread(target=self._train_model).start()

    @staticmethod
    def _person_sort_key(name):
        """Sort key that orders ``p1, p2, ..., p10`` numerically; names that do
        not follow the ``p<number>`` pattern come last, alphabetically."""
        number = name[1:]
        if name.startswith('p') and number.isdecimal():
            return (0, int(number), name)
        return (1, 0, name)

    def _extract_features(self, npy_path):
        """Load one ``.npy`` recording and return its filtered log filterbank
        features (see ``preprocess``)."""
        signal = np.load(npy_path)
        fs = self.SAMPLE_RATE
        feat, energy = fbank(signal, samplerate=fs,
                             nfft=calculate_nfft(fs, self.WINDOW_LENGTH))
        return preprocess(energy, feat)

    def _train_model(self):
        """Fit one Gaussian mixture per person and store the list on disk.

        Thread body started by ``Train``.  On success ``train`` becomes 3 and
        ``test`` becomes 2.  On any failure the error is logged and the state
        leaves "Is training" so that the next ``update`` can recompute it;
        without this a failed run would block training and testing forever.
        """
        try:
            train_root = self.path + 'ML/train_data/'
            person_list = listdir(train_root)

            # Test() reports the 1-based position of the best model as the
            # person id, so the models must be fitted in numeric folder order
            # rather than in the arbitrary order os.listdir returns.
            person_feature = defaultdict(list)
            for person in sorted(person_list, key=self._person_sort_key):
                person_dir = train_root + '{}/'.format(person)
                for file in listdir(person_dir):
                    person_feature[person].extend(
                        self._extract_features(person_dir + file))

            person_gmms = []
            start_time = time.time()
            for feats in person_feature.values():
                gmm = GaussianMixture(n_components=self.GMM_COMPONENTS,
                                      max_iter=self.GMM_MAX_ITER)
                gmm.fit(feats)
                person_gmms.append(gmm)
            print('Trained done: {} seconds'.format(time.time() - start_time))

            with open(self.path + 'ML/model/model', 'wb') as f:
                pickle.dump(person_gmms, f)
            save_list(self.path + 'ML/model/trained_list.txt', person_list)

            # Publish the new state only after both files are on disk.
            self.train = 3
            self.test = 2
        except Exception as exc:
            log_text("(Train) Training failed: {}".format(exc))
            # Any value other than 1 lets update() recompute both flags.
            self.train = 0
            self.test = 0

    def Test(self):
        """Classify every recording in ``test_data/`` and return the winner.

        Each file votes for the model with the highest score; a person wins
        when at least ``VOTE_THRESHOLD`` of the votes agree.  All test files
        are deleted afterwards.

        :returns: ``"-1"`` when no model is available (or training is active),
            ``"0"`` when no person reaches the threshold, otherwise the
            1-based position of the winning model as a string.
        """
        self.update()
        if self.test != 2:
            return "-1"

        # SECURITY NOTE: pickle.load can execute arbitrary code; the model
        # file must only ever be one written by this server.
        with open(self.path + 'ML/model/model', 'rb') as f:
            person_gmms = pickle.load(f)

        test_dir = self.path + 'ML/test_data/'
        predict_list = []
        for file in listdir(test_dir):
            ffeat = self._extract_features(test_dir + file)
            if len(ffeat) == 0:
                continue
            # NOTE(review): the value is only used to rank the models for one
            # file, so the extra division by len(ffeat) and the 2.71 base do
            # not change the winner.
            out = [pow(2.71, np.sum(gmm.score(ffeat)) / len(ffeat)) for gmm in person_gmms]
            print(out)
            predict_list.append(out.index(max(out)) + 1)
        print('Test result: {}'.format(predict_list))

        ret = 0
        # dict.fromkeys keeps each candidate once, in first-seen order.
        for candidate in dict.fromkeys(predict_list):
            if predict_list.count(candidate) / len(predict_list) >= self.VOTE_THRESHOLD:
                ret = candidate

        for file in listdir(test_dir):
            os.remove(test_dir + file)

        return str(ret)

In [3]:
def main():
    """Run the ML server until a client sends the ``quit`` command.

    Asks for the data root on stdin, initialises the server and then serves
    one request per accepted connection: the first (up to 1024) bytes are
    decoded, lower-cased and passed to ``ML_server.run``.  A non-empty reply
    is sent back, except for the ``"recv"`` sentinel, which means a receiver
    thread has taken over the connection and will close it itself.
    """
    server = ML_server(input("Give ML_path: "))

    if not server.init():
        log_text("(init) Initalize Failed. Stop ML code.")
        server.end()
        return

    log_text("Start ML Server")
    s = server.get_socket()

    try:
        while not server.isquit():
            cs, addr = s.accept()
            response = ""
            try:
                response = server.run(cs.recv(1024).decode().lower(), cs)
                # Compare against the sentinel exactly: a substring test would
                # also drop legitimate replies that merely contain "recv".
                if response and response != 'recv':
                    # sendall keeps writing until the whole reply is out.
                    cs.sendall(response.encode())
                server.update()
            except Exception as exc:
                # One bad request must not take the whole server down.
                log_text("(main) Request from {} failed: {}".format(addr, exc))
            finally:
                # For "recv" the receiver thread owns and closes the socket.
                if response != 'recv':
                    cs.close()
    finally:
        # Always release the listening socket, also on Ctrl-C.
        server.end()
    return

In [4]:
# Start the server; this call prompts for the ML path and then blocks until a
# client sends the 'quit' command.
main()

Give ML_path: ./
('192.168.0.2', 14310)
> ML: (init) ML server start with ip: 192.168.0.2 and port 14310
> ML: Start ML Server
> ML: (Test) Test request.
[0.8902520852061524, 0.8860084646416354, 0.8837702099655511, 0.9012892558412888, 0.8876454640162095, 0.024298867074144748]
[0.8490548506776907, 0.8559404005282297, 0.8444527548409715, 0.8750123889962712, 0.8402746104600306, 0.04075007644842981]
[0.8967287973694616, 0.8922195417982903, 0.8921090542129292, 0.9054026928840355, 0.8917138058969825, 0.023970329601920498]
[0.8501599611887872, 0.863098331999697, 0.8471765121347486, 0.8769214577617175, 0.842268486644345, 0.042012320032808484]
[0.9055891890723539, 0.9035806240626499, 0.8997703716310487, 0.9127781994989598, 0.9067598343789398, 0.02956675775925111]
[0.8797814223983004, 0.8872596585265946, 0.8741805007901178, 0.8933348794548172, 0.8771636746418675, 0.04881675417574958]
Test result: [4, 4, 4, 4, 4, 4]
> ML: (Test) Test request.
Test result: []
> ML: (Update) Update request.
> ML: (