In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib widget

def read_omg_csv(path_palm_data: str, 
                 n_omg_channels: int = 50, 
                 n_acc_channels: int = 3, 
                 n_gyr_channels: int = 3,
                 n_enc_channels: int = 6,
                 n_mag_channels: int = 0, 
                 button_ch: bool = True, 
                 sync_ch: bool = True, 
                 timestamp_ch: bool = True) -> pd.DataFrame:
    
    '''
    Reads CSV data for OMG data
    NB: data must be separated by " " separator

        Parameters:
                path_palm_data  (str): path to csv data file
                n_omg_channels  (int): Number of OMG channels
                n_acc_channels  (int): Number of Accelerometer channels, default = 0
                n_gyr_channels  (int): Number of Gyroscope channels, default = 0
                n_mag_channels  (int): Number of Magnetometer channels, default = 0
                n_enc_channels  (int): Number of Encoder channels, default = 0
                button_ch      (bool): If button channel is present, default = True
                sync_ch        (bool): If synchronization channel is present, default = True
                timestamp_ch   (bool): If timestamp channel is present, default = True

        Returns:
                df_raw (pd.DataFrame): Parsed pandas Dataframe with OMG data
    '''
    
    df_raw = pd.read_csv(path_palm_data, sep=' ', 
                         header=None, 
                         skipfooter=1, 
                         skiprows=1, 
                         engine='python')
    columns = np.arange(n_omg_channels).astype('str').tolist()
    
    for label, label_count in zip(['ACC', 'GYR', 'MAG', 'ENC'], 
                                  [n_acc_channels, n_gyr_channels, n_mag_channels, n_enc_channels]):
        columns = columns + ['{}{}'.format(label, i) for i in range(label_count)]
        
    if button_ch:
        columns = columns + ['BUTTON']
        
    if sync_ch:
        columns = columns + ['SYNC']
        
    if timestamp_ch:
        columns = columns + ['ts']
        
    df_raw.columns = columns
    
    return df_raw

gestures_train = read_omg_csv('sprint2_pilote1_gestures.palm')
gestures_train_2 = read_omg_csv('sprint2_pilote2_gestures.palm')
free_movements = read_omg_csv('sprint2_pilote1_fingers.palm')
free_movements_2 = read_omg_csv('sprint2_pilote2_fingers.palm')


#gestures_train = read_omg_csv('sprint2_pilote1_gestures.palm').iloc[:26327]
#gestures_test = read_omg_csv('sprint2_pilote1_gestures.palm').iloc[26327:]
#gestures_train_2 = read_omg_csv('sprint2_pilote2_gestures.palm').iloc[:18655]
#gestures_test_2 = read_omg_csv('sprint2_pilote2_gestures.palm').iloc[18655:]
#free_movements = read_omg_csv('sprint2_pilote1_fingers.palm').iloc[12836:]
#free_movements_test = read_omg_csv('sprint2_pilote1_fingers.palm').iloc[:12836]
#free_movements_2 = read_omg_csv('sprint2_pilote2_fingers.palm').iloc[:9078]
#free_movements_2_test = read_omg_csv('sprint2_pilote2_fingers.palm').iloc[9078:]

OMG_CH = [str(i) for i in range(50)]
GLOVE_CH = ['ENC0', 'ENC1', 'ENC2', 'ENC3', 'ENC4', 'ENC5']

# Соединим данные 2-х пилотов
X_pilots = np.vstack([gestures_train[OMG_CH].values, gestures_train_2[OMG_CH].values])
y_pilots = np.vstack([gestures_train[GLOVE_CH].values, gestures_train_2[GLOVE_CH].values])

X_free = np.vstack([free_movements[OMG_CH], free_movements_2[OMG_CH]])
y_free = np.vstack([free_movements[GLOVE_CH], free_movements_2[GLOVE_CH]])

# Соединим протокольные и свободные жесты обоих пилотов
X_all = np.vstack([X_pilots, X_free])
y_all = np.vstack([y_pilots, y_free])

#X_pilots_test = np.vstack([gestures_test[OMG_CH].values, gestures_test_2[OMG_CH].values])
#y_pilots_test = np.vstack([gestures_test[GLOVE_CH].values, gestures_test_2[GLOVE_CH].values])
#X_free_test = np.vstack([free_movements_test[OMG_CH], free_movements_2_test[OMG_CH]])
#y_free_test = np.vstack([free_movements_test[GLOVE_CH], free_movements_2_test[GLOVE_CH]])
#X_all_test = np.vstack([X_pilots_test, X_free_test])
#y_all_test = np.vstack([y_pilots_test, y_free_test])

# Для предпроцессинга обучим классификатор (для определения действия/бездействия)
def get_diff(array, step=1, threshold=500):
    """_вычислятель изменений датчиков
        за step - шагов,
        с отсечением всего что ниже threshold - уровня_
    """
    new_array = np.zeros(array.shape[0])
    for i in np.arange(array.shape[0]):
        if i == 0:
            pass
        else:
            new_array[i] = np.sum(abs(array[i-step] - array[i]))
    new_array[new_array < threshold] = 0
    new_array[new_array > 0] = 1
    return new_array

X_all_diff = get_diff(X_all)
#X_all_diff_test = get_diff(X_all_test)

from sklearn.linear_model import RidgeClassifier

ridge = RidgeClassifier().fit(X_all, X_all_diff)
X_all_class = ridge.predict(X_all)
#X_all_class_test = ridge.predict(X_all_test)
X_new = np.insert(X_all, -1, X_all_class, axis=1)
#X_new_test = np.insert(X_all_test, -1, X_all_class_test, axis=1)

X_train = X_new
y_train = y_all
#X_valid = X_new_test
#y_valid = y_all_test

X_train.shape, y_train.shape

((95567, 51), (95567, 6))

In [2]:
# ~ 18.5s
from sklearn.multioutput import MultiOutputRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LassoLars
from sklearn.ensemble import VotingRegressor

estimators = [
    ('dt', DecisionTreeRegressor()),('ll', LassoLars(alpha=0.1))
]
model_vr = MultiOutputRegressor(VotingRegressor(estimators=estimators, weights=[0.45, 0.55])).fit(X_train, y_train)

In [3]:
# ~ 20s
import xgboost as xgb

# параметры оптимизированы optun'ой
params = {'learning_rate': 0.06329973864656831,
 'max_depth': 6,
 'subsample': 0.9814063371832862,
 'colsample_bytree': 0.41087811860602663,
 'min_child_weight': 8}

model_xgb = xgb.XGBRegressor(**params)
model_xgb.fit(X_train, y_train)

In [4]:
# ~ 26s
import xgboost as xgb

# raw model
model_xgb_raw = xgb.XGBRegressor()
model_xgb_raw.fit(X_train, y_train)

In [5]:

# ~ 0.1s
df_sim = read_omg_csv('sprint2_pilote1_gestures.palm').iloc[:10000]
#df_sim = read_omg_csv('sprint2_pilote2_gestures.palm').iloc[20000:]
#df_sim = read_omg_csv('sprint2_pilote1_fingers.palm').iloc[:10000]
#df_sim = read_omg_csv('sprint2_pilote2_fingers.palm').iloc[:10000]
df_sim.shape

(10000, 65)

In [6]:
# ~ 0.1s
import time
import serial
from IPython.display import clear_output

crc_table = [
    54, 181, 83, 241, 89, 16, 164, 217, 34, 169, 220, 160, 11, 252,
    111, 241, 33, 70, 99, 240, 234, 215, 60, 206, 68, 126, 152, 81,
    113, 187, 14, 21, 164, 172, 251, 16, 248, 215, 236, 90, 49, 53,
    179, 156, 101, 55, 65, 130, 161, 22, 218, 79, 24, 168, 152, 205,
    115, 141, 23, 86, 141, 58, 122, 215, 252, 48, 69, 115, 138, 66,
    88, 37, 63, 104, 176, 46, 139, 246, 222, 184, 103, 92, 154, 174,
    97, 141, 195, 166, 227, 150, 140, 48, 121, 243, 13, 131, 210, 199,
    45, 75, 180, 104, 97, 82, 251, 90, 132, 111, 229, 175, 146, 216,
    153, 86, 166, 33, 184, 100, 225, 248, 186, 54, 89, 39, 2, 214, 2,
    114, 197, 6, 35, 188, 245, 64, 220, 37, 123, 132, 190, 60, 189, 53,
    215, 185, 238, 145, 99, 226, 79, 54, 102, 118, 210, 116, 51, 247,
    0, 191, 42, 45, 2, 132, 106, 52, 63, 159, 229, 157, 78, 165, 50,
    18, 108, 193, 166, 253, 3, 243, 126, 111, 199, 152, 36, 114, 147,
    57, 87, 14, 16, 160, 128, 97, 189, 51, 115, 142, 8, 70, 71, 55, 42,
    193, 65, 207, 122, 158, 26, 21, 72, 139, 33, 230, 230, 116, 134, 5,
    213, 165, 107, 41, 134, 219, 190, 26, 29, 136, 174, 190, 108, 185, 172,
    137, 239, 164, 208, 207, 206, 98, 207, 12, 0, 174, 64, 20, 90, 49, 12,
    67, 112, 109, 78, 114, 165, 244, 183, 121
];
    
def get_crc(data, count):
    result = 0x00
    l = count
    while (count>0):   
        result = crc_table[result ^ data[l-count]]
        count -= 1;
    return result;

def drv_abs_one(ser, control):
    if ser is None:
        return
    pack = (np.array([80, 0x01, 0xBB, 6] + [c for c in control] + [0], dtype='uint8'))
    pack[-1] = get_crc(pack[1:], 9)
    return (pack, ser.write(bytearray(pack)))

## Что сделано:

* Данные для моделированния - последовательно соединённые данные 2-х пилотов (train протокольных жестов и free_movements), дополнительно вручную нарезанные на train и valid

* Подготовлены к работе **2 модели** (`VotingRegressor`, `XGBRegressor`)

* Предпроцессинг - **добавляем 1 фичу** (которая является флагом действия/бездействия)

* Постпроцессинг:

* * **дискретизация таргетов** на заданное количество диапазонов (по умолчанию 20 диапазон, по 5 единиц в каждом) 

* * **экспоненциальный сглаживатель пиков** с повторным применением дискретизации (как-будто если сделать это пару раз, можно добиться сглаживания без длинных окон)

* * **сглаживатель пиков** по предыдущим показаниям с шагом 3 и 4 (по умолчанию шаг 3) даёт задержку между предсказанием и выводом на протез в 1 временной шаг

In [7]:
# ~ 0.1s
from sklearn.metrics import mean_squared_error as mse
import collections # нужно для deque (очередь нужна чтобы хранить пред-предыдущее предсказание)

def MSE(y, y_pred):
    """_получение среднеквадратичной ошибки для каждого таргета_

    """
    ENC0 = mse(y[:,0], y_pred[:,0])
    ENC1 = mse(y[:,1], y_pred[:,1])
    ENC2 = mse(y[:,2], y_pred[:,2])
    ENC3 = mse(y[:,3], y_pred[:,3])
    ENC4 = mse(y[:,4], y_pred[:,4])
    ENC5 = mse(y[:,5], y_pred[:,5])
    return pd.Series([ENC0, ENC1, ENC2, ENC3, ENC4, ENC5])

def preprocessing(x):
    """_добавление идентификатора действия/бездействия_
    """  
    return np.append(x, ridge.predict([x]))

def inference(x):
    """_предикт по одной из моделей_
    """
    #y = model_vr.predict([x])[0]
    y = model_xgb.predict([x])[0]
    #y = model_xgb_raw.predict([x])[0]
    return y

def postprocessing(array, step=10):
    """_дискретизация выходных сигналов по 100/step количеству уровней
        по умолчанию step=10 -> 10 уровней сигналов_
    """
    array[array < 0] = 0
    array = np.round(array / step, 0).astype(int) * step
    return array

def commands(x, prev):
    """_сглаживатель пиков по 2-м предыдущим шагам_
    """
    if prev is None:
        y = x
    else:
        y = x*0.5 + prev*0.5 # Holt-Winters filter
    return postprocessing(y)

#def commands(dq):
    """_сглаживатель пиков по 2-м предыдущим шагам_
    """
    if len(dq) < 2:
        return np.zeros(6)
    else:
        if (dq[-1] == dq[0]).any():
            dq[1][dq[-1] == dq[0]] = dq[0][dq[-1] == dq[0]]
    return dq[-1]

#def moving_average(dq, n=4):
    ret = np.cumsum(dq, dtype=float, axis=1)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

In [8]:
TIMEOUT = 0.066 # на исходном значении 0,033мс запускается раз через раз 
                #(хотя цикл вычислений на самой медленной модели занимает в среднем 0,021мс)
DEBUG = False
    
i = 0
ts_old = time.time()
ts_diff = 0;

#dq = collections.deque(maxlen=4)
y_prev = None


y_dct = {
    'omg_sample':[],
    'enc_sample':[],
    'sample_preprocessed':[],
    
    'y_predicted':[],
    'y_postprocessed':[],
    'y_commands':[],
    #'y_roll':[]
}
while True:    
    
    # [Data reading]
    ts_start = time.time()
    
    try:
        # [Sim data]
        if i < len(df_sim):
            sample = df_sim.values[i]
        else:
            break
        # [/Sim data]
        [omg_sample, acc_sample, enc_sample, [button, sync, ts]] = np.array_split(sample, [50, 56, 62])
        
    except Exception as e:
        print(e)
        
    # [/Data Reading]
        
    # [Data preprocessing]
    sample_preprocessed = preprocessing(omg_sample)
    # [/Data preprocessing]
    
    # [Inference]
    y_predicted         = inference(sample_preprocessed)
    # [/Inference]
    
    # [Inference Postprocessing]
    y_postprocessed   = postprocessing(y_predicted)
    # [/Inference Postprocessing]
        
    # [Commands composition]
    y_commands          = commands(y_postprocessed, y_prev)
    # [/Commands composition]
    
    #y_roll = moving_average(dq)
    
    # [Commands sending]
    # NO COMMANDS SENDING IN SIMULATION
    # [/Commands sending]
    
    # [Data logging]
    y_dct['omg_sample'].append(omg_sample)
    y_dct['enc_sample'].append(enc_sample)
    y_dct['sample_preprocessed'].append(sample_preprocessed)
    y_dct['y_predicted'].append(y_predicted)
    y_dct['y_postprocessed'].append(y_postprocessed)
    y_dct['y_commands'].append(y_commands)
    #y_dct['y_roll'].append(y_roll)
    # [/Data logging]

   
    y_prev = y_postprocessed
    #dq.append(y_commands)
    
    if DEBUG:
        clear_output(wait=True)

        # sanity check: Sizes of SAMPLE=65, OMG=50, ACC=6, ENCODERS=6
        print(f'SAMPLE SIZE: {len(sample)}, OMG: {len(omg_sample)}, ACC: {len(acc_sample)}, ENCODERS: {len(enc_sample)}')
        print(f'BUTTON: {button}, SYNC: {sync}, TS: {ts}')
        print(y_commands)
    
    ts_diff = time.time() - ts_start
    assert(ts_diff<TIMEOUT), 'Calculation cycle takes more than TIMEOUT, halting...'
    ts_old = ts_start
    i += 1 

In [9]:
for key, val in y_dct.items():
    # print(f"len({key}) = {len(y_dct[key])}")
    y_dct[key] = np.stack(val)
    print(f"{key}.shape = {y_dct[key].shape}")
    
metrics_test = {col : mse(y_dct['enc_sample'][:,col], y_dct['y_commands'][:,col]) for col in np.arange(6)}

metrics_test = pd.Series(metrics_test)
metrics_test

omg_sample.shape = (10000, 50)
enc_sample.shape = (10000, 6)
sample_preprocessed.shape = (10000, 51)
y_predicted.shape = (10000, 6)
y_postprocessed.shape = (10000, 6)
y_commands.shape = (10000, 6)


0     41.6263
1     34.0267
2     40.3520
3     43.3706
4    122.6701
5      0.0000
dtype: float64

### Online (prosthesis or virtual hand)

In [10]:
TIMEOUT = 0.066
DEBUG = True

dq = collections.deque(maxlen=3)

ser = None
# ser_port = None
ser_port = '/dev/cu.usbmodem3498365F31351'

if ser_port is not None:
    ser = serial.Serial(port=ser_port, timeout=2*TIMEOUT)
    ser.write('T1#\r\n'.encode('utf-8')) # T1 for Timestamp activate
    ser.write('M2#\r\n'.encode('utf-8')) # M2 for Mode == 2 = send samples
    ser.write('S2#\r\n'.encode('utf-8')) # SYNC to 2 for sanity check

    # flush buffers
    ser.reset_input_buffer()
    ser.read()
    
    i = 0;
    while(ser.in_waiting):
        print(f'Flushing buffers {i}: {ser.in_waiting}', end='    \r')
        ser.read_all()
        time.sleep(0.005)
        i+=1;
    ser.readline()
    ser.readline()
    
i = 0
ts_old = time.time()
ts_diff = 0;

y_previous = None
while True:    
    
    # [Data reading]
    s = ser.readline()
    ts_start = time.time()
    
    try:
        sample = np.array(s.decode('UTF-8')\
                           .replace('\r\n', "")\
                           .split(' ')
                         ).astype(int)
        [omg_sample, acc_sample, enc_sample, [button, sync, ts]] = np.array_split(sample, [50, 56, 62])
        
    except Exception as e:
        print(e)
        
    # [/Data Reading]
        
    # [Data preprocessing]
    sample_preprocessed = preprocessing(omg_sample)
    # [/Data preprocessing]
    
    # [Inference]
    y_predicted         = inference(sample_preprocessed)
    # [/Inference]
    
    # [Inference Postprocessing]
    y_postprocessed     = postprocessing(y_predicted)
    # [/Inference Postprocessing]
    
    # [Commands composition]
    y_commands          = commands(dq)
    # [/Commands composition]
    
    # [Commands sending]
    pack, _ = drv_abs_one(ser, list(y_commands)); # + [0]
    # [/Commands sending]
    
    dq.append(y_postprocessed)
    
    if DEBUG:
        clear_output(wait=True)

        # sanity check: iteration should increase monotonically, TIMEDIFF approximately 32-34 ms, CYCLETIME < TIMEOUT, WAITING should be == 0
        print(f'ITERATION:\t{i}\tTIMEDIFF:\t{(ts_start - ts_old)*1000: .0f}\tCYCLETIME:\t{ts_diff*1000:.0f}\tWAITING:\t{ser.in_waiting}')
        print('INPUT:\n', s)

        # sanity check: Sizes of SAMPLE=65, OMG=50, ACC=6, ENCODERS=6
        print(f'SAMPLE SIZE: {len(sample)}, OMG: {len(omg_sample)}, ACC: {len(acc_sample)}, ENCODERS: {len(enc_sample)}')
        print(f'BUTTON: {button}, SYNC: {sync}, TS: {ts}')
        print(y_commands)
        print(pack)
    
    ts_diff = time.time() - ts_start
    assert(ts_diff<TIMEOUT), 'Calculation cycle takes more than TIMEOUT, halting...'
    ts_old = ts_start
    i += 1 

SerialException: [Errno 2] could not open port /dev/cu.usbmodem3498365F31351: [Errno 2] No such file or directory: '/dev/cu.usbmodem3498365F31351'