In [1]:
import numpy as np
import torch
import pandas as pd
import os

# 统计数据

In [2]:
def dropnan(a):
    """Return the leading run of ``a`` that precedes its first missing value.

    Despite the name, this does not filter out every NaN: scanning stops at
    the first element for which ``pd.isna`` is true, and everything from that
    position onward is discarded (later non-missing values included).

    Args:
        a: Indexable sequence supporting ``len(a)`` and ``a[i]``.

    Returns:
        ``np.ndarray`` built from the elements before the first missing
        value; all of ``a`` when nothing is missing.
    """
    cutoff = len(a)
    for pos in range(cutoff):
        if pd.isna(a[pos]):
            # First missing value found: keep only what came before it.
            cutoff = pos
            break
    return np.array([a[k] for k in range(cutoff)])

def count_execl(filename):
    """Count the usable samples in every row of a header-less Excel sheet.

    Each row is cut at its first missing cell (via ``dropnan``) and one is
    subtracted from the remaining length, so the first column is not counted
    (``interpolate_excel`` in this file reads that column as the label).
    A row whose very first cell is missing therefore yields ``-1``.

    Args:
        filename: Path of the workbook, passed to ``pd.read_excel``.

    Returns:
        list[int]: one count per sheet row, in sheet order.
    """
    sheet = pd.read_excel(filename, header=None)
    n_rows = len(sheet.values)
    return [len(dropnan(sheet.iloc[row, :].values)) - 1 for row in range(n_rows)]

def count_folder(foldername):
    """Summarise per-row sample counts for every workbook in a folder.

    Every directory entry is handed to ``count_execl``; no filtering by
    extension or file type is done here.

    Args:
        foldername: Directory whose entries are all read as Excel files.

    Returns:
        list[list]: one row per file, in sorted file-name order, laid out as
        ``[filename, mean formatted with '%.2f', median, minimum, maximum,
        value at sorted index len // 20]``. The last entry is roughly the
        5th percentile of the counts.

    Raises:
        IndexError: if a workbook yields no rows, since the minimum is read
            from an empty array.
    """
    data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # summary reproducible from run to run and machine to machine.
    for filename in sorted(os.listdir(foldername)):
        excelname = os.path.join(foldername, filename)
        lengths = np.sort(np.array(count_execl(excelname)))
        data.append([
            filename,
            '%.2f' % lengths.mean(),
            np.median(lengths),
            lengths[0],                     # smallest count
            lengths[-1],                    # largest count
            lengths[len(lengths) // 20],    # low-end (about 5%) order statistic
        ])
    return data

# 数据切割拉伸

In [3]:
from scipy.interpolate import interp1d

In [4]:
def zscore(X):
    """Standardise ``X`` using its overall mean and standard deviation.

    The statistics are taken over all elements of ``X`` at once (no axis is
    passed), so a single scalar mean and a single scalar spread are applied
    to every entry. The divisor convention of ``std()`` is whatever the type
    of ``X`` defaults to. There is no guard against a zero spread.

    Args:
        X: Array-like exposing ``mean()`` and ``std()`` and supporting
            element-wise arithmetic (e.g. ``np.ndarray`` or ``torch.Tensor``).

    Returns:
        Object of the same kind as ``X`` holding ``(X - mean) / std``.
    """
    centre = X.mean()
    centred = X - centre
    spread = X.std()
    return centred / spread

def interpolate_excel(filename, stretch, is_norm):
    """Resample every row of a workbook to a fixed length with cubic splines.

    Column 0 of each sheet row is taken as the label. The remaining cells,
    cut at the first missing value by ``dropnan``, form the signal, which is
    interpolated onto ``stretch`` evenly spaced positions spanning its
    original index range.

    Args:
        filename: Path of the header-less Excel file.
        stretch: Number of samples each resampled row should have.
        is_norm: When true, the whole feature tensor of this file is passed
            through ``zscore`` (one mean/std for the entire file).

    Returns:
        tuple: ``(features, labels)`` as torch tensors; ``features`` has one
        row of ``stretch`` values per sheet row.

    Raises:
        ValueError: propagated from ``interp1d`` when a row has too few
            samples for a cubic fit.
    """
    data = pd.read_excel(filename, header=None)
    new_features = []
    label = []
    for i in range(len(data.values)):
        row_data = dropnan(data.iloc[i, 1:].values)
        fx = interp1d(np.arange(len(row_data)), row_data, kind='cubic')
        new_features.append(fx(np.linspace(0, len(row_data) - 1, stretch)))
        label.append(data.iloc[i, 0])

    # Stack the rows into one ndarray before handing them to torch:
    # torch.tensor() on a list of ndarrays goes through a slow per-element
    # path and emits a UserWarning. An empty sheet keeps the plain-list path
    # so its result is unchanged.
    features = torch.tensor(np.array(new_features) if new_features else new_features)
    labels = torch.tensor(label)
    if is_norm:
        features = zscore(features)
    return features, labels

def interpolate_folder(foldername, stretch, is_norm):
    """Resample every workbook in a folder and stack the results.

    Each directory entry is processed by ``interpolate_excel`` and the
    per-file tensors are concatenated along dimension 0. Normalisation, when
    requested, is therefore applied per file, not across the whole folder.

    Args:
        foldername: Directory whose entries are all read as Excel files.
        stretch: Target number of samples per row, forwarded unchanged.
        is_norm: Whether each file's features are z-scored, forwarded
            unchanged.

    Returns:
        tuple: ``(all_features, all_label)`` tensors covering the files in
        sorted file-name order, or ``(None, None)`` if the folder is empty.
    """
    feature_parts, label_parts = [], []
    # os.listdir() order is arbitrary; sorting fixes the row order of the
    # stacked tensors so saved datasets are reproducible.
    for filename in sorted(os.listdir(foldername)):
        excelname = os.path.join(foldername, filename)
        feature, label = interpolate_excel(excelname, stretch, is_norm)
        feature_parts.append(feature)
        label_parts.append(label)

    if not feature_parts:
        return None, None
    # Concatenate once at the end; torch.cat inside the loop would recopy
    # everything accumulated so far for every additional file.
    return torch.cat(feature_parts, dim=0), torch.cat(label_parts, dim=0)

def save_tensor(foldername, stretch, is_norm):
    """Resample a folder of workbooks and dump the result as two CSV files.

    The output paths are formed by appending ``'_feature.csv'`` and
    ``'_label.csv'`` directly to ``foldername``, so the files are written
    next to the folder, not inside it. Neither file has a header row or an
    index column.

    Args:
        foldername: Directory to process; also the prefix of both outputs.
        stretch: Target number of samples per row, forwarded unchanged.
        is_norm: Whether features are z-scored, forwarded unchanged.

    Returns:
        None.
    """
    features, labels = interpolate_folder(foldername, stretch, is_norm)
    # Build both frames before touching the filesystem.
    feature_frame = pd.DataFrame(features.detach().numpy())
    label_frame = pd.DataFrame(labels.detach().numpy())
    feature_frame.to_csv(foldername + '_feature.csv', header=None, index=False)
    label_frame.to_csv(foldername + '_label.csv', header=None, index=False)

In [6]:
# Resample every workbook under ../test to 64 samples per row with z-scoring,
# writing ../test_feature.csv and ../test_label.csv.
save_tensor('../test', stretch=64, is_norm=True);