# 读取数据

In [3]:
import pandas as pd
gaze = pd.read_csv('gaze_1017.csv')
# 数据的样式
gaze.head()

Unnamed: 0,gaze_x,gaze_y,gaze_t
0,"499.9281916929646,500.7827776075044,505.779561...","500.01446883798474,499.7674575377377,499.27408...","15.5,1094.5,1138.7999999523163,1167.1999999284..."
1,"500.0809183161369,501.2473190313305,505.520843...","499.94212464806105,500.00284609397926,501.3383...","18.899999999441206,7789.5,7835.700000000186,78..."
2,"976.0934688438953,940.3889573899594,902.095533...","427.87624362040066,394.97962288910423,441.4959...","16466.39999999944,16784.299999999814,16853.299..."
3,"500.11682246965455,500.8400332489199,502.42095...","500.08466949635516,499.9072253333237,499.27497...","23.299999999813735,1134.0999999996275,1188.399..."
4,"499.94855524272094,503.02118947543767,506.5767...","500.1119995236597,500.8045022214369,500.412437...","15.699999999254942,1119.1000000014901,1164.900..."


In [4]:
# 滤波器，用于生成label
import numpy as np
from scipy import signal

def preprocess_data(data):
    data = signal.medfilt(data, kernel_size=7)
    data = signal.medfilt(data, kernel_size=7)
    data = meanFilter(data, 5)
    data = meanFilter(data, 5)
    return data

def meanFilter(data, win):
    length = len(data)
    res = np.zeros(length)
    for i in range(length):
        s, n = 0, 0
        for j in range(i - win // 2, i + win - win // 2):
            if j < 0 or j >= length:
                continue
            else:
                s += data[j]
                n += 1
        res[i] = s / n
    return res



In [61]:
# 抽取每次记录的前300个gaze （之后要处理可变长）
train_data = []
train_label = []
test_data = []
test_label = []
cnt = 0
for i,item in enumerate(gaze['gaze_x']):
    if not pd.isnull(item):
        gaze_x = item.split(',')
        gaze_x = list(map(float,gaze_x))
        if len(gaze_x)>300:
            gaze_x = gaze_x[0:300]
            if i%10 == 0:
                test_data.append(gaze_x)
                test_label.append(preprocess_data(gaze_x))
            else:
                train_data.append(gaze_x)
                train_label.append(preprocess_data(gaze_x))

In [66]:
# Dataset
import numpy as np
from torch.utils.data import Dataset

# 定义GetLoader类，继承Dataset方法，并重写__getitem__()和__len__()方法
class GetLoader(Dataset):
	# 初始化函数，得到数据
    def __init__(self, data_root, data_label):
        self.data = data_root
        self.label = data_label
    # index是根据batchsize划分数据后得到的索引，最后将data和对应的labels进行一起返回
    def __getitem__(self, index):
        data = self.data[index]
        labels = self.label[index]
        return data, labels
    # 该函数返回数据大小长度，目的是DataLoader方便划分，如果不知道大小，DataLoader会一脸懵逼
    def __len__(self):
        return len(self.data)


source_data = np.array(train_data)

source_label = np.array(train_label)
# 通过GetLoader将数据进行加载，返回Dataset对象，包含data和labels
train_dataset = GetLoader(source_data, source_label)
test_dataset = GetLoader(np.array(test_data),np.array(test_label))
train_dataset,test_dataset

(<__main__.GetLoader at 0x1e6808c0c88>, <__main__.GetLoader at 0x1e6834878c8>)

In [63]:
# Dataloader
from torch.utils.data import DataLoader
# 读取数据
datas = DataLoader(train_dataset, batch_size=6, shuffle=True, drop_last=False, num_workers=0)

# 观察batch是都切割正确
for i, data in enumerate(datas):
	# i表示第几个batch， data表示该batch对应的数据，包含data和对应的labels
    print("第 {} 个Batch \n{}".format(i, data))

第 0 个Batch 
[tensor([[ 926.0704,  907.0727,  844.4589,  ...,  358.2976,  276.4622,
          286.8681],
        [ 783.2406,  771.6726,  750.8606,  ...,  694.7237,  768.8643,
          789.4265],
        [ 904.9043,  872.6360,  783.2129,  ...,  874.1408,  880.9165,
          871.2505],
        [1098.1707,  949.9972,  815.7106,  ..., 1039.5199, 1130.0296,
         1253.6942],
        [ 479.5659,  528.3553,  565.7857,  ...,  389.3262,  348.4798,
          373.9577],
        [ 500.3960,  503.3709,  505.0346,  ...,  357.4613,  378.3938,
          408.3999]], dtype=torch.float64), tensor([[798.4997, 786.0494, 767.1928,  ..., 409.1325, 378.0452, 350.7551],
        [768.7918, 772.0141, 774.9058,  ..., 693.3816, 694.1319, 694.4607],
        [775.2947, 777.4752, 782.0750,  ..., 869.4489, 872.0211, 871.9585],
        [709.5220, 695.1157, 678.4696,  ..., 989.3775, 991.9586, 994.1525],
        [535.3944, 543.6415, 552.5287,  ..., 421.2012, 403.2377, 388.9295],
        [500.3960, 500.3960, 500.0212,

In [64]:
# 创建模型
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
w = nn.Conv1d(in_channels=1,out_channels=1,kernel_size=(7,),stride=(1,),padding=3)
# 卷积核初始为均值滤波
ones=torch.Tensor([[[1/7,1/7,1/7,1/7,1/7,1/7,1/7]]])
w.weight = torch.nn.Parameter(ones)
w.bias = torch.nn.Parameter(torch.tensor([0.]))
class Filter_cnn(nn.Module):
    def __init__(self):
        super(Filter_cnn, self).__init__()
        self.model = nn.Sequential(
            w,
            nn.Linear(300,300)
        )
    def forward(self,input):
        return self.model(input)

net = Filter_cnn()
print(net)
writer = SummaryWriter('logs')

Filter_cnn(
  (model): Sequential(
    (0): Conv1d(1, 1, kernel_size=(7,), stride=(1,), padding=(3,))
    (1): Linear(in_features=300, out_features=300, bias=True)
  )
)


In [65]:
# 模型训练
import torch
import torch.optim as optimizer
# 定义损失函数和优化器
criterion_loss = nn.MSELoss()
opt = optimizer.Adam(net.parameters(), lr=0.001)

# 训练模型
Loss_list = []
correct_list = []
for epoch in range(300):
    correct = 0
    running_loss = 0
    for i,data in enumerate(datas):
        train_x,train_y = data

        if i==len(datas)-1 and epoch == 299:
            print("label:")
            print(train_y)

        train_x = train_x.to(torch.float32)
        train_y = train_y.to(torch.float32)
        train_x = train_x.unsqueeze(dim=1)
        outputs = net(train_x)

        outputs = outputs.squeeze(dim=1)

        if i==len(datas)-1 and epoch == 299:
            print("x after:")
            print(outputs)


        loss = criterion_loss(outputs,train_y)
        opt.zero_grad()
        loss.backward()
        opt.step()

        predicted = torch.max(outputs.data, 1)[1]
        y = torch.max(train_y.data, 1)[1]
        running_loss += loss.item()
        correct += (predicted == y).sum()
    epoch_loss = running_loss/len(datas)
    Loss_list.append(running_loss / len(datas))
    correct_list.append(correct / len(datas))
    loss = criterion_loss(outputs,train_y)
    if epoch % 50 == 0:
        print(f"epoch: {epoch} training loss: {epoch_loss: .3f}")
    writer.add_scalar("train loss",running_loss / len(datas),epoch)
writer.close()

epoch: 0 training loss:  329967.423
epoch: 50 training loss:  3185.387
epoch: 100 training loss:  1219.601
epoch: 150 training loss:  740.524
epoch: 200 training loss:  527.971
epoch: 250 training loss:  419.144
label:
tensor([[ 798.4997,  786.0494,  767.1928,  ...,  409.1325,  378.0452,
          350.7551],
        [ 707.5449,  714.7158,  721.5830,  ...,  697.4222,  637.5058,
          591.5787],
        [ 702.9649,  693.0793,  682.0253,  ..., 1669.8569, 1669.8384,
         1669.6447],
        [ 449.4384,  449.9037,  450.3839,  ...,  755.8499,  756.8450,
          757.7600]], dtype=torch.float64)
x after:
tensor([[ 750.4197,  748.1860,  742.5190,  ...,  393.8701,  362.5598,
          349.8388],
        [ 692.5229,  695.5389,  716.5919,  ...,  728.8058,  678.2328,
          620.9850],
        [ 702.2181,  701.8402,  699.2031,  ..., 1673.9840, 1685.8821,
         1677.7556],
        [ 461.6297,  440.7369,  454.0815,  ...,  764.7730,  769.4628,
          752.8448]], grad_fn=<SqueezeBackw

In [83]:
# 测试
Loss_list = []
correct_list = []

correct = 0
running_loss = 0
test_dataloader = DataLoader(train_dataset, batch_size=6, shuffle=True, drop_last=False, num_workers=0)
origin_gaze = []
filter_gaze = []
for i,data in enumerate(test_dataloader):
    train_x,train_y = data

    if i == len(test_dataloader)-1:
        print("label:")
        print(train_y)
        orgin_gaze = train_y

    train_x = train_x.to(torch.float32)
    train_y = train_y.to(torch.float32)
    train_x = train_x.unsqueeze(dim=1)
    outputs = net(train_x)

    outputs = outputs.squeeze(dim=1)

    if i == len(test_dataloader)-1:
        print("x after:")
        print(outputs)
        filter_gaze = outputs


    loss = criterion_loss(outputs,train_y)

    predicted = torch.max(outputs.data, 1)[1]
    y = torch.max(train_y.data, 1)[1]
    running_loss += loss.item()
    correct += (predicted == y).sum()
epoch_loss = running_loss/len(datas)
Loss_list.append(running_loss / len(datas))
correct_list.append(correct / len(datas))
loss = criterion_loss(outputs,train_y)

print(f"test loss: {epoch_loss: .3f}")

label:
tensor([[ 816.8229,  798.6071,  771.2704,  ...,  475.3143,  476.6481,
          476.4814],
        [1256.6191, 1273.0400, 1289.2490,  ..., 1408.4434, 1406.7444,
         1404.8259],
        [ 499.7174,  499.7324,  499.5138,  ..., 1529.1607, 1528.6186,
         1528.0640],
        [ 798.4997,  786.0494,  767.1928,  ...,  409.1325,  378.0452,
          350.7551]], dtype=torch.float64)
x after:
tensor([[ 823.1852,  826.0172,  788.5513,  ...,  463.2715,  471.3351,
          452.9034],
        [1251.1157, 1294.8629, 1300.1082,  ..., 1428.9242, 1429.8142,
         1399.0632],
        [ 498.1505,  516.4996,  508.9467,  ..., 1541.8059, 1547.9756,
         1533.6595],
        [ 752.5118,  762.9453,  739.9536,  ...,  403.3554,  371.6489,
          349.5734]], grad_fn=<SqueezeBackward1>)
test loss:  313.793


In [95]:
gaze_x = gaze['gaze_x'][0]
gaze_y = gaze['gaze_y'][0]
gaze_t = gaze['gaze_t'][0]
print(type(gaze_x))
print(type(gaze_y))
print(type(gaze_t))

<class 'str'>
<class 'str'>
<class 'str'>


In [100]:
def process(gaze):
    gaze = list(map(float,gaze))
    gaze = gaze[0:300]
    return gaze
gaze_x = process(gaze_x)
gaze_y = process(gaze_y)
gaze_t = process(gaze_t)
gaze_t

[15.5,
 1094.5,
 1138.7999999523163,
 1167.1999999284744,
 1208.6000000238419,
 1235.5,
 1268.6999999284744,
 1302.0,
 1335.1999999284744,
 1368.7999999523163,
 1405.8999999761581,
 1434.3999999761581,
 1471.7999999523163,
 1501.8999999761581,
 1538.1000000238419,
 1565.5,
 1593.1000000238419,
 1619.1000000238419,
 1653.0,
 1682.1999999284744,
 1710.0,
 1734.5,
 1765.1000000238419,
 1786.2999999523163,
 1814.8999999761581,
 1841.6000000238419,
 1866.1999999284744,
 1892.6999999284744,
 1915.1999999284744,
 1938.5,
 1968.8999999761581,
 2002.1000000238419,
 2037.7999999523163,
 2067.600000023842,
 2101.100000023842,
 2137.399999976158,
 2168.6999999284744,
 2205.1999999284744,
 2234.600000023842,
 2281.6999999284744,
 2331.7999999523163,
 2403.0,
 2433.7999999523163,
 2481.7999999523163,
 2548.399999976158,
 2598.399999976158,
 2648.399999976158,
 2701.2999999523163,
 2764.5,
 2835.1999999284744,
 2867.100000023842,
 2914.2999999523163,
 2935.2999999523163,
 2971.899999976158,
 3001.100

In [105]:
def get_result(gaze):
    gaze = torch.tensor(gaze)
    gaze=gaze.unsqueeze(dim=0)
    gaze=gaze.unsqueeze(dim=0)
    outputs = net(gaze)
    return outputs

In [109]:
import pandas as pd

df = pd.DataFrame({
    'gaze_x':gaze_x,
    'gaze_y':gaze_y,
    'gaze_t':gaze_t,
    'gaze_x_cnn':get_result(gaze_x).squeeze().detach().numpy(),
    'gaze_y_cnn':get_result(gaze_y).squeeze().detach().numpy(),
    'gaze_t_cnn':get_result(gaze_t).squeeze().detach().numpy(),
    'gaze_x_filter':preprocess_data(gaze_x),
    'gaze_y_filter':preprocess_data(gaze_x),
    
})

path = "static\\data\\dataset\\cnn.csv"
df.to_csv(path, index=False)
