In [1]:
import numpy as np
import torch

import models.TextCNN as model
from utils import DataConfig

# Single source of truth for every random number generator seeded in this cell.
SEED = 3407

np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# Ask cuDNN for deterministic kernels and disable its auto-tuner so that
# repeated runs produce the same results.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
UNK, PAD = '<UNK>', '<PAD>'  # unknown-token symbol and padding symbol

In [2]:
# Build the dataset configuration; the two strings are presumably the data
# directory and the pretrained-embedding file -- confirm against utils.DataConfig.
data_config = DataConfig('ship_data', 'embedding.npz')
# Build the model hyper-parameter configuration from the imported models.TextCNN module.
# NOTE(review): `model` refers to that module only until a later cell rebinds
# `model` to a Model instance; re-running this cell after that point will fail.
model_config = model.Config()

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as f



def conv_and_pool(x, conv):
    """Apply one convolution, rectify the result, and max-pool over dimension 2.

    Args:
        x: Input tensor passed straight to ``conv``.
        conv: Callable layer; its output is expected to have a size-1
            dimension 3, which is squeezed away before pooling.

    Returns:
        The pooled activations with the pooled dimension squeezed out.
    """
    activated = f.relu(conv(x)).squeeze(3)
    # The pooling window spans the whole of dimension 2, leaving one value per channel.
    window = activated.size(2)
    return f.max_pool1d(activated, window).squeeze(2)


class Model(nn.Module):
    """TextCNN classifier: embedding, parallel convolutions, max pooling, linear head.

    Args:
        model_config: Object exposing ``num_filters``, ``filter_sizes`` and
            ``dropout``.
        data_config: Object exposing ``embedding_pretrained``, ``n_vocab``,
            ``embed`` and ``num_classes``.
    """

    def __init__(self, model_config, data_config):
        super().__init__()
        if data_config.embedding_pretrained is not None:
            # Pretrained vectors are loaded but stay trainable (freeze=False).
            # NOTE(review): unlike the branch below, no padding_idx is set here -- confirm intent.
            self.embedding_1 = nn.Embedding.from_pretrained(data_config.embedding_pretrained, freeze=False)
        else:
            # Randomly initialised embedding; the last vocabulary index is the padding index.
            self.embedding_1 = nn.Embedding(data_config.n_vocab, data_config.embed,
                                            padding_idx=data_config.n_vocab - 1)

        # Take the kernel width from the embedding layer itself so the
        # convolutions still match when the pretrained matrix width differs
        # from data_config.embed.
        embed_dim = self.embedding_1.embedding_dim
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, model_config.num_filters, (k, embed_dim)) for k in model_config.filter_sizes])
        self.dropout = nn.Dropout(model_config.dropout)

        # Width of the concatenated pooled features (one group of filters per kernel size).
        pooled_dim = model_config.num_filters * len(model_config.filter_sizes)
        # NOTE(review): there is no non-linearity between these Linear layers, so
        # the stack composes to a single affine map. Left unchanged to keep
        # parameter names and numerical behaviour compatible with existing checkpoints.
        self.fc_layers = nn.Sequential(
            nn.Linear(pooled_dim, pooled_dim // 2),
            nn.Linear(pooled_dim // 2, pooled_dim // 4),
            nn.Linear(pooled_dim // 4, data_config.num_classes))

    def forward(self, x):
        """Compute class scores for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids; the notebook calls this with a
                (batch, sequence length) tensor.

        Returns:
            Tensor of raw scores whose last dimension is ``num_classes``
            (no softmax is applied).
        """
        out = self.embedding_1(x)
        out = out.unsqueeze(1)  # add a channel dimension for the 2-D convolutions
        out = torch.cat([conv_and_pool(out, conv) for conv in self.convs], 1)
        out = self.dropout(out)
        out = self.fc_layers(out)
        return out


In [4]:
# Instantiate the classifier defined in the previous cell and move it to the configured device.
# NOTE(review): this rebinds `model`, which the first cell imported as the alias
# of the models.TextCNN module; cells that call `model.Config()` will fail if
# they are re-run after this one.
model = Model(model_config,data_config).to(data_config.device)

In [5]:
# Smoke test: push a random batch of 64 sequences of 30 token ids (values 1..9)
# through the network and display the size of the result.
dummy_tokens = torch.randint(1, 10, [64, 30])
dummy_scores = model(dummy_tokens.to(data_config.device))
dummy_scores.size()


torch.Size([64, 5])

In [6]:
from torchinfo import summary

# Print a per-layer summary for a single sequence of 30 token ids;
# the input dtype is torch.long because the embedding layer is indexed by integers.
summary(
    model,
    input_size=(1, 30),
    dtypes=[torch.long],
)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [1, 5]                    --
├─Embedding: 1-1                         [1, 30, 100]              72,700
├─ModuleList: 1-2                        --                        --
│    └─Conv2d: 2-1                       [1, 256, 29, 1]           51,456
│    └─Conv2d: 2-2                       [1, 256, 28, 1]           77,056
│    └─Conv2d: 2-3                       [1, 256, 27, 1]           102,656
│    └─Conv2d: 2-4                       [1, 256, 26, 1]           128,256
├─Dropout: 1-3                           [1, 1024]                 --
├─Sequential: 1-4                        [1, 5]                    --
│    └─Linear: 2-5                       [1, 512]                  524,800
│    └─Linear: 2-6                       [1, 256]                  131,328
│    └─Linear: 2-7                       [1, 5]                    1,285
Total params: 1,089,537
Trainable params: 1,089,537

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the full experiment table (replace the path with your own CSV file).
df = pd.read_csv('./ship_data/experiment_data.csv')

# Hold out 10% of the rows, then halve that hold-out into validation and test
# subsets (roughly a 90/5/5 split). Both calls use the same random_state so the
# partition is repeatable.
train_df, temp_df = train_test_split(df, test_size=0.1, random_state=3407)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=3407)

# Write each subset to its own CSV file, leaving out the index column.
for split_frame, split_path in (
    (train_df, 'train_dataset.csv'),
    (val_df, 'val_dataset.csv'),
    (test_df, 'test_dataset.csv'),
):
    split_frame.to_csv(split_path, index=False)
