In [520]:
### packages

import pandas as pd
import numpy as np
import itertools
import ast
from transformers import BertTokenizer, BertModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import LayerNorm
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [521]:
### read_csv

# Dialogue-act label columns. After the string cast below each cell is either
# '1.0' (label present) or '0.0' (label absent).
LABEL_COLUMNS = ['Advice', 'Agreement', 'Answer', 'Apology', 'AskHelp', 'Blessings', 'Command', 'Commitment', 'Concern', 'Decision', 'Denial', 'Deterrence', 'Emotion', 'Gratitude', 'Invitation', 'Malicious', 'Opinion', 'Persuasion', 'Refusal', 'Sarcasm', 'Sympathy']
META_COLUMNS = ['post_id', 'post_content', 'idx', 'response_content', 'topic']

data = pd.read_csv('./data/formal_data/Train_data/new_train.csv', usecols=META_COLUMNS + LABEL_COLUMNS)
data = data.sort_values(['post_id', 'idx'], ascending=[True, True])

# Drop every thread that contains at least one row without an 'Advice' label,
# then renumber the rows. Resetting the index *after* the filter keeps it
# contiguous (0..n-1), so label-based and positional access agree downstream.
unlabelled_posts = data.loc[data['Advice'].isna(), 'post_id'].unique()
data = data[~data['post_id'].isin(unlabelled_posts)].reset_index(drop=True)

# prev_response = the preceding reply inside the same thread; the first reply
# of a thread (and any reply following a missing one) falls back to the post.
data['prev_response'] = data.groupby('post_id')['response_content'].shift(1)
data['prev_response'] = data['prev_response'].fillna(data['post_content'])

# 'topic' holds the repr of a dict; keep only its keys, space-separated.
data['topic'] = data['topic'].apply(lambda x: ' '.join(list(ast.literal_eval(x).keys())))
data = data.astype(str)

# Turn each label flag into its own column name (or '') and concatenate them
# into one whitespace-separated 'social' string. Assigning the result back
# (instead of a chained inplace replace) works with and without copy-on-write.
for label in LABEL_COLUMNS:
    data[label] = data[label].replace({'1.0': label, '0.0': ''})
data["social"] = data[LABEL_COLUMNS].agg(" ".join, axis=1)

data = data[['post_content', 'prev_response', 'response_content', 'topic', 'social']]
data = data.rename(columns={"post_content": "post", "response_content": "next_response"})

In [522]:
class CustomDataset(Dataset):
    """Map-style dataset over the five text fields of the dialogue table.

    Args:
        data: a mapping (e.g. a pandas DataFrame or a dict of sequences) that
            provides the keys 'post', 'prev_response', 'next_response',
            'social' and 'topic', all of equal length.

    Each item is a dict with those five keys for one row.
    """

    def __init__(self, data):
        # Materialise every column as a plain list so that __getitem__ is
        # strictly positional. Indexing a pandas Series with an integer is
        # label-based, which raises KeyError (or returns the wrong row) when
        # the frame's index is not exactly 0..n-1, e.g. after row filtering.
        self.post = list(data['post'])
        self.prev_response = list(data['prev_response'])
        self.next_response = list(data['next_response'])
        self.social = list(data['social'])
        self.topic = list(data['topic'])

    def __len__(self):
        """Return the number of rows."""
        return len(self.post)

    def __getitem__(self, idx):
        """Return the row at position ``idx`` as a dict of the five fields."""
        return {
            'post': self.post[idx],
            'prev_response': self.prev_response[idx],
            'next_response': self.next_response[idx],
            'social': self.social[idx],
            'topic': self.topic[idx],
        }

# Create a custom dataset and data loader.
dataset = CustomDataset(data)
# Batches of 5 rows; shuffle=True reorders the rows on every pass over the loader.
dataloader = DataLoader(dataset=dataset, batch_size=5, shuffle=True)
dataiter = iter(dataloader)
# NOTE(review): this rebinds `data` from the full table to the first batch
# drawn from the loader, so every later read of `data` sees only that batch.
data = next(dataiter)

In [523]:
# Define the generator.
class Generator(nn.Module):
    """Feed-forward network: Linear(100, 256) -> ReLU -> Linear(256, 784) -> Tanh."""

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(100, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        generated = self.model(x)
        return generated

# Define the discriminator.
class Discriminator(nn.Module):
    """Feed-forward network: Linear(784, 256) -> ReLU -> Linear(256, 1) -> Sigmoid."""

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Flatten everything after the first dimension, then apply the layer stack."""
        rows = x.shape[0]
        flattened = x.view(rows, -1)
        return self.model(flattened)

In [524]:
class Generator_process:
    """Builds generator-side feature vectors for one dialogue sample.

    The instance keeps the raw texts of one sample and exposes helpers that
    encode text with BERT and push the encodings through small MLPs.

    Args:
        post: text of the original post.
        prev_sentence: text of the previous response.
        next_sentence: text of the next response.
        social: space-separated social-label text.
        topic: space-separated topic text.

    Note:
        Every MLP / linear layer used by ``decode``, ``topic_social`` and
        ``priorNrecog`` is created (randomly initialised) inside the call, so
        its weights do not persist between calls.
    """

    PRETRAINED_NAME = 'bert-base-chinese'

    # Class-level cache of the tokenizer / encoder. Loading them is expensive
    # and independent of the sample, so it is done once and shared by every
    # instance instead of being repeated in each __init__.
    _tokenizer = None
    _bert_model = None

    def __init__(self, post, prev_sentence, next_sentence, social, topic):
        self.input_dim = 768
        self.hidden_dim = 256
        self.prev_sentence = prev_sentence
        self.next_sentence = next_sentence
        self.post = post
        self.social = social
        self.topic = topic
        self.tokenizer, self.bert_model = self._load_bert()

    @classmethod
    def _load_bert(cls):
        """Return the shared (tokenizer, model) pair, loading each at most once."""
        if cls._tokenizer is None:
            cls._tokenizer = BertTokenizer.from_pretrained(cls.PRETRAINED_NAME)
        if cls._bert_model is None:
            cls._bert_model = BertModel.from_pretrained(cls.PRETRAINED_NAME)
        return cls._tokenizer, cls._bert_model

    def last_hidden_states(self, text):
        """Encode ``text`` and return the encoder's ``last_hidden_state`` tensor.

        The encoder runs under ``torch.no_grad()``, so the result carries no
        autograd history.
        """
        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True)
        with torch.no_grad():
            outputs = self.bert_model(**inputs)
        return outputs.last_hidden_state

    class PriorMLP(nn.Module):
        """input_dim -> hidden_dim -> 2 * input_dim (mean and log-variance halves)."""

        def __init__(self, input_dim, hidden_dim):
            super().__init__()
            self.fc1 = nn.Linear(input_dim, hidden_dim)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(hidden_dim, input_dim * 2)  # output mean and log-variance

        def forward(self, x):
            x = self.fc1(x)
            x = self.relu(x)
            x = self.fc2(x)
            return x

    class RecogMLP(nn.Module):
        """2 * input_dim -> hidden_dim -> 2 * input_dim (mean and log-variance halves)."""

        def __init__(self, input_dim, hidden_dim):
            super().__init__()
            self.fc1 = nn.Linear(input_dim * 2, hidden_dim)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(hidden_dim, input_dim * 2)  # output mean and log-variance

        def forward(self, x):
            x = self.fc1(x)
            x = self.relu(x)
            x = self.fc2(x)
            return x

    class MLP(nn.Module):
        """A single linear projection from ``input_dim`` to ``output_dim``."""

        def __init__(self, input_dim, output_dim):
            super().__init__()
            self.fc = nn.Linear(input_dim, output_dim)

        def forward(self, x):
            x = self.fc(x)
            return x

    class DecoderWithMLP(nn.Module):
        """input_dim -> hidden_dim -> three-layer MLP -> input_dim."""

        def __init__(self, input_dim, hidden_dim):
            super().__init__()
            self.hidden_layer = nn.Linear(input_dim, hidden_dim)
            self.mlp_layer = nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            )
            self.output_layer = nn.Linear(hidden_dim, input_dim)
            self.activation = nn.ReLU()  # ReLU applied after the first projection

        def forward(self, x):
            x = self.activation(self.hidden_layer(x))
            x = self.mlp_layer(x)
            x = self.output_layer(x)
            return x

    def decode(self, result_decode):
        """Pass ``result_decode`` through a freshly created ``DecoderWithMLP``."""
        final_decoder = Generator_process.DecoderWithMLP(self.input_dim, self.hidden_dim)
        return final_decoder(result_decode)

    def topic_social(self, hidden_feature):
        """Flatten a 3-D hidden-state tensor and project it to 768 features.

        Dimensions 1 and 2 of ``hidden_feature`` are merged into one axis and
        fed to a freshly created linear layer whose input width equals that
        merged size.
        """
        seq_length, hidden_size = hidden_feature.size(1), hidden_feature.size(2)
        flatten_hidden_states = hidden_feature.view(-1, seq_length * hidden_size)
        mlp_model = Generator_process.MLP(seq_length * hidden_size, 768)
        return mlp_model(flatten_hidden_states)

    def priorNrecog(self):
        """Compute prior / recognition features for the stored sentence pair.

        Returns:
            A tuple ``(prev_hidden_feature, prev_final_output, combined_output)``
            where each element has a leading batch dimension of 1:
            the first-token encoding of the previous sentence, the projected
            mean from the prior MLP, and the projected mean from the
            recognition MLP.
        """
        prev_last_hidden_states = self.last_hidden_states(self.prev_sentence)
        next_last_hidden_states = self.last_hidden_states(self.next_sentence)
        # Initialise the MLP models.
        prior_mlp_model = self.PriorMLP(self.input_dim, self.hidden_dim)
        recog_mlp_model = self.RecogMLP(self.input_dim, self.hidden_dim)

        # Use the hidden feature of one token per sentence (the first one).
        prev_hidden_feature = prev_last_hidden_states[0, 0, :]
        next_hidden_feature = next_last_hidden_states[0, 0, :]
        # Concatenate the two token features.
        combined_features = torch.cat((prev_hidden_feature, next_hidden_feature), dim=-1)
        # Add the batch dimension.
        prev_hidden_feature = prev_hidden_feature.unsqueeze(0)
        combined_features = combined_features.unsqueeze(0)
        # MLP outputs hold the mean in the first half and the log-variance in
        # the second half; only the mean is used below.
        prev_mean, _ = prior_mlp_model(prev_hidden_feature).chunk(2, dim=-1)
        combined_mean, _ = recog_mlp_model(combined_features).chunk(2, dim=-1)
        # Both means go through the same linear layer.
        linear_layer = nn.Linear(self.input_dim, self.input_dim)
        prev_final_output = linear_layer(prev_mean)
        combined_output = linear_layer(combined_mean)
        return prev_hidden_feature, prev_final_output, combined_output

In [525]:
class Discriminator_process:
    """Scores a generated response and a real response against the previous one.

    Args:
        fake_response: tensor fed to a 768-input linear layer in ``mg``.
        prev_sentence: 3-D tensor whose size along dim 1 is used as the
            sequence length and whose last dimension is treated as 768.
        next_sentence: tensor that ``mt`` flattens to one row of
            ``next_sentence.size(1) * 768`` values.

    Note:
        All layers used by ``mg`` and ``mt`` are created (randomly
        initialised) inside the call, so their weights do not persist
        between calls. Both methods reshape to a batch of 1.
    """

    def __init__(self, fake_response, prev_sentence, next_sentence):
        self.fake_response = fake_response
        self.prev_sentence = prev_sentence
        self.next_sentence = next_sentence

    class MLP(nn.Module):
        """Linear projection followed by ReLU."""

        def __init__(self, input_size, output_size):
            super().__init__()
            self.fc1 = nn.Linear(input_size, output_size)
            self.relu = nn.ReLU()

        def forward(self, x):
            x = self.fc1(x)
            x = self.relu(x)
            return x

    class CNNMaxPoolingFeatureExtractor(nn.Module):
        """Conv2d -> ReLU -> MaxPool2d (pool stride equals pool size)."""

        def __init__(self, in_channels, out_channels, kernel_size, pool_size):
            super().__init__()
            self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size)
            self.pool = nn.MaxPool2d(pool_size, stride=pool_size)

        def forward(self, x):
            x = self.conv1(x)
            x = F.relu(x)
            x = self.pool(x)
            return x

    class SimilarityScore(nn.Module):
        """Linear layer to one unit followed by a sigmoid."""

        def __init__(self, input_size):
            super().__init__()
            self.fc = nn.Linear(input_size, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            x = self.fc(x)
            x = self.sigmoid(x)
            return x

    def _score_against_prev(self, mapped):
        """Score ``mapped`` (1, prev_len, 768) against ``self.prev_sentence``.

        Takes the pairwise inner products between ``mapped`` and the previous
        sentence, treats them as a one-channel image, extracts conv + max-pool
        features and squashes them to a single sigmoid score.
        """
        inner_product = torch.matmul(mapped, self.prev_sentence.transpose(1, 2))
        inner_product = inner_product.unsqueeze(1)
        # The nested classes are not visible as bare names inside a method,
        # so they are referenced through the outer class.
        cnn_pool_extractor = Discriminator_process.CNNMaxPoolingFeatureExtractor(1, 10, (3, 3), (2, 2))
        features = cnn_pool_extractor(inner_product)
        features_flattened = features.view(features.size(0), -1)
        similarity_model = Discriminator_process.SimilarityScore(features_flattened.size(1))
        return similarity_model(features_flattened)

    def mg(self):
        """Return the similarity score of the generated response vs. the previous sentence."""
        prev_len = self.prev_sentence.size()[1]
        # A plain linear projection (no activation) to the previous sentence's size.
        projection = nn.Linear(768, prev_len * 768)
        fake_response_mapped = projection(self.fake_response).view(1, prev_len, 768)
        return self._score_against_prev(fake_response_mapped)

    def mt(self):
        """Return the similarity score of the real next sentence vs. the previous sentence."""
        prev_len = self.prev_sentence.size()[1]
        next_len = self.next_sentence.size()[1]
        # Input width is the flattened next sentence, output width the flattened previous one.
        projection = nn.Linear(next_len * 768, prev_len * 768)
        next_sentence_mapped = projection(self.next_sentence.view(1, -1)).view(1, prev_len, 768)
        return self._score_against_prev(next_sentence_mapped)

In [None]:
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
import math

# Keeps log() finite when a sigmoid score saturates at exactly 0 or 1.
LOG_EPS = 1e-8

generator = Generator()
discriminator = Discriminator()
optimizer_generator = optim.Adam(generator.parameters(), lr=0.00005)
optimizer_discriminator = optim.Adam(discriminator.parameters(), lr=0.0002)
# NOTE(review): the layers that actually produce the losses below are created
# inside Generator_process / Discriminator_process on every call and are not
# registered with either optimizer, so the optimizer steps do not update them.
# Making those layers persistent is required before this loop can learn.

num_epochs = 10
# Count samples, not columns/keys: `data` is indexed as data[field][idx].
num_samples = len(data['post'])
for epoch in range(num_epochs):
    for idx in range(num_samples):
        result_vector = torch.zeros(1, 768)

        post = data['post'][idx]
        prev_response = data['prev_response'][idx]
        next_response = data['next_response'][idx]
        social = data['social'][idx]
        topic = data['topic'][idx]

        generator_process = Generator_process(post, prev_response, next_response, social, topic)
        prev_hidden_feature, prev_final_output, combined_output = generator_process.priorNrecog()
        post_hidden_feature = generator_process.last_hidden_states(post)[0, 0, :].unsqueeze(0)

        prev_response_hidden_feature = generator_process.last_hidden_states(prev_response)
        next_response_hidden_feature = generator_process.last_hidden_states(next_response)

        social_hidden_feature = generator_process.last_hidden_states(social)
        social_hidden_feature = generator_process.topic_social(social_hidden_feature)
        topic_hidden_feature = generator_process.last_hidden_states(topic)
        topic_hidden_feature = generator_process.topic_social(topic_hidden_feature)

        # Combine the feature vectors by element-wise addition.
        for vector in [prev_hidden_feature, prev_final_output, combined_output, post_hidden_feature, social_hidden_feature, topic_hidden_feature]:
            result_vector = result_vector + vector
        fake_response = generator_process.decode(result_vector)

        # Train the discriminator. The generated response is detached so that
        # this backward pass stays inside the discriminator side and leaves
        # the generator-side graph intact for the generator loss below.
        discriminator.zero_grad()

        discriminator_process = Discriminator_process(fake_response.detach(), prev_response_hidden_feature, next_response_hidden_feature)
        mg = discriminator_process.mg()
        mt = discriminator_process.mt()
        d_loss = -(torch.log(mt + LOG_EPS) + torch.log(1 - mg + LOG_EPS)).mean()
        d_loss.backward()
        optimizer_discriminator.step()

    # Train the generator.
    # NOTE(review): this runs once per epoch and only sees the tensors of the
    # last sample of the inner loop — confirm whether it was meant to run per sample.
    generator.zero_grad()
    kl_div =  F.kl_div(F.log_softmax(prev_final_output, dim=-1), F.softmax(combined_output, dim=-1), reduction='batchmean')
    reconstruction_term = F.mse_loss(prev_final_output, combined_output)
    # Back-propagate the loss itself; re-wrapping its scalar value in a new
    # tensor would cut it off from every layer that produced it.
    g_loss = kl_div + reconstruction_term
    g_loss.backward()
    optimizer_generator.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Generator Loss: {g_loss.item()}, Discriminator Loss: {d_loss.item()}")

Epoch [1/10], Generator Loss: 0.03695230931043625, Discriminator Loss: 0.6556451916694641
Epoch [2/10], Generator Loss: 0.031797852367162704, Discriminator Loss: 4.029597282409668
Epoch [3/10], Generator Loss: 0.04376428574323654, Discriminator Loss: 2.616647243499756
Epoch [4/10], Generator Loss: 0.035848550498485565, Discriminator Loss: 0.7132371068000793
