In [5]:
import json
import pandas as pd
from collections import Counter
import torch
import random
from torch import nn
import torch.nn.functional as F
from torchtext.vocab import vocab
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from timeit import default_timer as timer
from tqdm import tqdm
import math

In [6]:
df = pd.read_csv("/kaggle/input/amod-mental-health-counseling-conversations-data/train.csv")
df.sample(5)

Unnamed: 0,Context,Response
1631,My girlfriend broke up with me five months ago...,"New York, what would it mean about you if you ..."
3503,My daughter seemed to be developing at a norma...,Hello.It sounds like you are really concerned ...
2369,People who are parental figures in my life hav...,It sounds like you have been thinking about ho...
2370,That phrase makes me crazy. It happens anytime...,It's hard to say what is okay and what is not ...
2841,"Two years ago, I was separated from the milita...",It sounds like being separated from the armed ...


In [7]:
df = df.dropna()

In [8]:
df["Context"] = df["Context"].apply(lambda x: x.lower())
df["Response"] = df["Response"].apply(lambda x: x.lower())

In [9]:
df["Context"] = df["Context"].apply(lambda x: x.replace("\n",""))
df["Response"] = df["Response"].apply(lambda x: x.replace("\n",""))

In [10]:
# df["context_len"] = df["Context"].apply(lambda x: len(x.split()))
# df["response_len"] = df["Response"].apply(lambda x: len(x.split()))

In [11]:
# df = df[df["context_len"]<400]
# df = df[df["response_len"]<600]

In [12]:
# df[["context_len","response_len"]].describe()

In [13]:
counter = Counter()
for i in range(len(df)):
    row = df.iloc[i]
    counter.update(row["Context"].split())
    counter.update(row["Response"].split())

In [14]:
vocab_en = vocab(counter, min_freq=5, specials=('<UNK>', '<SOS>', '<EOS>', '<PAD>'))

In [15]:
vocab_en.set_default_index(vocab_en['<UNK>'])
len(vocab_en)

7805

In [16]:
df["Context"] = df["Context"].apply(lambda x: "<SOS> "+x+" <EOS>")
df["Response"] = df["Response"].apply(lambda x: "<SOS> "+x+" <EOS>")

In [17]:
len(df.iloc[0]["Context"].split())

62

In [18]:
vocab_en(["hi","hello"])

[822, 1376]

In [19]:
class MyDataset(Dataset):
    def __init__(self, df):
        self.X = df["Context"]
        self.y = df["Response"]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        qn = self.X.iloc[idx]
        ans = self.y.iloc[idx]

        qn_indices = vocab_en(qn.split())

        ans_indices = vocab_en(ans.split())

        return torch.tensor(qn_indices), torch.tensor(ans_indices)


In [20]:
ds = MyDataset(df)

In [21]:
train_ds,test_ds = torch.utils.data.random_split(ds, [0.8, 0.2])

In [22]:
BATCH_SIZE = 1

In [23]:
def my_collate(batch):
    # Extract sequences and targets
    qns = [item[0] for item in batch]
    ans = [item[1] for item in batch]
    # Pad sequences
    padded_qn = pad_sequence(qns, padding_value=vocab_en["<PAD>"])
    padded_ans = pad_sequence(ans, padding_value=vocab_en["<PAD>"])

    # Return padded sequences and targets
    return padded_qn, padded_ans

In [24]:
# helper Module that adds positional encoding to the token embedding to introduce a notion of word order.
class PositionalEncoding(nn.Module):
    def __init__(self,
                 emb_size: int,
                 dropout: float,
                 maxlen: int = 5000):
        super(PositionalEncoding, self).__init__()
        den = torch.exp(- torch.arange(0, emb_size, 2)* math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        pos_embedding[:, 1::2] = torch.cos(pos * den)
        pos_embedding = pos_embedding.unsqueeze(-2)

        self.dropout = nn.Dropout(dropout)
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding):
        return self.dropout(token_embedding + self.pos_embedding[:token_embedding.size(0), :])

In [25]:
# helper Module to convert tensor of input indices into corresponding tensor of token embeddings
class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size: int, emb_size):
        super(TokenEmbedding, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size

    def forward(self, tokens):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)

In [26]:
# Seq2Seq Network
class Seq2SeqTransformer(nn.Module):
    def __init__(self,
                 num_encoder_layers: int,
                 num_decoder_layers: int,
                 emb_size: int,
                 nhead: int,
                 src_vocab_size: int,
                 tgt_vocab_size: int,
                 dim_feedforward: int = 512,
                 dropout: float = 0.1):
        super(Seq2SeqTransformer, self).__init__()
        self.transformer = nn.Transformer(d_model=emb_size,
                                       nhead=nhead,
                                       num_encoder_layers=num_encoder_layers,
                                       num_decoder_layers=num_decoder_layers,
                                       dim_feedforward=dim_feedforward,
                                       dropout=dropout)
        self.generator = nn.Linear(emb_size, tgt_vocab_size)
        self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
        self.positional_encoding = PositionalEncoding(
            emb_size, dropout=dropout)

    def forward(self,src, trg,src_mask,tgt_mask,src_padding_mask,tgt_padding_mask,memory_key_padding_mask):
        src_emb = self.positional_encoding(self.src_tok_emb(src))
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))
        outs = self.transformer(src_emb, tgt_emb, src_mask, tgt_mask, None,
                                src_padding_mask, tgt_padding_mask, memory_key_padding_mask)
        return self.generator(outs)

    def encode(self, src, src_mask):
        return self.transformer.encoder(self.positional_encoding(
                            self.src_tok_emb(src)), src_mask)

    def decode(self, tgt, enc_context, tgt_mask):
        return self.transformer.decoder(self.positional_encoding(
                          self.tgt_tok_emb(tgt)), enc_context,
                          tgt_mask)

In [27]:
def generate_square_subsequent_mask(sz):
    mask = (torch.triu(torch.ones((sz, sz), device=DEVICE)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask


def create_mask(src, tgt):
    src_seq_len = src.shape[0]
    tgt_seq_len = tgt.shape[0]

    tgt_mask = generate_square_subsequent_mask(tgt_seq_len)
    src_mask = torch.zeros((src_seq_len, src_seq_len),device=DEVICE).type(torch.bool)

    src_padding_mask = (src == PAD_IDX).transpose(0, 1)
    tgt_padding_mask = (tgt == PAD_IDX).transpose(0, 1)
    return src_mask, tgt_mask, src_padding_mask, tgt_padding_mask

In [28]:
torch.manual_seed(0)

SRC_VOCAB_SIZE = len(vocab_en)
TGT_VOCAB_SIZE = len(vocab_en)
EMB_SIZE = 512
NHEAD = 8
FFN_HID_DIM = 512
BATCH_SIZE = 16
NUM_ENCODER_LAYERS = 6
NUM_DECODER_LAYERS = 6
PAD_IDX = vocab_en["<PAD>"]
BOS_IDX = vocab_en["<SOS>"]
EOS_IDX = vocab_en["<EOS>"]

In [29]:
transformer = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE,
                                 NHEAD, SRC_VOCAB_SIZE, TGT_VOCAB_SIZE, FFN_HID_DIM)

In [30]:
for p in transformer.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

In [31]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [32]:
transformer = transformer.to(DEVICE)
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=PAD_IDX)
optimizer = torch.optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

In [33]:
def train_epoch(model, optimizer):
    model.train()
    losses = 0
    train_dataloader = DataLoader(train_ds, batch_size=BATCH_SIZE, collate_fn=my_collate)
    pbar = tqdm(train_dataloader)
    for src, tgt in pbar:
        src = src.to(DEVICE)
        tgt = tgt.to(DEVICE)

        tgt_input = tgt[:-1, :]

        src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

        logits = model(src, tgt_input, src_mask, tgt_mask,src_padding_mask, tgt_padding_mask, src_padding_mask)

        optimizer.zero_grad()

        tgt_out = tgt[1:, :]
        loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
        loss.backward()

        optimizer.step()
        losses += loss.item()

    return losses / len(list(train_dataloader))

In [34]:
def evaluate(model):
    model.eval()
    losses = 0

    val_dataloader = DataLoader(test_ds, batch_size=BATCH_SIZE, collate_fn=my_collate)
    pbar = tqdm(val_dataloader)
    for src, tgt in pbar:
        src = src.to(DEVICE)
        tgt = tgt.to(DEVICE)

        tgt_input = tgt[:-1, :]

        src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

        logits = model(src, tgt_input, src_mask, tgt_mask,src_padding_mask, tgt_padding_mask, src_padding_mask)

        tgt_out = tgt[1:, :]
        loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
        losses += loss.item()

    return losses / len(list(val_dataloader))

In [35]:
import gc
gc.collect()

670

In [34]:
NUM_EPOCHS = 100

In [58]:
for epoch in range(1, NUM_EPOCHS+1):
    start_time = timer()
    train_loss = train_epoch(transformer, optimizer)
    end_time = timer()
    val_loss = evaluate(transformer)
    print((f"Epoch: {epoch}, Train loss: {train_loss:.3f}, Val loss: {val_loss:.3f}, "f"Epoch time = {(end_time - start_time):.3f}s"))
    torch.save(transformer.state_dict(), "transformer.pt")

100%|██████████| 176/176 [00:49<00:00,  3.58it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 1, Train loss: 2.378, Val loss: 3.531, Epoch time = 49.633s


100%|██████████| 176/176 [00:49<00:00,  3.58it/s]
100%|██████████| 44/44 [00:04<00:00, 10.53it/s]


Epoch: 2, Train loss: 2.301, Val loss: 3.494, Epoch time = 49.600s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.50it/s]


Epoch: 3, Train loss: 2.232, Val loss: 3.456, Epoch time = 49.597s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.50it/s]


Epoch: 4, Train loss: 2.161, Val loss: 3.425, Epoch time = 49.571s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 5, Train loss: 2.090, Val loss: 3.400, Epoch time = 49.596s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.53it/s]


Epoch: 6, Train loss: 2.025, Val loss: 3.358, Epoch time = 49.581s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.48it/s]


Epoch: 7, Train loss: 1.961, Val loss: 3.323, Epoch time = 49.586s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 8, Train loss: 1.896, Val loss: 3.288, Epoch time = 49.577s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 9, Train loss: 1.835, Val loss: 3.251, Epoch time = 49.596s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 10, Train loss: 1.773, Val loss: 3.213, Epoch time = 49.567s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.50it/s]


Epoch: 11, Train loss: 1.717, Val loss: 3.181, Epoch time = 49.599s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 12, Train loss: 1.658, Val loss: 3.143, Epoch time = 49.600s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.53it/s]


Epoch: 13, Train loss: 1.601, Val loss: 3.111, Epoch time = 49.605s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.56it/s]


Epoch: 14, Train loss: 1.549, Val loss: 3.080, Epoch time = 49.618s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.51it/s]


Epoch: 15, Train loss: 1.497, Val loss: 3.058, Epoch time = 49.550s


100%|██████████| 176/176 [00:49<00:00,  3.58it/s]
100%|██████████| 44/44 [00:04<00:00, 10.49it/s]


Epoch: 16, Train loss: 1.447, Val loss: 3.031, Epoch time = 49.674s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.55it/s]


Epoch: 17, Train loss: 1.396, Val loss: 3.006, Epoch time = 49.605s


100%|██████████| 176/176 [00:49<00:00,  3.58it/s]
100%|██████████| 44/44 [00:04<00:00, 10.49it/s]


Epoch: 18, Train loss: 1.350, Val loss: 2.966, Epoch time = 49.623s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.53it/s]


Epoch: 19, Train loss: 1.305, Val loss: 2.941, Epoch time = 49.604s


100%|██████████| 176/176 [00:49<00:00,  3.59it/s]
100%|██████████| 44/44 [00:04<00:00, 10.52it/s]


Epoch: 20, Train loss: 1.261, Val loss: 2.918, Epoch time = 49.589s


In [35]:
# torch.save(transformer.state_dict(), "transformer.pt")

In [36]:
import gc
gc.collect()

0

In [37]:
transformer.load_state_dict(torch.load("/kaggle/input/mentaltransfromer/transformer.pt"))
transformer = transformer.eval()

In [55]:
itos = vocab_en.get_itos()

In [56]:
# function to generate output sequence using greedy algorithm
def greedy_decode(model, src, src_mask, max_len, start_symbol):
    src = src.to(DEVICE)
    src_mask = src_mask.to(DEVICE)

    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).to(DEVICE)
    for i in range(max_len-1):
        memory = memory.to(DEVICE)
        tgt_mask = (generate_square_subsequent_mask(ys.size(0))
                    .type(torch.bool)).to(DEVICE)
        out = model.decode(ys, memory, tgt_mask)
        out = out.transpose(0, 1)
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim=1)
        next_word = next_word.item()

        ys = torch.cat([ys,
                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
        if next_word == EOS_IDX:
            break
    return ys


# actual function to translate input sentence into target language
def get_response(model, src_sentence, max_len=70, get_score=False):
    model.eval()
    if not get_score:
        src = vocab_en(src_sentence.split())
        src.insert(0,vocab_en["<BOS>"])
        src.append(vocab_en["<EOS>"])
        src = torch.tensor(src).unsqueeze(1)
    if get_score:
        src = src_sentence
    num_tokens = src.shape[0]
    src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool)
    tgt_tokens = greedy_decode(
        model,  src, src_mask, max_len=num_tokens + max_len, start_symbol=BOS_IDX).flatten()
    
    return " ".join([itos[i] for i in tgt_tokens if i not in [0,1,2,3]])

In [78]:
while True:
    you = input("YOU > ")
    if you == "q" or you == "quit":
        break

    bot = get_response(transformer, you).split()
    print(f"""BOT > {" ".join(bot[:30])}
{' ' * 5}{" ".join(bot[30:60])}
{' ' * 5}{" ".join(bot[60:])if len(bot)>60 else ""}\n""")
print(f"\nBOT > Bye! Have a good day dear.")

YOU >  What coping mechanisms do you recommend for managing negative thoughts


BOT > i am so sorry to hear about what happened to you! what you are describing is being in a state of shock. you haven't suddenly become a sociopath - this
     is a normal reaction to an event that is completely overwhelming. there are most likely too many feelings to get started in real mind to start with. the first approach
     to get some trauma counseling with a therapist, i hope this helps.



YOU >  I had a panic attack earlier. What should I do to calm down


BOT > i would suggest keeping a of those days when you are having a hard time. items to write would be what were you doing before you felt this way, did
     you eat and what, what time of day is it, how much sleep did you get that night, this can help you identify any triggers. further assessment can be made
     by a health care professional. it does sound like you are experiencing some symptoms of anxiety.



YOU >   Lately, I've been feeling overwhelmed by a sense of worthlessness and hopelessness. These feelings seem to come and go, but they're significantly impacting my daily life. Can you offer some words of understanding and guidance on how to cope with these challenging emotions?


BOT > i am so sorry that this happened to you! i hope you have some people you find emotionally supportive around you! in your question, i understand what you are talking
     about. sometimes when a person experiences a person experiences a person to get the question, and it is really important to know what is being able to get yourself some
     information about sounds as an always occurs from an image and now that a changes that may mean that you are not working toward the past and a pattern of others, but it is also common response of others, they can certainly help people define the people who are learning to people



YOU >  quit



BOT > Bye! Have a good day dear.
