In [None]:

import typing
from dataclasses import dataclass

import torch
import torch.nn as nn

In [None]:
batch = 32
num_heads = 8
embedding = 512
num_layers = 4
embedding % num_heads == 0

In [None]:
class Conv(nn.Module):
    def __init__(self, c1, c2):
        super(Conv, self).__init__()
        self.c2 = c2
        w = torch.empty(c1, c2)
        nn.init.normal_(w, std=0.2)
        self.w = nn.Parameter(w)
        self.b = nn.Parameter(torch.zeros(c2))

    def forward(self, x):
        new_shape = x.size()[:-1] + (self.c2,)
        x = torch.addmm(self.b, x.view(-1, x.size(-1)), self.w).view(new_shape)
        return x

In [None]:
# test
Conv(5, 15)(torch.ones(15, 1, 5)).shape

def _split_heads(tensor: torch.Tensor):
    new_shape = tensor.size()[:-1] + (num_heads, embedding // num_heads)
    tensor = tensor.view(new_shape).permute(0, 2, 1, 3)
    return tensor

def _merge_heads(tensor: torch.Tensor):
    tensor = tensor.permute(0, 2, 1, 3)
    new_shape = tensor.size()[:-2] + (num_heads * (embedding // num_heads),)
    return tensor.view(new_shape)

tensor_input = torch.ones(batch, 30, embedding, requires_grad=True)
tensor_input.shape

In [None]:
@dataclass
class Config:
    num_embedding: int = 512
    num_heads: int = 8
    max_len: int = 256
    vocab_size: int = 5000
    num_layers: int = 2
    scale_attn_by_layer_idx: bool = False
    use_mask: bool = True
    attn_dropout: float = 0.2
    residual_dropout: float = 0.2
    activation = 'new_gelu'
    hidden_size: int = num_embedding
    max_position_embeddings = max_len
    embd_pdrop: float = 0.1
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    intermediate_size: int = num_embedding * 4


class MultiCNNAttention(nn.Module):
    def __init__(self, config, layer_idx=None):
        super(MultiCNNAttention, self).__init__()
        self.layer_idx = layer_idx
        self.embedding = config.hidden_size
        self.num_heads = config.num_heads
        self.num_div = self.embedding // self.num_heads
        self.scale_attn_by_layer_idx = config.scale_attn_by_layer_idx
        self.use_mask = config.use_mask
        if self.num_heads // self.embedding != 0:
            raise ValueError(
                f'hidden_size must be dividable to num_heads {self.num_heads} // {self.embedding} = {self.num_heads // self.embedding}'
            )
        self.c_attn = Conv(self.embedding, self.embedding * 3)
        self.c_proj = Conv(self.embedding, self.embedding)
        self.residual_dropout = nn.Dropout(config.residual_dropout)
        self.attn_dropout = nn.Dropout(config.attn_dropout)
        self.register_buffer('bias', torch.tril(
            torch.ones(config.max_len, config.max_len, dtype=torch.uint8, device=config.device).view(1, 1,
                                                                                                     config.max_len,
                                                                                                     config.max_len)))

        self.register_buffer('masked_bias', torch.tensor(float(-1e4)))

    def _split_heads(self, tensor: torch.Tensor):
        new_shape = tensor.size()[:-1] + (self.num_heads, self.num_div)
        tensor = tensor.view(new_shape).permute(0, 2, 1, 3)
        return tensor

    def _merge_heads(self, tensor: torch.Tensor):
        tensor = tensor.permute(0, 2, 1, 3)
        new_shape = tensor.size()[:-2] + (self.num_heads * self.num_div,)
        return tensor.reshape(new_shape)

    def _attn(self, query, key, value, attention_mask, head_mask):
        attn_weight = torch.matmul(query, key.transpose(-2, -1))

        attn_weight = attn_weight / torch.full([], value.size(-1) ** 0.5, dtype=attn_weight.dtype,
                                               device=attn_weight.device)
        if self.scale_attn_by_layer_idx:
            attn_weight /= self.layer_idx
        if self.use_mask:
            key_len, query_len = key.size(-2), query.size(-2)
            masked = self.bias[:, :, key_len - query_len:query_len, :key_len].to(attn_weight.device)
            attn_weight = attn_weight.masked_fill(masked == 0, self.masked_bias)
        if attention_mask is not None:
            attn_weight = attn_weight + attention_mask
        attn_weight = nn.functional.softmax(attn_weight, dim=-1)
        attn_weight = self.attn_dropout(attn_weight)
        attn_weight = attn_weight.type(value.dtype)
        if head_mask is not None:
            attn_weight = attn_weight * head_mask

        attn_weight = torch.matmul(attn_weight, value)
        return attn_weight

    def forward(self, hidden_state: typing.Optional[torch.Tensor], attention_mask=None, head_mask=None):
        query, key, value = self.c_attn(hidden_state).split(self.embedding, dim=2)
        query = self._split_heads(query)
        key = self._split_heads(key)
        value = self._split_heads(value)
        attn_output = self._attn(query=query, key=key, value=value, attention_mask=attention_mask, head_mask=head_mask)
        attn_output = self.residual_dropout(self.c_proj(self._merge_heads(attn_output)))
        return attn_output

In [None]:
class PGTMLP(nn.Module):
    def __init__(self, config):
        super(PGTMLP, self).__init__()
        self.c_op = Conv(config.hidden_size, config.intermediate_size)
        self.c_proj = Conv(config.intermediate_size, config.hidden_size)
        self.dropout = nn.Dropout(config.residual_dropout)
        # self.act = get_activation(config.activation)
        self.act = nn.GELU()

    def forward(self, hidden_state):
        hidden_state = self.c_op(hidden_state)
        hidden_state = self.act(hidden_state)
        hidden_state = self.c_proj(hidden_state)
        hidden_state = self.dropout(hidden_state)
        return hidden_state

In [None]:
class PGTBlock(nn.Module):
    def __init__(self, config, layer_idx=None):
        super(PGTBlock, self).__init__()
        self.ln1 = nn.LayerNorm(config.hidden_size)
        self.ln2 = nn.LayerNorm(config.hidden_size)
        self.h = MultiCNNAttention(config=config, layer_idx=layer_idx)
        self.mlp = PGTMLP(config)

    def forward(self, hidden_state, attention_mask=None, heads_mask=None):
        residual = hidden_state
        hidden_state = self.ln1(hidden_state)
        hidden_state = self.h(hidden_state, attention_mask, heads_mask) + residual
        residual = hidden_state
        hidden_state = self.ln2(residual)
        hidden_state = self.mlp(hidden_state) + residual
        return hidden_state

In [None]:
class PGT(nn.Module):
    def __init__(self, config):
        super().__init__()

        self.embed_dim = config.hidden_size

        self.wte = nn.Embedding(config.vocab_size, self.embed_dim)
        self.wpe = nn.Embedding(config.max_position_embeddings, self.embed_dim)
        self.max_position_embeddings = config.max_position_embeddings
        self.drop = nn.Dropout(config.embd_pdrop)
        self.h = nn.ModuleList([PGTBlock(config, layer_idx=i) for i in range(config.num_layers)])
        self.ln_f = nn.LayerNorm(self.embed_dim)

        # Model parallel
        self.model_parallel = False
        self.device_map = None
        self.gradient_checkpointing = False

        # Initialize weights and apply final processing

    def get_input_embeddings(self):
        return self.wte

    def set_input_embeddings(self, new_embeddings):
        self.wte = new_embeddings

    def forward(self, inputs: typing.Optional[torch.LongTensor], attention_mask=None, heads_mask=None):
        token_embeddings = self.wte(inputs)
        pos_embeddings = self.wpe(torch.arange(0, inputs.size(-1), dtype=inputs.dtype, device=inputs.device))
        hidden = self.drop(token_embeddings + pos_embeddings)
        for m in self.h:
            hidden = m(hidden, attention_mask=attention_mask, heads_mask=heads_mask)
        hidden = self.ln_f(hidden)
        return hidden

In [None]:
m = PGT(config=Config)

In [None]:
sum(mm.numel() for mm in m.parameters()) / 1e6

In [None]:
x = torch.tensor([[1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 8, 7, 102, 562]])

In [None]:
print(x.size())

In [None]:
ss = m(x)

In [None]:
ss.size()

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv('topical_chat.csv')

In [None]:
df = df.drop(columns='sentiment')

In [None]:
df.head()

In [None]:
start = 1


In [None]:
data_id = df['conversation_id']
data_msg = df['message']

In [None]:
data_msg

In [None]:
data = []
for i, m in zip(data_id, data_msg):
    data.append({
        'id': i,
        'message': m
    })

In [None]:
total_coms = len(list(set(df['conversation_id'])))
list_coms = list(set(df['conversation_id']))
start = list_coms[0]
string_data = ''
global question
set_a = 0
for i, d in enumerate(data):
    if d['id'] == start:
        start += 1
        question = d['message']
        set_a = i
    else:
        if set_a + 1 == i:
            answer = d['message']
            string_data += f'[SEP]{question}[SEP]{answer}\n'

In [None]:
string_data

In [None]:
with open('QA.txt', 'w', encoding='utf8') as wr:
    wr.write(string_data)

In [None]:
!pwd

In [None]:
import os

print(os.getcwd())

In [None]:
from threading import Thread
import time


def fp(income: int):
    for o in range(5):
        time.sleep(5)
        print(o, f" {income}")


c = [Thread(target=fp, args=(i,), daemon=True) for i in range(4)]
for t in c:
    print(f'Starting {t}')
    t.start()

for t in c:
    print(f'Running {t}')
    t.run()

In [1]:
import pandas as pd

In [3]:
df = pd.read_csv('topical_chat.csv')

In [4]:
df = df.drop(columns='sentiment')

In [5]:
df.head()

Unnamed: 0,conversation_id,message
0,1,Are you a fan of Google or Microsoft?
1,1,Both are excellent technology they are helpfu...
2,1,"I'm not a huge fan of Google, but I use it a..."
3,1,Google provides online related services and p...
4,1,"Yeah, their services are good. I'm just not a..."


In [6]:
start = 1


In [7]:
data_id = df['conversation_id']
data_msg = df['message']

In [None]:
data_msg

In [8]:
data = []
for i, m in zip(data_id, data_msg):
    data.append({
        'id': i,
        'message': m
    })

In [12]:
total_coms = len(list(set(df['conversation_id'])))
list_coms = list(set(df['conversation_id']))
start = list_coms[0]
string_data = ''
global question
set_a = 0
for i, d in enumerate(data):
    if d['id'] == start:
        start += 1
        question = d['message']
        set_a = i
    else:
        if set_a + 1 == i:
            answer = d['message']
            string_data += f'[SEP]{question}[SEP]{answer}\n'

In [13]:
string_data

'[SEP] Are you a fan of Google or Microsoft?[SEP] Both are excellent technology they are helpful in many ways. For the security purpose both are super.\n[SEP] do you like dance?[SEP] Yes  I do. Did you know Bruce Lee was a cha cha dancer?\n[SEP] Hey what\'s up do use Google very often?I really love the company and was surprised to hear that it was founded back in 1998.[SEP] i think everyone must use it daily! its become ingrained in every day life\n[SEP] Hi!  do you like to dance?[SEP] I love to dance a lot. How about you?\n[SEP] do you like dance?[SEP] I love it. Did you know Bruce Lee was a dancer?\n[SEP] hi, do you use google much?[SEP] Yes, I think it is the most effective search engine. How about you? Do you use gmail?\n[SEP] Do you like comic books?[SEP] I do like comic books!\n[SEP] Can you believe there is stars that can be cold enough to be touched ? The universe is so diverse. [SEP] Its amazing and ever evolving. \n[SEP] Are you a Star Wars fan? I need to rewatch the OT and t

In [14]:
with open('QA.txt', 'w', encoding='utf8') as wr:
    wr.write(string_data)

In [37]:
!pwd

'pwd' is not recognized as an internal or external command,
operable program or batch file.


In [39]:
import os

print(os.getcwd())

E:\Programming\Python\Ai-Projects\MODEL P\scripts


In [30]:
from threading import Thread
import time


def fp(income: int):
    for o in range(5):
        time.sleep(5)
        print(o, f" {income}")


c = [Thread(target=fp, args=(i,), daemon=True) for i in range(4)]
for t in c:
    print(f'Starting {t}')
    t.start()

for t in c:
    print(f'Running {t}')
    t.run()

Starting <Thread(Thread-66, initial daemon)>
Starting <Thread(Thread-67, initial daemon)>
Starting <Thread(Thread-68, initial daemon)>
Starting <Thread(Thread-69, initial daemon)>
Running <Thread(Thread-66, started daemon 8456)>
0  2
0  3
0  0
0  1
0  0
1  3
1  2
1  1
1  0
1  0
22  3
2  2
  0
2  1
2  0
33  3
3  1
  2
3  0
3  0
44  0
4  1
4  2
4  3
  0


AttributeError: _target