In [34]:
%python -m spacy download de_core_news_sm
%python -m spacy download en_core_web_sm
#%pip install -r requirements.txt

UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).


In [35]:
import os 
from os.path import exists
import torch
import torch.nn as nn
from torch.nn.functional import log_softmax,pad
import math 
import copy
import time 
from torch.optim.lr_scheduler import LambdaLR

import pandas as pd 
import altair as alt


from torch.utils.data import DataLoader
from torchtext.vocab import build_vocab_from_iterator
import torchtext.datasets as datasets
import spacy
import GPUtil
import warnings 
from torch.utils.data.distributed import DistributedSampler
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP


# Switch consulted by show_example / execute_example before running a demo.
RUN_EXAMPLES = True

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

In [36]:
# Convenience helpers used throughout the notebook.
def is_interactive_notebook():
    """Return True when this module's ``__name__`` equals ``"__main__"``."""
    running_as_main = __name__ == "__main__"
    return running_as_main

def show_example(fn,args=[]):
    """Call ``fn(*args)`` and return its result when examples are enabled.

    The call only happens when the module runs as ``__main__`` and
    ``RUN_EXAMPLES`` is truthy; otherwise ``None`` is returned and ``fn`` is
    never invoked.
    """
    if not (__name__=="__main__" and RUN_EXAMPLES):
        return None
    return fn(*args)

def execute_example(fn,args=[]):
    """Call ``fn(*args)`` for its side effects when examples are enabled.

    Nothing happens unless the module runs as ``__main__`` and
    ``RUN_EXAMPLES`` is truthy. The result of ``fn`` is discarded; this
    function always returns ``None``.
    """
    if __name__!="__main__" or not RUN_EXAMPLES:
        return
    fn(*args)


class DummyOptimizer(torch.optim.Optimizer):
    """Optimizer stand-in whose methods do nothing.

    ``Optimizer.__init__`` is not invoked; the only state created is a single
    parameter group holding a learning rate of 0.
    """

    def __init__(self):
        self.param_groups = [dict(lr=0)]

    def step(self):
        """No-op; returns ``None``."""
        return None

    def zero_grad(self, set_to_none=False):
        """No-op; ``set_to_none`` is accepted and ignored."""
        return None

class DummyScheduler:
    """Scheduler stand-in whose ``step`` does nothing."""

    def step(self):
        """No-op; returns ``None``."""
        return None

In [37]:
class EncoderDecoder(nn.Module):
    """Encoder-decoder wrapper that wires embeddings, encoder and decoder.

    Args:
        encoder: callable invoked as ``encoder(src_embed(src), src_mask)``.
        decoder: callable invoked as
            ``decoder(tgt_embed(tgt), memory, src_mask, tgt_mask)``.
        src_embed: callable applied to ``src`` before encoding.
        tgt_embed: callable applied to ``tgt`` before decoding.
        generator: stored on the instance; it is not applied by ``forward``.
    """
    def __init__(self,encoder,decoder,src_embed,tgt_embed,generator) -> None:
        super(EncoderDecoder,self).__init__()
        self.encoder=encoder
        self.decoder=decoder
        self.src_embed=src_embed
        self.tgt_embed=tgt_embed
        self.generator=generator

    def forward(self,src,tgt,src_mask,tgt_mask):
        """Take in and process masked src and target sequences.

        Returns the decoder output for ``tgt`` conditioned on the encoding
        of ``src``.
        """
        # Must be spelled `forward` so nn.Module.__call__ dispatches to it.
        return self.decode(self.encode(src,src_mask),src_mask,tgt,tgt_mask)

    def encode(self,src,src_mask):
        """Embed ``src`` and run it through the encoder."""
        return self.encoder(self.src_embed(src),src_mask)

    def decode(self,memory,src_mask,tgt,tgt_mask):
        """Embed ``tgt`` and run the decoder against ``memory``."""
        # Named `decode`, not `decoder`: a method called `decoder` would shadow
        # the registered sub-module and make this call recurse into itself.
        return self.decoder(self.tgt_embed(tgt),memory,src_mask,tgt_mask)

In [38]:
class Generator(nn.Module):
    """Linear projection followed by a log-softmax (the generation step).

    Args:
        d_model: input feature size of the projection.
        vocab: output feature size of the projection.
    """

    def __init__(self, d_model, vocab) -> None:
        super().__init__()
        self.proj = nn.Linear(d_model, vocab)

    def forward(self, x):
        """Project ``x`` and return log-probabilities over the last dimension."""
        logits = self.proj(x)
        return log_softmax(logits, dim=-1)


In [39]:

def clones(module,N):
    """Return an ``nn.ModuleList`` holding ``N`` deep copies of ``module``."""
    copies=nn.ModuleList()
    for _ in range(N):
        copies.append(copy.deepcopy(module))
    return copies

    

In [40]:
class Encoder(nn.Module):
    """Stack of ``N`` deep-copied layers followed by a ``LayerNorm``.

    Args:
        layer: module to replicate; must expose a ``size`` attribute, which
            is passed to ``LayerNorm``.
        N: number of copies to stack.
    """

    def __init__(self,layer,N):
        super().__init__()
        self.layers=clones(layer,N)
        self.norm=LayerNorm(layer.size)

    def forward(self, x,mask):
        """Pass ``x`` (with ``mask``) through every layer, then normalize."""
        hidden=x
        for block in self.layers:
            hidden=block(hidden,mask)
        return self.norm(hidden)

In [41]:
class LayerNorm(nn.Module):
    """Normalize over the last dimension with a learnable scale and shift.

    Args:
        features: length of the ``a_2`` (scale, initialised to ones) and
            ``b_2`` (shift, initialised to zeros) parameters.
        eps: constant added to the standard deviation before dividing.
    """

    def __init__(self,features,eps=1e-6):
        super().__init__()
        self.a_2=nn.Parameter(torch.ones(features))
        self.b_2=nn.Parameter(torch.zeros(features))
        self.eps=eps

    def forward(self, x):
        """Return ``a_2 * (x - mean) / (std + eps) + b_2`` along the last dim."""
        centered=x-x.mean(-1,keepdim=True)
        denom=x.std(-1,keepdim=True)+self.eps
        # Scale first, then divide, then shift.
        scaled=self.a_2*centered
        return scaled/denom+self.b_2
        

In [42]:
class SublayerConnection(nn.Module):
    """Residual connection around a sublayer, with the norm applied first.

    Args:
        size: feature size handed to ``LayerNorm``.
        dropout: probability handed to ``nn.Dropout``.
    """

    def __init__(self,size,dropout):
        super().__init__()
        self.norm=LayerNorm(size)
        self.dropout=nn.Dropout(dropout)

    def forward(self, x,sublayer):
        """Return ``x + dropout(sublayer(norm(x)))``."""
        normed=self.norm(x)
        update=self.dropout(sublayer(normed))
        return x+update

In [43]:
class EncoderLayer(nn.Module):
    """Encoder layer made up of self-attention and a feed-forward network.

    Each layer has two sub-layers: the first is a multi-head self-attention
    mechanism and the second is a simple, position-wise fully connected
    feed-forward network. Each one is wrapped in its own
    ``SublayerConnection``.

    Args:
        size: feature size, kept on ``self.size`` and passed to
            ``SublayerConnection``.
        self_attn: callable invoked as ``self_attn(x, x, x, mask)``.
        feed_forward: callable invoked as ``feed_forward(x)``.
        dropout: dropout probability passed to ``SublayerConnection``.
    """

    def __init__(self,size,self_attn,feed_forward,dropout):
        super().__init__()
        self.self_attn=self_attn
        self.feed_forward=feed_forward
        self.sublayer=clones(SublayerConnection(size,dropout),2)
        self.size=size

    def forward(self, x,mask):
        """Apply the self-attention sub-layer, then the feed-forward one."""
        def attend(h):
            # Query, key and value are all the same tensor.
            return self.self_attn(h,h,h,mask)

        attended=self.sublayer[0](x,attend)
        return self.sublayer[1](attended,self.feed_forward)

In [None]:
class Decoder(nn.Module):
    """Stack of ``N`` deep-copied decoder layers followed by a ``LayerNorm``.

    Args:
        layer: module to replicate; must expose a ``size`` attribute, which
            is passed to ``LayerNorm``.
        N: number of copies to stack.
    """

    def __init__(self,layer,N):
        super().__init__()
        self.layers=clones(layer,N)
        self.norm=LayerNorm(layer.size)

    def forward(self, x,memory,src_mask,tgt_mask):
        """Run ``x`` through every layer with ``memory`` and both masks, then normalize."""
        hidden=x
        for block in self.layers:
            hidden=block(hidden,memory,src_mask,tgt_mask)
        return self.norm(hidden)

In [None]:
class DecoderLayer(nn.Module):
    """Decoder layer: self-attention, source attention, then feed-forward.

    Each of the three sub-layers is wrapped in its own
    ``SublayerConnection``.

    Args:
        size: feature size, kept on ``self.size`` and passed to
            ``SublayerConnection``.
        self_attn: callable invoked as ``self_attn(x, x, x, tgt_mask)``.
        src_attn: callable invoked as ``src_attn(x, memory, memory, src_mask)``.
        feed_forward: callable invoked as ``feed_forward(x)``.
        dropout: dropout probability passed to ``SublayerConnection``; also
            kept on ``self.dropout``.
    """
    def __init__(self,size,self_attn,src_attn,feed_forward,dropout):
        super(DecoderLayer, self).__init__()
        self.size=size
        # Keep the two attention modules distinct: self_attn attends over the
        # target sequence, src_attn over the encoder memory.
        self.self_attn=self_attn
        self.src_attn=src_attn
        self.feed_forward=feed_forward
        self.dropout=dropout
        # Three residual wrappers, one per sub-layer used in forward().
        self.sublayer=clones(SublayerConnection(size,dropout),3)

    def forward(self,x,memory,src_mask,tgt_mask):
        """Apply the three sub-layers in order and return the result."""
        m=memory
        x=self.sublayer[0](x,lambda x: self.self_attn(x,x,x,tgt_mask))
        x=self.sublayer[1](x,lambda x: self.src_attn(x,m,m,src_mask))
        return self.sublayer[2](x,self.feed_forward)

In [None]:
def subsequent_mask(size):
    """Return a ``(1, size, size)`` boolean mask hiding later positions.

    Entry ``[0, i, j]`` is True when ``j <= i``, which keeps self-attention
    from attending to positions after the current one.
    """
    # Ones strictly above the main diagonal mark the positions to hide.
    hidden=torch.triu(torch.ones((1,size,size)),diagonal=1)
    hidden=hidden.type(torch.uint8)
    return hidden==0

In [None]:
def example_mask():
    """Build an Altair heatmap of ``subsequent_mask(20)``.

    A long-form DataFrame with one row per ``(x, y)`` cell is assembled from
    the mask and handed to ``alt.Chart``.

    Returns:
        The Altair chart object.
    """
    # Compute the mask once rather than once per cell.
    mask=subsequent_mask(20)[0]
    LS_data=pd.concat(
        [pd.DataFrame(
            {
                "Subsequent Mask": mask[x,y].flatten(),
                "Window": x,
                "Masking": y,
            }
        )
        for y in range(20)
        for x in range(20)
        ]
    )
    # NOTE(review): the original chain ended in a dangling `.` after
    # `.properties(...)` (a SyntaxError) and misspelled `width` as `windth`.
    # The encoding below maps the three DataFrame columns onto x, y and
    # colour - confirm this is the intended chart.
    return (
        alt.Chart(LS_data)
        .mark_rect()
        .properties(height=250,width=250)
        .encode(
            alt.X("Window:O"),
            alt.Y("Masking:O"),
            alt.Color("Subsequent Mask:Q",scale=alt.Scale(scheme="viridis")),
        )
    )