In [1]:
from fastai.text.all import *
import pandas as pd

In [2]:
if torch.cuda.is_available():
  device = "cuda:0"
else:
  device = "cpu"
device

'cuda:0'

In [3]:
path = untar_data(URLs.IMDB)

https://docs.fast.ai/text.core.html

In [7]:
files = get_text_files(path, folders = ['train', 'test', 'unsup'])

In [9]:
txt = files[0].open().read()
txt[:75]

"It's about jealousy, it's about racism, it's about manipulation, but the un"

In [61]:
txts = L(o.open().read() for o in files[:200])
txts

(#200) ['It\'s about jealousy, it\'s about racism, it\'s about manipulation, but the underlying message is love. Geoffrey Sax tried to pull off Shakespeare\'s Othello, by bringing it to modern day context. However, the actors were not convincing enough to pull this off. There were extra bodies to help put everything in to perspective, however, John Othello, played by Eamonn Walker, over reacted a lot in this film, causing for the down fall of Keeley Hawes, Dessie Brabant, eventually ending in Dessie\'s death. <br /><br />Ben Jago, played by Christopher Eccleston, was seen as the main character in the film. He didn\'t give enough evidence for Dessie to be cheating on Othello, with Michael Cass, played by Richard Coyle. Instead he just played a friend to all and gave one reason as to why she "was" cheating. In the play, it took a lot more convincing from Iago to make Othello even suspect anything. This change made the movie more about rage for the wrong reasons, than what the book was ba

## Subword Tokenization

In [116]:
def subword(sz):
    sp = SubwordTokenizer(vocab_sz=sz)
    sp.setup(txts)
    return ' '.join(first(sp([txt]))[:])

In [16]:
subword(10000)

"▁It ' s ▁about ▁jea lousy , ▁it ' s ▁about ▁racism , ▁it ' s ▁about ▁ manipulati on , ▁but ▁the ▁under ly ing ▁message ▁is ▁love . ▁Ge off re y ▁S ax ▁tri ed ▁to ▁pull"

In [117]:
subword(1000)

'▁It \' s ▁about ▁ j e al ous y , ▁it \' s ▁about ▁ra c is m , ▁it \' s ▁about ▁man i p ul ation , ▁but ▁the ▁under ly ing ▁mess age ▁is ▁love . ▁G e off re y ▁S a x ▁tri ed ▁to ▁p u ll ▁off ▁S h ake s p ear e \' s ▁Othello , ▁by ▁ br ing ing ▁it ▁to ▁modern ▁day ▁con t e x t . ▁How ever , ▁the ▁actors ▁were ▁not ▁con v in c ing ▁enough ▁to ▁p u ll ▁this ▁off . ▁There ▁were ▁ex t ra ▁bo d ies ▁to ▁help ▁put ▁everything ▁in ▁to ▁p ers p ective , ▁how ever , ▁John ▁Othello , ▁play ed ▁by ▁E a m on n ▁W al k er , ▁over ▁re act ed ▁a ▁lot ▁in ▁this ▁film , ▁c a us ing ▁for ▁the ▁down ▁fall ▁of ▁K e e le y ▁H a w es , ▁De s s ie ▁B ra b ant , ▁eventually ▁end ing ▁in ▁De s s ie \' s ▁de a th . ▁ < br ▁/> < br ▁/> B en ▁Ja g o , ▁play ed ▁by ▁Christ op h er ▁E c c le st on , ▁was ▁seen ▁as ▁the ▁main ▁character ▁in ▁the ▁film . ▁He ▁did n \' t ▁give ▁enough ▁ e v id en ce ▁for ▁De s s ie ▁to ▁be ▁ch e ating ▁on ▁Othello , ▁with ▁M ich a el ▁C as s , ▁play ed ▁by ▁R ich ard ▁C o y le . ▁Inste

## Numericalization

In [17]:
spacy = WordTokenizer()
tkn = Tokenizer(spacy)



In [36]:
toks200 = txts[:200].map(tkn)
toks200[0]

(#530) ['xxbos','xxmaj','it',"'s",'about','jealousy',',','it',"'s",'about'...]

In [23]:
num = Numericalize()
num.setup(toks200)
coll_repr(num.vocab,20)

"(#1920) ['xxunk','xxpad','xxbos','xxeos','xxfld','xxrep','xxwrep','xxup','xxmaj','the','.',',','and','a','to','of','is','i','it','in'...]"

In [25]:
toks = tkn(txt)

In [26]:
nums = num(toks)[:20]
nums

TensorText([   2,    8,   18,   25,   66,    0,   11,   18,   25,   66, 1416,
              11,   18,   25,   66,    0,   11,   31,    9,    0])

In [29]:
' '.join(num.vocab[o] for o in nums)

"xxbos xxmaj it 's about xxunk , it 's about racism , it 's about xxunk , but the xxunk"

In [33]:
toks[:20]

(#20) ['xxbos','xxmaj','it',"'s",'about','jealousy',',','it',"'s",'about'...]

## Batches

In [35]:
nums200 = toks200.map(num)

dl = LMDataLoader(nums200)

x,y = first(dl)
x.shape, y.shape

(torch.Size([64, 72]), torch.Size([64, 72]))

In [40]:
' '.join(num.vocab[o] for o in x[0][:20])

"xxbos xxmaj it 's about xxunk , it 's about racism , it 's about xxunk , but the xxunk"

In [41]:
' '.join(num.vocab[o] for o in y[0][:20])

"xxmaj it 's about xxunk , it 's about racism , it 's about xxunk , but the xxunk message"

## FastAI Transforms

In [43]:
files = get_text_files(path, folders = ['train','test'])
txts = L(o.open().read() for o in files[:2000])

In [44]:
tok = Tokenizer.from_folder(path)
tok.setup(txts)
toks = txts.map(tok)
toks[0]

(#530) ['xxbos','xxmaj','it',"'s",'about','jealousy',',','it',"'s",'about'...]

In [45]:
num = Numericalize()
num.setup(toks)
nums = toks.map(num)
nums[0][:10]

TensorText([   2,    8,   18,   23,   62, 6709,   11,   18,   23,   62])

In [46]:
nums_dec = num.decode(nums[0][:10])
nums_dec

(#10) ['xxbos','xxmaj','it',"'s",'about','jealousy',',','it',"'s",'about']

In [47]:
tok.decode(nums_dec)

"xxbos xxmaj it 's about jealousy , it 's about"

In [48]:
tok((txts[0], txt[1]))

((#530) ['xxbos','xxmaj','it',"'s",'about','jealousy',',','it',"'s",'about'...],
 (#2) ['xxbos','t'])

In [50]:
def f(x:int): return x+1
tfm = Transform(f)
tfm(2),tfm(2.0)

(3, 2.0)

In [51]:
@Transform
def f(x:int): return x+1
f(2),f(2.0)

(3, 2.0)

In [55]:
class NormalizeMean(Transform):
    def setups(self, items): self.mean = sum(items)/len(items)
    def encodes(self, x): return x-self.mean
    def decodes(self, x): return x+self.mean

In [57]:
tfm = NormalizeMean()
tfm.setup([1,2,3,4,5])
start = 2
y = tfm(start)
z = tfm.decode(y)
tfm.mean,y,z

(3.0, -1.0, 2.0)

In [60]:
doc(Transform)

Transform(self, enc=None, dec=None, split_idx=None, order=None)
Delegates (`__call__`,`decode`,`setup`) to (<code>encodes</code>,<code>decodes</code>,<code>setups</code>) if `split_idx` matches

To get a prettier result with hyperlinks to source code and documentation, install nbdev: pip install nbdev


In [63]:
tfms = Pipeline([tok, num])
t = tfms(txts[0])
t[:20]

TensorText([   2,    8,   18,   23,   62, 6709,   11,   18,   23,   62, 4564,
              11,   18,   23,   62, 7975,   11,   31,    9, 5107])

In [64]:
tfms.decode(t)[:100]

"xxbos xxmaj it 's about jealousy , it 's about racism , it 's about manipulation , but the underlyin"

In [65]:
tls = TfmdLists(files, [Tokenizer.from_folder(path), Numericalize])

In [66]:
t = tls[0]
t[:20]

TensorText([   2,    8,   17,   23,   60, 5851,   11,   17,   23,   60, 3192,
              11,   17,   23,   60, 7436,   11,   31,    9, 4473])

In [68]:
tls.decode(t)[:100]

"xxbos xxmaj it 's about jealousy , it 's about racism , it 's about manipulation , but the underlyin"

In [69]:
tls.show(t)

xxbos xxmaj it 's about jealousy , it 's about racism , it 's about manipulation , but the underlying message is love . xxmaj geoffrey xxmaj sax tried to pull off xxmaj shakespeare 's xxmaj othello , by bringing it to modern day context . xxmaj however , the actors were not convincing enough to pull this off . xxmaj there were extra bodies to help put everything in to perspective , however , xxmaj john xxmaj othello , played by xxmaj eamonn xxmaj walker , over reacted a lot in this film , causing for the down fall of xxmaj keeley xxmaj hawes , xxmaj xxunk xxmaj xxunk , eventually ending in xxmaj xxunk 's death . 

 xxmaj ben xxmaj jago , played by xxmaj christopher xxmaj eccleston , was seen as the main character in the film . xxmaj he did n't give enough evidence for xxmaj xxunk to be cheating on xxmaj othello , with xxmaj michael xxmaj cass , played by xxmaj richard xxmaj coyle . xxmaj instead he just played a friend to all and gave one reason as to why she " was " cheating . xxmaj i

In [70]:
cut = int(len(files)*0.8)
splits = [list(range(cut)), list(range(cut,len(files)))]
tls = TfmdLists(files, [Tokenizer.from_folder(path), Numericalize], splits=splits)

In [71]:
tls.valid[0][:20]

TensorText([  2,   8,  65,  80,  19, 722,  21,   9, 917, 807,  42,  36, 493,
            326,  10,   8,  20,  16,  13, 103])

In [104]:
tfms = [[Tokenizer.from_folder(path), Numericalize]]
files = get_text_files(path, folders = ['train', 'test'])
splits = GrandparentSplitter(valid_name='test')(files)
dsets = Datasets(files, tfms, splits=splits)
dls = dsets.dataloaders(dl_type=LMDataLoader, before_batch=pad_input)

In [105]:
dls.show_batch(max_n=2)

Unnamed: 0,text,text_
0,"xxbos xxmaj from the q & a before and after , this is what i could gather : xxmaj some xxmaj irish guy wants to make a movie . xxmaj nothing in particular , just any movie . xxmaj so , one night at a party , he hears some ex - roadie tell him a classic bit of rock n ' roll lore ; the one about how xxmaj gram xxmaj","xxmaj from the q & a before and after , this is what i could gather : xxmaj some xxmaj irish guy wants to make a movie . xxmaj nothing in particular , just any movie . xxmaj so , one night at a party , he hears some ex - roadie tell him a classic bit of rock n ' roll lore ; the one about how xxmaj gram xxmaj parsons"
1,"style . xxmaj but this movie , however , was probably the worst movie i have seen in 2008 . i can honestly believe that this movie is unknown , and i think it should stay like this , for movies like these are making the thriller genre a joke . \n\n i advise anyone that is a fan of thriller movies , or even simply movies to stay far away from",". xxmaj but this movie , however , was probably the worst movie i have seen in 2008 . i can honestly believe that this movie is unknown , and i think it should stay like this , for movies like these are making the thriller genre a joke . \n\n i advise anyone that is a fan of thriller movies , or even simply movies to stay far away from this"


In [111]:
# Get a batch of data from the DataLoaders
x, y = dls.one_batch()

' '.join(num.vocab[o] for o in x[0][:20])


'xxbos xxmaj gives my have mouth him acts whore this as is . xxmaj i is and though of t'

In [4]:
files = get_text_files(path, folders = ['train', 'test', 'unsup'])
txts = L(o.open().read() for o in files[:20000])
txts

(#20000) ['I´ve seen the original Candyman once and it was quite an effective horror movie that seems to be regarded as a modern horror classic. I have not seen the first sequel and I have no intention whatsoever to even think about renting it. Maybe they´ll show it on TV and I can catch it if I don´t have anything better to do.<br /><br />Candyman 3... The third part in a horror series can´t be that good (the only exception being Scream 3), it just can´t. To be honest, the only reason why I bothered to watch this movie was that I was sure they would show the leading lady naked at some point. At least in teen horror flicks you get to see naked ladies. Well, we do get some breasts, but they don´t belong to the main lady. I kept waiting and waiting till the very end and what did I get? NOTHING! Instead of some skin the people who most likely accidentally ended up seeing the movie got a stupid brainless movie with mediocre-at-best performances and hilarious one-liners. If it´s blood that 

In [5]:
tok = Tokenizer.from_folder(path)
tok.setup(txts)
toks = txts.map(tok)

num = Numericalize()
num.setup(toks)
nums = toks.map(num)

In [237]:
len(nums)

1000

In [6]:
tfms = [[tok, num]]
files = get_text_files(path, folders = ['train', 'test'])[:20000]
cut = int(len(files)*0.8)
splits = [list(range(cut)), list(range(cut,len(files)))]
#splits = GrandparentSplitter(valid_name='test')(files)
dsets = Datasets(files, tfms, splits=splits)
dls = dsets.dataloaders(dl_type=LMDataLoader)

In [184]:
doc(DataLoaders)

In [183]:
dsets.valid[0]

(TensorText([   2,    8,   20,   16,   13, 1197,  401,  107,   66,    9,  229,
              582,   15,  311,    5,  161,  871,   80,   13,  374,    0,    0,
               65,   12,   61,   17,    0, 1854,   10,    8,   20,   16,  684,
               14,   43,   66,  154,  114,   57,  802,  140,   54,  347,  443,
              120,   19,   55, 1103, 1318,   10,    8,   52,   25,   13,    0,
               12,   13,    0,  231,   13, 1806,   19,    9,  144,   25, 1728,
               44,   34,  101,   86,  103,    9,  299,   25,  615, 1103,   10,
                8,  116,  277,   52,   16,   13,    0,  167,   36,  102,   15,
                9,  518,   16,  151, 1094,    0,   61,   27,    8, 1573,    0,
               49,   27,    8,   33,   89,   10,   27,   17,    0,    0,   12,
              566,  439,  120,  164,   84,   16, 1086,   19,   13,    0,   35,
               10,    8,  932,   17,  566,   33,  175,   11,   84,  141,  529,
              354,   71,   70,    0,   80,   17,   7

In [158]:
len(dls[1])

27

In [7]:
dls.show_batch()

Unnamed: 0,text,text_
0,"xxbos "" one shot , one kill , no exceptions . "" a must see if you are into marines or snipers . two big thumbs up ! xxmaj great overall storyline , great camera work , good drama , action , details , and more . xxmaj pretty close to the real thing . xxmaj but this is n't a film to breakdown and pick out the editing faults . this",""" one shot , one kill , no exceptions . "" a must see if you are into marines or snipers . two big thumbs up ! xxmaj great overall storyline , great camera work , good drama , action , details , and more . xxmaj pretty close to the real thing . xxmaj but this is n't a film to breakdown and pick out the editing faults . this is"
1,"xxmaj australia , no redeeming features what so ever . xxmaj the dialog was hokey , the acting , awful and the script sucked ! ! xxmaj whoever thought it would be a good idea to do a sequel or follow up to the far superior xxmaj john xxmaj badham film , xxmaj wargames from the 80s , well they must of been on something cause it was a bad idea !","australia , no redeeming features what so ever . xxmaj the dialog was hokey , the acting , awful and the script sucked ! ! xxmaj whoever thought it would be a good idea to do a sequel or follow up to the far superior xxmaj john xxmaj badham film , xxmaj wargames from the 80s , well they must of been on something cause it was a bad idea ! !"
2,"xxmaj midler movie or go to the nearest music store and purchase one of xxmaj bette xxmaj midler 's albums . xxbos xxmaj this movie is one of the masterpieces from xxmaj mr . xxmaj antonioni . xxmaj it is about youth , distraction , happiness , alienation , materialism , honor , corruption . xxmaj and it is like everything else from great xxmaj italian director xxunk art . xxbos i","midler movie or go to the nearest music store and purchase one of xxmaj bette xxmaj midler 's albums . xxbos xxmaj this movie is one of the masterpieces from xxmaj mr . xxmaj antonioni . xxmaj it is about youth , distraction , happiness , alienation , materialism , honor , corruption . xxmaj and it is like everything else from great xxmaj italian director xxunk art . xxbos i watched"
3,"time . xxmaj avoid this like the plague , annoying music throughout , terrible editing , no comedy , its xxunk than a novelty mug … my xxunk wanted to watch this thinking it would be xxmaj legally xxmaj blonde material or something kind of watchable , but never better than average , chick flick . xxmaj its the first time she was begging me to push the stop button . \n\n",". xxmaj avoid this like the plague , annoying music throughout , terrible editing , no comedy , its xxunk than a novelty mug … my xxunk wanted to watch this thinking it would be xxmaj legally xxmaj blonde material or something kind of watchable , but never better than average , chick flick . xxmaj its the first time she was begging me to push the stop button . \n\n xxmaj"
4,"had xxmaj world xxmaj premiered the day before at the xxmaj xxunk xxmaj theatre xxup xxunk xxmaj screening xxmaj room . \n\n xxmaj the basic plot involves xxmaj morgan xxmaj freeman playing a one time popular actor who is on the downward slope of his career and who is taking on roles that may be beneath him , but which he still does with a positive attitude knowing that he needs to","xxmaj world xxmaj premiered the day before at the xxmaj xxunk xxmaj theatre xxup xxunk xxmaj screening xxmaj room . \n\n xxmaj the basic plot involves xxmaj morgan xxmaj freeman playing a one time popular actor who is on the downward slope of his career and who is taking on roles that may be beneath him , but which he still does with a positive attitude knowing that he needs to pay"
5,"whoopi as xxmaj blind xxmaj lemon and imitating xxmaj mick xxmaj jagger ? xxmaj great moments captured on film for sure but the movie still kind of sucks , right ? xxmaj that 's how i feel about "" rich xxmaj in xxmaj love "" . a man hears his wife sing for the first time . xxmaj post - xxunk teenagers talk about the nature of love . xxmaj albert xxmaj","as xxmaj blind xxmaj lemon and imitating xxmaj mick xxmaj jagger ? xxmaj great moments captured on film for sure but the movie still kind of sucks , right ? xxmaj that 's how i feel about "" rich xxmaj in xxmaj love "" . a man hears his wife sing for the first time . xxmaj post - xxunk teenagers talk about the nature of love . xxmaj albert xxmaj finney"
6,"questions that hit me this moment : xxmaj polish xxmaj xxunk 's xxmaj french is plenty colloquial ( un maximum d ' "" un maximum d ' "" ) . xxmaj even so , does she have an accent ? i think i can tell she does . xxmaj what does the absence of color add , especially at the single spot the fringe of the city is glimpsed ? xxmaj how","that hit me this moment : xxmaj polish xxmaj xxunk 's xxmaj french is plenty colloquial ( un maximum d ' "" un maximum d ' "" ) . xxmaj even so , does she have an accent ? i think i can tell she does . xxmaj what does the absence of color add , especially at the single spot the fringe of the city is glimpsed ? xxmaj how does"
7,"that or not . \n\n xxmaj but i loved how urban the coincidences were , from immigrants to love xxunk to crowded subway cars to hanging around xxunk . \n\n xxmaj the subtitles quite annoyingly gave both parts of a dialog at once . \n\n ( originally written 12 / 8 / 2001 ) xxbos xxmaj shaggy & scooby - doo xxmaj get a xxmaj clue . xxmaj it 's like watching","or not . \n\n xxmaj but i loved how urban the coincidences were , from immigrants to love xxunk to crowded subway cars to hanging around xxunk . \n\n xxmaj the subtitles quite annoyingly gave both parts of a dialog at once . \n\n ( originally written 12 / 8 / 2001 ) xxbos xxmaj shaggy & scooby - doo xxmaj get a xxmaj clue . xxmaj it 's like watching a"
8,"apartment when just one scene before he had 20 men that are no more than 15 feet from him with sign of escape ? xxmaj who knows the scene just cuts to him in the apartment … … xxunk well suspension of belief i guess ) , the movie to me stays interesting . xxmaj it is only until the last 20 minutes that the film seems to feel like it could","when just one scene before he had 20 men that are no more than 15 feet from him with sign of escape ? xxmaj who knows the scene just cuts to him in the apartment … … xxunk well suspension of belief i guess ) , the movie to me stays interesting . xxmaj it is only until the last 20 minutes that the film seems to feel like it could have"


In [16]:
def scaled_dot_product_attention(query, key, value, mask=None):
    dim_k = query.size(-1)
    scores = torch.bmm(query, key.transpose(1, 2)) / np.sqrt(dim_k)
    if mask is not None:
        if scores.shape[1] == mask.shape[1]:
            scores = scores.masked_fill(mask == 0, float("-inf"))
        else:
            mask = torch.tril(torch.ones(scores.shape[1], scores.shape[1])).unsqueeze(0).to(device)
            scores = scores.masked_fill(mask == 0, float("-inf"))
    weights = F.softmax(scores, dim=-1)
    return weights.bmm(value)

class AttentionHead(nn.Module):
    def __init__(self, embed_dim, head_dim, vocab_size):
        super().__init__()
        self.q = nn.Linear(embed_dim, head_dim)
        self.k = nn.Linear(embed_dim, head_dim)
        self.v = nn.Linear(embed_dim, head_dim)
        self.mask = torch.tril(torch.ones(vocab_size, vocab_size)).unsqueeze(0).to(device)

    def forward(self, hidden_state):
        attn_outputs = scaled_dot_product_attention(
            self.q(hidden_state), self.k(hidden_state), self.v(hidden_state), self.mask)
        return attn_outputs

class MultiHeadAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        embed_dim = config.hidden_size
        num_heads = config.num_attention_heads
        head_dim = embed_dim // num_heads
        self.heads = nn.ModuleList(
            [AttentionHead(embed_dim, head_dim, 72) for _ in range(num_heads)]
        )
        self.output_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, hidden_state):
        x = torch.cat([h(hidden_state) for h in self.heads], dim=-1)
        x = self.output_linear(x)
        return x

class FeedForward(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.linear_1 = nn.Linear(config.hidden_size, config.intermediate_size)
        self.linear_2 = nn.Linear(config.intermediate_size, config.hidden_size)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        
    def forward(self, x):
        x = self.linear_1(x)
        x = self.gelu(x)
        x = self.linear_2(x)
        x = self.dropout(x)
        return x

class TransformerEncoderLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.layer_norm_1 = nn.LayerNorm(config.hidden_size)
        self.layer_norm_2 = nn.LayerNorm(config.hidden_size)
        self.attention = MultiHeadAttention(config)
        self.feed_forward = FeedForward(config)

    def forward(self, x):
        # Apply layer normalization and then copy input into query, key, value
        hidden_state = self.layer_norm_1(x)
        # Apply attention with a skip connection
        x = x + self.attention(hidden_state)
        # Apply feed-forward layer with a skip connection
        x = x + self.feed_forward(self.layer_norm_2(x))
        return x

class Embeddings(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.token_embeddings = nn.Embedding(config.vocab_size, 
                                             config.hidden_size)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout()

    def forward(self, input_ids):
        # Create position IDs for input sequence
        seq_length = input_ids.size(1)
        position_ids = torch.arange(seq_length, dtype=torch.long).unsqueeze(0).to(device)
        # Create token and position embeddings
        token_embeddings = self.token_embeddings(input_ids)
        
        position_embeddings = self.position_embeddings(position_ids)
        # Combine token and position embeddings
        embeddings = token_embeddings + position_embeddings
        embeddings = self.layer_norm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

class TransformerEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embeddings = Embeddings(config)
        self.layers = nn.ModuleList([TransformerEncoderLayer(config) 
                                     for _ in range(config.num_hidden_layers)])

    def forward(self, x):
        x = self.embeddings(x)
        for layer in self.layers:
            x = layer(x)
        return x

class TransformerForSequenceClassification(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder = TransformerEncoder(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        
    def forward(self, x):
        #print(x)
        x = self.encoder(x)#[:, 0, :] # select hidden state of [CLS] token
        #print(x)
        x = self.dropout(x)
        x = self.classifier(x)
        #print(x)
        return x

In [9]:
from transformers import AutoConfig
config = AutoConfig.from_pretrained('bert-base-uncased')

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
config.vocab_size = len(dls.vocab)
config.num_labels = len(dls.vocab)
config.hidden_size = 132
encoder_classifier = TransformerForSequenceClassification(config)

In [18]:
model = encoder_classifier

model.to(device)

dls.to(device)

learn = Learner(
    dls, 
    model, 
    loss_func=CrossEntropyLossFlat(), 
    metrics=[accuracy]
)

learn.fit_one_cycle(1, 1e-3)

#learn.export('mymodel.pkl')

epoch,train_loss,valid_loss,accuracy,time


In [234]:
learn.save('my_model')

Path('models/my_model.pth')

In [21]:
import torch

mytokenizer = Pipeline([tok,num])

# Define a function for text generation
def generate_text(model, starting_text, max_length=10):
    token_ids = mytokenizer(starting_text).to(device)
    input_ids = torch.tensor(token_ids).unsqueeze(0).to(device)  # Add batch dimension
    with torch.no_grad():
        for _ in range(max_length):
            outputs = model(input_ids)
            logits = outputs[:, -1, :]  # Get logits for the last token
            next_token_id = torch.argmax(logits, dim=-1)
            #print(next_token_id)
            #token_ids.append(next_token_id.item())
            token_ids = torch.cat((token_ids, next_token_id),dim=0)
            input_ids = torch.cat([input_ids, next_token_id.unsqueeze(0).to(device)], dim=-1)
            #print(input_ids)
            if next_token_id == 'EOS':
                break
    return mytokenizer.decode(token_ids)

# Generate text
generated_text = generate_text(learn.model, "I liked this movie because")
print(generated_text)


  input_ids = torch.tensor(token_ids).unsqueeze(0).to(device)  # Add batch dimension


xxbos i liked this movie because it was a great movie . xxmaj it 's a
