<a href="https://colab.research.google.com/github/hshuai97/Colab20210803/blob/main/HieGAT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

cf. [dgl gat](https://docs.dgl.ai/en/0.4.x/tutorials/models/1_gnn/9_gat.html#)

# install libraries

In [1]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 200})'''))

import torch
try:
  import dgl
except ModuleNotFoundError:
  CUDA = 'cu' + torch.version.cuda.replace('.','')
  !pip install dgl-{CUDA} -f https://data.dgl.ai/wheels/repo.html

try:
  import word2vec
except ModuleNotFoundError:
  !pip install word2vec

import nltk
nltk.download('punkt')
nltk.download('tagsets')
nltk.download('averaged_perceptron_tagger')


try:
  import torch_scatter
except ModuleNotFoundError:
  TORCH = torch.__version__.split('+')[0]
  CUDA = 'cu' + torch.version.cuda.replace('.','')
  !pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html

<IPython.core.display.Javascript object>

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl-cu113
  Downloading https://data.dgl.ai/wheels/dgl_cu113-0.8.2.post1-cp37-cp37m-manylinux1_x86_64.whl (220.6 MB)
[K     |████████████████████████████████| 220.6 MB 16 kB/s 
[?25hCollecting psutil>=5.8.0
  Downloading psutil-5.9.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (281 kB)
[K     |████████████████████████████████| 281 kB 8.3 MB/s 
Installing collected packages: psutil, dgl-cu113
  Attempting uninstall: psutil
    Found existing installation: psutil 5.4.8
    Uninstalling psutil-5.4.8:
      Successfully uninstalled psutil-5.4.8
Successfully installed dgl-cu113-0.8.2.post1 psutil-5.9.1


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting word2vec
  Downloading word2vec-0.11.1.tar.gz (42 kB)
[K     |████████████████████████████████| 42 kB 804 kB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: word2vec
  Building wheel for word2vec (PEP 517) ... [?25l[?25hdone
  Created wheel for word2vec: filename=word2vec-0.11.1-py2.py3-none-any.whl size=156431 sha256=b2468c494732ec2707c3e35f9e24c1b42c1fbc1729502c48c3687015eee31188
  Stored in directory: /root/.cache/pip/wheels/c9/c0/d4/29d797817e268124a32b6cf8beb8b8fe87b86f099d5a049e61
Successfully built word2vec
Installing collected packages: word2vec
Successfully installed word2vec-0.11.1


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package tagsets to /root/nltk_data...
[nltk_data]   Unzipping help/tagsets.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.12.0+cu113.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 6.7 MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9


# main

In [2]:
import os
import time
import argparse
import numpy as np
import random
import pandas as pd
import sklearn

import torch as th
from torch.nn import functional as F
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import  DataLoader
from torch_scatter import scatter_add, scatter_mean

import dgl
from dgl.nn import GATConv

import word2vec

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize



def MyDatasets(dataset_name):

    NAME = dataset_name
    if NAME not in ['20ng', 'r8', 'r52', 'oh', 'mr']:
      raise ValueError('The dataset is not support')

    PATH = '/content/drive/MyDrive/Colab_Notebooks/TextLevelGNN/data/'

    train_texts = []
    train_labels = []
    with open(os.path.join(PATH, NAME+'-train-stemmed.txt'), 'r') as f:
        data = f.readlines()

        for line in data:
          line = line.strip()
          t = line.split('\t')
          train_texts.append(t[1])
          train_labels.append(t[0])

    dev_texts = []
    dev_labels = []
    with open(os.path.join(PATH, NAME+'-dev-stemmed.txt'), 'r') as f:
        data = f.readlines()

        for line in data:
          line = line.strip()
          t = line.split('\t')
          dev_texts.append(t[1])
          dev_labels.append(t[0])

    test_texts = []
    test_labels = []
    with open(os.path.join(PATH, NAME+'-test-stemmed.txt'), 'r') as f:
        data = f.readlines()

        for line in data:
          line = line.strip()
          t = line.split('\t')
          test_texts.append(t[1])
          test_labels.append(t[0])

    target_names = list(set(train_labels))
    label2idx = {label: idx for idx, label in enumerate(target_names)}

    print(f'Dataset: {NAME}, Total train: {len(train_texts)+len(dev_texts)}, Train size: {len(train_texts)}, Dev size: {len(dev_texts)}, Test size: {len(test_texts)}, Num_class: {len(label2idx)}')
    print(f'labels: {label2idx}')
    print('*'*50)

    return train_texts,  train_labels, dev_texts, dev_labels, test_texts, test_labels, label2idx


class GATLayerSenLev(torch.nn.Module):   # word level --> sentence level
  def __init__(self, in_dim, hidden_dim, num_classes, device):
    super(GATLayerSenLev, self).__init__()

    self.in_dim = in_dim
    self.num_hidden = hidden_dim
    self.num_classes = num_classes
    self.device = device

    self.heads =  [1]
    self.feat_drop = 0.5
    self.attn_drop = 0.5
    self.negative_slope = 0.2
    self.activation = F.elu
    self.gram = 1

    self.gat_wor = GATConv(self.in_dim, self.num_hidden, self.heads[0], self.feat_drop, self.attn_drop, self.negative_slope, False, None)
    self.gat_sen = GATConv(self.num_hidden, self.num_classes, self.heads[0], self.feat_drop, self.attn_drop, self.negative_slope, False, None)

    self.lin = torch.nn.Linear(hidden_dim, num_classes)

  def add_edges_sen(self, n): # add edges for sentence-level
    edges = []
    for i in range(n):
        u = i
        for j in range(max(0, i-self.gram), min(i+self.gram+1, n)):
            v = j
            # - first connect the new sub_graph
            edges.append([u, v])
    return edges

  def sentence2graph(self, inputs):  # for sen-level, 
    n = len(inputs)  # Number of sentence in each sample
    edges = self.add_edges_sen(n) 
    u, v = zip(*edges)
    g = dgl.graph((u, v), num_nodes=n, idtype=torch.int32).to(self.device)

    g.ndata['h'] = inputs

    return g

  def spl_mat(self, index: list, matrix: torch):
    split_size = [ index.count(i) for i in set(index)]
    s_mat = torch.split(matrix, split_size)
    return s_mat  # splited matrix

  def forward(self, g,  h, all_wor_id:list):
    x = self.gat_wor(g, h)
    g.ndata['h'] = x.mean(1)

    out_worlev = dgl.max_nodes(g, feat='h')  # (all_num_sen_one_batch, hidden_dim)
    temp = self.spl_mat(all_wor_id, out_worlev)
    sen_g = [self.sentence2graph(t) for t in temp]

    sen_batch_g = dgl.batch(sen_g)
    h = sen_batch_g.ndata['h']
    x = self.gat_sen(sen_batch_g, h)
    sen_batch_g.ndata['h'] = x.mean(1)

    out_senlev = dgl.mean_nodes(sen_batch_g, feat='h')  # (N, C)

    all_wor_id = torch.tensor(all_wor_id, device=self.device)  # list to tensor
    out_worlev = scatter_add(out_worlev, all_wor_id, dim=0)  # (all_num_sen_one_batch, hidden_dim) ---> (N, hidden_dim)

    return out_senlev, self.lin(out_worlev)


class GATLayerforDocLev(torch.nn.Module):  # doc-lev
  def __init__(self, in_dim, hidden_dim, num_classes):
    super(GATLayerforDocLev, self).__init__()

    self.in_dim = in_dim
    self.num_hidden = hidden_dim

    self.num_layers = 3
    self.heads = [3]*(self.num_layers-1) + [1]
    self.gat_layers = th.nn.ModuleList()
    self.feat_drop = 0.5
    self.attn_drop = 0.5
    self.negative_slope = 0.2
    self.activation = F.elu

    if self.num_layers > 1:
        # input projection (no residual)
        self.gat_layers.append(GATConv(self.in_dim, self.num_hidden, self.heads[0], self.feat_drop, self.attn_drop, self.negative_slope, False, self.activation))

        # hidden layers
        for l in range(1, self.num_layers-1):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(self.num_hidden *self. heads[l-1], self.num_hidden, self.heads[l], self.feat_drop, self.attn_drop, self.negative_slope, False, self.activation))
        # output projection
        self.gat_layers.append(GATConv(self.num_hidden * self.heads[-2], num_classes, self.heads[-1], self.feat_drop, self.attn_drop, self.negative_slope, False, None))
    
    else:
        self.gat_layers.append(GATConv(self.in_dim, num_classes, self.heads[0], self.feat_drop, self.attn_drop, self.negative_slope, False, None))
  
  def forward(self, g, inputs):
    h = inputs
    for l in range(self.num_layers):
        h = self.gat_layers[l](g, h)
        h = h.flatten(1) if l != self.num_layers - 1 else h.mean(1)
    return h  # (N, C)


class HieGAT(torch.nn.Module):
  def __init__(self, vocab, in_dim, hidden_dim, num_classes, sen_max_len, device):
    super(HieGAT, self).__init__()
    self.vocab_size = len(vocab)
    self.vocab = vocab
    self.num_classes = num_classes

    self.node_hidden_doclev = torch.nn.Embedding(self.vocab_size, in_dim)  # (num_vocab+1, num_hidden), include 'unk
    self.node_hidden_doclev.weight.data.copy_(torch.tensor(self.load_w2v(f'/content/drive/MyDrive/Colab_Notebooks/TextLevelGNN/model/glove.6B/glove.6B.{in_dim}d.w2vformat.txt')))
    self.node_hidden_doclev.weight.requires_grad = True

    self.node_hidden_senlev = torch.nn.Embedding(self.vocab_size, in_dim)  # (num_vocab+1, num_hidden), include 'unk
    self.node_hidden_senlev.weight.data.copy_(torch.tensor(self.load_w2v(f'/content/drive/MyDrive/Colab_Notebooks/TextLevelGNN/model/glove.6B/glove.6B.{in_dim}d.w2vformat.txt')))
    self.node_hidden_senlev.weight.requires_grad = True


    self.gram = 1

    self.device = device
    self.sen_max_len = sen_max_len

    self.gat_senlev = GATLayerSenLev(in_dim, hidden_dim, num_classes, device)
    self.gat_doclev = GATLayerforDocLev(in_dim, hidden_dim, num_classes)
   


  def load_w2v(self, path):
    w2v = word2vec.load(path)
    embedding_matrix = []
    unk_d = len(w2v['the'])  # unknow word dimension
    for word in self.vocab:
      try:
        embedding_matrix.append(w2v[word])
      except KeyError:
        embedding_matrix.append(np.zeros(unk_d))
    
    return np.asarray(embedding_matrix)

  def add_edges(self, sample,  local_vocab_id):  # add edges for word-level
    edges = []
    for i, src in enumerate(sample):
      u = local_vocab_id[src]
      for j in range(max(0, i-self.gram), min(i+self.gram +1, len(sample))):
        dst = sample[j]
        v = local_vocab_id[dst]

        edges.append([u, v])
    return edges

  def wordlev2graph(self, sample):  # for  word-level, sample: [78, 63, 63, 33, 78,  ...],
    if len(sample) == 0:
      raise Exception('sample length is equal 0')
    if len(sample)>self.sen_max_len:
      sample = sample[:self.sen_max_len]

    local_vocab = set(sample)  # {78, 63, 33, ...}

    n = len(local_vocab)
    local_vocab_id = dict(zip(local_vocab, range(n)))  # {78:0, 63:1, 33:2, ...}
    u, v = zip(*self.add_edges(sample, local_vocab_id))
    
    g = dgl.graph((u, v), num_nodes=n, idtype=torch.int32).to(self.device)

    local_vocab_tensor = torch.tensor(list(local_vocab)).to(self.device)

    g.ndata['h'] =  self.node_hidden_senlev(local_vocab_tensor)

    return g

  def doc2graph(self, sample):  # for doc-level, sample: [78, 63, 63, 33, 78,  ...],
    if len(sample) == 0:
      raise Exception('sample length is equal 0')
    if len(sample)>self.sen_max_len:
      sample = sample[:self.sen_max_len]

    local_vocab = set(sample)  # {78, 63, 33, ...}

    n = len(local_vocab)
    local_vocab_id = dict(zip(local_vocab, range(n)))  # {78:0, 63:1, 33:2, ...}
    u, v = zip(*self.add_edges(sample, local_vocab_id))
    
    g = dgl.graph((u, v), num_nodes=n, idtype=torch.int32).to(self.device)

    local_vocab_tensor = torch.tensor(list(local_vocab)).to(self.device)
    g.ndata['h'] =  self.node_hidden_doclev(local_vocab_tensor)

    return g

  def lamb(self, x):  # lambda parameter for sen-lev and docl-ev out put
    lam = []  # d, s , w
    t = 1 / ( np.log(x) + 1)
    d =  t * 2.0/3.0
    s = 1 - t
    w = t * 1.0/3.0
    lam.append(d)
    lam.append(s)
    lam.append(w)
    return lam


  def forward(self, inputs):  # inputs: (one_batch_size, num_sentence, tokens)

    all_wor_g = []  # all word level graphs in one batch
    all_wor_id = []  # [0, 0, 0, 1,1,1,1,1, 2, 2, 2, ...]
    doc_g = [] # inputs for doc level: (batch_size, tokens)

    for i, (_x, _) in enumerate(inputs):
      # word level graphs
      wor_g = [self.wordlev2graph(wor) for wor in _x]
      _wor_id = [i for j in range(len(wor_g))]
      all_wor_id.extend(_wor_id)
      all_wor_g.extend(wor_g)

      # doc level graphs
      temp = [w for t in _x for w in t]
      doc_g.append(self.doc2graph(temp))

    # word level
    batch_wor_g = dgl.batch(all_wor_g)
    h = batch_wor_g.ndata['h']

    out_senlev, out_worlev = self.gat_senlev(batch_wor_g, h, all_wor_id)  # (N, C)


    # TODO 将sen-lev图和doc-level 图同时批量处理    
    
    # doc level output
    doc_batch_g = dgl.batch(doc_g)
    h = (doc_batch_g.ndata['h'])
    doc_batch_g.ndata['h'] = self.gat_doclev(doc_batch_g, h)
    
    out_doclev =  dgl.mean_nodes(doc_batch_g, feat='h')  # [N, C]


    average_sen = len(all_wor_id)/len(inputs)
    assert average_sen >=1, 'Average num sentence in one batch is less than 1, error!'
    lam = self.lamb(average_sen)


    total = lam[0]*F.log_softmax(out_doclev, dim=-1) + lam[1]*F.log_softmax(out_senlev, dim=-1) + lam[2]*F.log_softmax(out_worlev, dim=-1)  # (N, C)
    #total =F.log_softmax(out_doclev, dim=-1)  # (N, C)
    return total


def train(model, epoch, input, dev_input, DEVICE, DATASET, label_weight, lr):
  optimizer = torch.optim.AdamW(model.parameters(), weight_decay=1e-2, lr=lr)  # Adam

  PATIENCE = 8  # Patience on dev set to finish training
  no_improve = 0  # No improvement on dev set

  best_acc = 0.0
  dur = []

  for e in range(epoch):
    improved = ''
    model.train()
    t0 = time.time()

    for ba in input:  # Total 80s
      t1 = time.time()
      y = torch.tensor([label for _, label in ba]).to(DEVICE)  # 0.09s      
    
      # 0.09s
      outputs = model(ba)
      loss = F.nll_loss(outputs, y, weight=label_weight)
      
      loss.backward()  # Derive gradients
      optimizer.step()
      optimizer.zero_grad()  # Clear gradients
    
    val_acc = dev(model, dev_input, DEVICE)  # 5s

    if val_acc>best_acc:
      best_acc = val_acc
      no_improve = 0
      improved = '*'
      torch.save(model, f'/content/drive/MyDrive/Colab_Notebooks/TextLevelGNN/model/HieGAT_{DATASET}.pkl')
    else:
      no_improve+=1
    
    dur.append(time.time()-t0)
    print(f'Epoch: {e}, Train loss:{loss.item():.4f}, Val acc: {val_acc:.4f}, Times: {np.mean(dur):.4f}s, {improved}')

    if no_improve>=PATIENCE:
      print(f'No improvement on dev set, early stopping')
      break

def dev(model, input, DEVICE):
  model.eval()
  total_pred = 0.0
  correct = 0.0
  for ba in input:
    y = torch.tensor([label for _, label in ba]).to(DEVICE)  # 0.05s

    with torch.no_grad():
      outputs = model(ba)
      pred = torch.argmax(outputs, dim=1)
      
      correct_pred = torch.sum(pred==y)
      correct += correct_pred
      total_pred += len(y)
  
  return torch.div(correct, total_pred)  # Acc on dev set

def test(model, input, DEVICE):
  model.eval()
  total_pred = 0.0
  correct = 0.0
  for ba in input:
    y = torch.tensor([label for _, label in ba]).to(DEVICE)  # 0.05s

    with torch.no_grad():
      outputs = model(ba)
      pred = torch.argmax(outputs, dim=1)
      
      correct_pred = torch.sum(pred==y)
      correct += correct_pred
      total_pred += len(y)
  
  return torch.div(correct, total_pred)  # Acc on dev set

def  buildvocab(sample, min_count=5):
  # sample: ['wo xihuan ziran yuyan chuli', 'wo ai shengdu xuexi',  'wo xihuan jiqi xuexi']

  MIN_COUNT = min_count

  freq = {}
  for i in sample:
    for t in word_tokenize(i):
      if t not in freq:
        freq[t] = 0

  for i in sample:
    for t in word_tokenize(i):
      freq[t] +=1

  del_key = []
  for i in freq:
    if freq[i]<MIN_COUNT:
      del_key.append(i)
  
  for i in del_key:
    freq.pop(i)

  vocab_id = {}
  for i, key in enumerate(freq):
    vocab_id[key] = i
  print(f'vocab_id size: {len(vocab_id)}')
  print('*'*50)
  
  return vocab_id

#vocab = {'this':0, 'is':1, 'first':2, 'sentence':3, 'however':4, 'could':5, 'be':6, '<unk>':7, ... ,  '<pad>':100}
def w2id(input, vocab):
  ids = []
  for w in input:
    if w in [',', '.', '!', '?']:
      continue
    if w in vocab:
      ids.append(vocab[w])
    else: 
      ids.append(vocab['<unk>'])
  return ids

def batch_hiegnn(texts, labels, batch_size, label2idx, vocab_id):
  x = texts
  y = [label2idx[t] for t in labels]
  data = [(x[i], y[i]) for i in range(len(y))]
  random.shuffle(data)

  input = [(sent_tokenize(x), y)  for x, y in data]

  input1 = []
  for sample, y in input:
    t = []
    for s in sample:
      temp = w2id(word_tokenize(s), vocab_id)
      if len(temp) >0:
        t.append(temp)
    input1.append((t, y))

  input2 = [input1[i: i+batch_size] for i in range(0, len(input1), batch_size)]

  return input2


SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
random.seed(SEED)

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
  print(f'device: {DEVICE}')
  print(f'name: {torch.cuda.get_device_name(0)}')
  print(f'*'*50)


DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)
device: cuda:0
name: Tesla P100-PCIE-16GB
**************************************************


# run

In [6]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 400})'''))


def LabelWeight(train_label, label2idx, dataset_name):  # for r8, r52, oh
  NAME = dataset_name

  if NAME in []:  # "r8", "r52" and "oh" are imbalanced
    y = np.asarray([label2idx[t] for t in train_label])

    class_weights=torch.tensor(sklearn.utils.class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y),y=y), dtype=torch.float)
    return class_weights.to(DEVICE)
  else: 
    return None


def ParaTuning(DATASET, SEN_MAX_LEN, DEVICE, EPOCH, lr, MIN_COUNT):  # Parameters tuning

  para = []  # parameters
  acc = []  # accuracy
  dur = []  # time

  train_texts,  train_labels, dev_texts, dev_labels, test_texts, test_labels, label2idx = MyDatasets(DATASET)
  NUM_CLASS = len(label2idx)
  
  vocab_id = buildvocab(train_texts, min_count=MIN_COUNT)
  vocab_id['<unk>']=len(vocab_id)  # for OOV

  BATCH_SIZE=128
  train_batch = batch_hiegnn(train_texts, train_labels, BATCH_SIZE, label2idx, vocab_id)
  dev_batch = batch_hiegnn(dev_texts, dev_labels, BATCH_SIZE, label2idx, vocab_id)
  test_batch = batch_hiegnn(test_texts, test_labels, BATCH_SIZE, label2idx, vocab_id)

  label_weight = LabelWeight(train_labels, label2idx, DATASET)

  for i in range(3):
    tt = time.time()
    model = HieGAT(vocab_id, in_dim=300, hidden_dim=300,  num_classes=NUM_CLASS, sen_max_len=SEN_MAX_LEN, device=DEVICE)
    model.to(DEVICE)
  
    train(model, EPOCH, train_batch, dev_batch, DEVICE, DATASET,  label_weight=label_weight, lr=lr)

    best_model = torch.load(f'/content/drive/MyDrive/Colab_Notebooks/TextLevelGNN/model/HieGAT_{DATASET}.pkl')
    res = test(best_model, test_batch, DEVICE)
    print(f'Test accuracy: {res.cpu().numpy():.4f}')

    print(f'Time: {(time.time()-tt)/60:.1f}m, min_count={MIN_COUNT}')
    print(f'*'*50)

    acc.append(res.cpu().numpy())
    dur.append((time.time()-tt)/60)
    para.append(f'lr={lr}, min_count={MIN_COUNT}')

  # comput average
  acc.append(np.mean(acc))
  dur.append(np.mean(dur))
  para.append('average:')

  df = pd.concat([pd.DataFrame({'para': para}), pd.DataFrame({'acc': acc}), pd.DataFrame({'dur': dur})], axis=1)

  df.to_csv(f'/content/drive/MyDrive/Colab_Notebooks/TextLevelGNN/model/HieGAT-{DATASET}.csv')


# Parameters

EPOCH = 100
SEN_MAX_LEN = 500  # sentence max length

min_c = {'20ng':5, 'r8': 3, 'r52':3,  'oh':1, 'mr':1,}  # mini count in vocab
lr = {'20ng':1e-3, 'r8': 1e-3, 'r52':1e-3,  'oh':1e-3, 'mr':1e-4, }  # learning rate

dataset = ['20ng', 'r8',  'r52', 'oh', 'mr', ]


t0 = time.time()
for DATASET in dataset:

  if DATASET not in ['mr']: continue  # check for specific dataset

  MIN_COUNT = min_c[DATASET]
  LR = lr[DATASET]

  ParaTuning(DATASET, SEN_MAX_LEN, DEVICE, EPOCH,  LR,  MIN_COUNT)
  print('='*100)

print(f'Total time: {(time.time()-t0)/60:.4f}mins')

<IPython.core.display.Javascript object>

Dataset: mr, Total train: 7108, Train size: 6397, Dev size: 711, Test size: 3554, Num_class: 2
labels: {'0': 0, '1': 1}
**************************************************
vocab_id size: 4981
**************************************************
Epoch: 0, Train loss:0.6731, Val acc: 0.6751, Times: 12.0499s, *
Epoch: 1, Train loss:0.6512, Val acc: 0.7075, Times: 12.0824s, *
Epoch: 2, Train loss:0.6198, Val acc: 0.7243, Times: 12.3656s, *
Epoch: 3, Train loss:0.6320, Val acc: 0.7314, Times: 12.2930s, *
Epoch: 4, Train loss:0.5869, Val acc: 0.7426, Times: 12.2361s, *
Epoch: 5, Train loss:0.5888, Val acc: 0.7595, Times: 12.1998s, *
Epoch: 6, Train loss:0.5862, Val acc: 0.7496, Times: 12.1631s, 
Epoch: 7, Train loss:0.5450, Val acc: 0.7553, Times: 12.1327s, 
Epoch: 8, Train loss:0.5336, Val acc: 0.7525, Times: 12.1053s, 
Epoch: 9, Train loss:0.5661, Val acc: 0.7595, Times: 12.0958s, 
Epoch: 10, Train loss:0.4941, Val acc: 0.7693, Times: 12.1752s, *
Epoch: 11, Train loss:0.4806, Val acc: 0.7679,

In [None]:
import time

a = [i for i in range(1000000)]

t0 = time.time()
count = 0
for i in range(len(a)):
  count +=1

t1 = time.time()

t2 = time.time()
for i, _ in enumerate(a):
  j = i
t3 = time.time()

print(f'T0: {t1-t0}')
print(f'T2: {t3-t2}')

T0: 0.4392988681793213
T2: 0.20152521133422852
