<a href="https://colab.research.google.com/github/ampehta/MLP-applications-on-Language-Models/blob/main/MLP_LM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install transformers

In [3]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from transformers import AutoTokenizer
  
tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

In [4]:
vocab_size = tokenizer.vocab_size
embedding_dimension = 768 

In [5]:
class MLPNet(tf.keras.Model):
    def __init__(self,vocab_size,embedding_dimension,block_n):
        super(MLPNet,self).__init__()
        self.embed_layer = tf.keras.layers.Embedding(vocab_size,embedding_dimension)
        self.stacked_MLPBlock = tf.keras.Sequential([MLPBlock() for n in range(block_n)])#tf.keras.Sequential([MLPBlock() for n in range(3)])

    def call(self,x):
        x = self.embed_layer(x) # (bs,512,768)
        x = self.stacked_MLPBlock(x)

        return x

class MLPBlock(tf.keras.Model):
    def __init__(self):
        super(MLPBlock,self).__init__()
        self.conv = tf.keras.Sequential([tf.keras.layers.Conv2D(5,kernel_size=2),
                                         tf.keras.layers.Conv2D(1,kernel_size=5)])

        self.dense_wise = tf.keras.Sequential([
                                               tf.keras.layers.Dense(768*4,activation='relu'),
                                               tf.keras.layers.Dense(768/2,activation='relu'),
                                               tf.keras.layers.Dense(768,activation='relu'),
                                               tf.keras.layers.LayerNormalization()])
        self.feature_wise = tf.keras.Sequential([
                                                 tf.keras.layers.Dense(512*4,activation='relu'),
                                                 tf.keras.layers.Dense(512/2,activation='relu'),       
                                                 tf.keras.layers.Dense(512,activation='relu'),
                                                 tf.keras.layers.LayerNormalization()])

    
    def call(self,x): #(bs,512,768)

        x1 , x2 ,x3 = tf.split(x,3,2)
        x = tf.stack([x1,x2,x3],-1) # cnn 용으로 (bs,512,768) -> (bs,3,512,-1)
        x = self.conv(x)
        x = tf.squeeze(x)

        x = self.dense_wise(x)
        x = tf.transpose(x,[0,2,1])

        x = self.feature_wise(x)
        x = tf.transpose(x,[0,2,1]) # 원상 복귀 

        return x


# Benchmark : KLUE TopicClassification dataset


In [8]:
class MLPBNet_for_tc(tf.keras.Model):
    def __init__(self,vocab_size,output_dimension,block_n):
        super(MLPBNet_for_tc,self).__init__()
        self.MLPNet = MLPNet(vocab_size,output_dimension,block_n)
        self.classifier_head = tf.keras.Sequential([
                                                tf.keras.layers.LSTM(768,return_sequences=True),
                                                tf.keras.layers.LSTM(2048),
                                                tf.keras.layers.Dense(512,activation='relu'),
                                                tf.keras.layers.Dense(7)])
    def call(self,x):
        x = self.MLPNet(x)
        x = self.classifier_head(x)
        return x
    

In [6]:
klue_tc = pd.read_json('https://raw.githubusercontent.com/KLUE-benchmark/KLUE/main/klue_benchmark/ynat-v1/ynat-v1_train.json')

In [37]:
X = klue_tc['title'].values
y = klue_tc['label'].values

le = preprocessing.LabelEncoder()
le.fit(['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치'])
y = le.transform(y)

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=1210)

X_train = X_train.tolist()
X_test = X_test.tolist()
y_train = y_train.tolist()
y_test = y_test.tolist()


In [38]:
X_train = tokenizer.batch_encode_plus(X_train,padding='max_length',return_tensors='tf')['input_ids']
X_test = tokenizer.batch_encode_plus(X_test,padding='max_length',return_tensors='tf')['input_ids']

X_train = tf.reshape(tf.constant(X_train),(-1,512))
X_test = tf.reshape(tf.constant(X_test),(-1,512))

y_train = tf.reshape(tf.constant(y_train),(-1,1))
y_test = tf.reshape(tf.constant(y_test),(-1,1))

def make_tfdataset(x,y,batch_size):
    dataset_x = tf.data.Dataset.from_tensor_slices(x) 
    dataset_y = tf.data.Dataset.from_tensor_slices(y)
    dataset = tf.data.Dataset.zip((dataset_x, dataset_y))
    dataset = dataset.batch(batch_size)
    return dataset

train_dataset = make_tfdataset(X_train,y_train,12)
test_dataset = make_tfdataset(X_test,y_test,12)

In [39]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
def loss(model, x, y):
  y_ = model(x)
  return loss_object(y_true=y, y_pred=y_)

def grad(model, inputs, targets):
  with tf.GradientTape() as tape:
    loss_value = loss(model, inputs, targets)
  return loss_value, tape.gradient(loss_value, model.trainable_variables)


In [None]:
TCNet = MLPBNet_for_tc(tokenizer.vocab_size,768,12)
"""
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

TCNet.compile(optimizer=opt, loss=loss)
"""

train_loss_results = []
train_accuracy_results = []

num_epochs = 201
iter = 0

for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

  # 훈련 루프 - 32개의 배치를 사용합니다.
    for x, y in train_dataset:
    # 모델을 최적화합니다.
        print(iter)
        iter+=1
        loss_value, grads = grad(TCNet, x, y)
        optimizer.apply_gradients(zip(grads, TCNet.trainable_variables))

    # 진행 상황을 추적합니다.
        epoch_loss_avg(loss_value)  # 현재 배치 손실을 추가합니다.
    # 예측된 레이블과 실제 레이블 비교합니다.
        epoch_accuracy(y, TCNet(x))
        if iter % 20 == 0:
            print("에포크 {:03d} iteration:{} 손실: {:.3f}, 정확도: {:.3%}".format(epoch,iter,
                                                                epoch_loss_avg.result(),
                                                                epoch_accuracy.result()))

  # epoch 종료
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    

1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
에포크 000 iteration:1780 손실: 1.939, 정확도: 17.893%
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
에포크 000 iteration:1800 손실: 1.939, 정확도: 17.935%
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
에포크 000 iteration:1820 손실: 1.939, 정확도: 17.930%
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
에포크 000 iteration:1840 손실: 1.939, 정확도: 17.899%
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
에포크 000 iteration:1860 손실: 1.939, 정확도: 17.885%
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
에포크 000 iteration:1880 손실: 1.939, 정확도: 17.895%
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
에포크 000 iteration:1900 손실: 1.939,