In [1]:
import torch
import torch.nn as nn

### 1) 단어 사전 정의

In [3]:
vocab = ["cat", "dog", "book", "run", "eat"]
word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = {i:word for word, i in word_to_idx.items()}

In [4]:
print("단어 사전 :", word_to_idx)

단어 사전 : {'cat': 0, 'dog': 1, 'book': 2, 'run': 3, 'eat': 4}


### 2) One-hot Encoding

In [5]:
def one_hot_encode(word, vocab_size=len(vocab)):
    vec = torch.zeros(vocab_size)
    vec[word_to_idx[word]]=1.0
    return vec

In [6]:
print("\n원-핫 벡터 (dog) :", one_hot_encode("dog"))


원-핫 벡터 (dog) : tensor([0., 1., 0., 0., 0.])


### 3) Embedding Matrix : (5 x 3 크기의 행렬, 단어 5개 x 차원 3)

In [7]:
embedding_dim = 3 # 차원 수 (d)
embedding = nn.Embedding(num_embeddings=len(vocab), embedding_dim=embedding_dim)

In [8]:
print("\n임베딩 행렬 (학습 전 초기 값):")
print(embedding.weight)


임베딩 행렬 (학습 전 초기 값):
Parameter containing:
tensor([[ 1.9409,  0.9565, -0.0133],
        [-0.1943,  0.8660,  0.4790],
        [ 0.1283,  0.0890,  0.5064],
        [ 0.9608,  1.9671, -1.4872],
        [-0.8297,  1.7280,  1.2303]], requires_grad=True)


### 4) Embedding lookup

In [10]:
# 임베딩 벡터를 추출할 단어 설정
word = "dog"

In [12]:
word_idx = torch.tensor([word_to_idx[word]])
embed_vec = embedding(word_idx)

In [13]:
print(f"\n단어 '{word}'의 임베딩 벡터:")
print(embed_vec)


단어 'dog'의 임베딩 벡터:
tensor([[-0.1943,  0.8660,  0.4790]], grad_fn=<EmbeddingBackward0>)


## 은닉층

In [14]:
import torch.nn.functional as F

In [15]:
# 입력 차원 (예 : 단어 임베딩 4차원), 은닉층 3차원
input_dim = 4
hidden_dim = 3

In [16]:
# 샘플 입력 (배치 = 2, 임베딩 차원 = 4)
x = torch.tensor([[0.1,0.2,0.3,0.4],
                  [0.5,0.6,0.7,0.8]])

In [17]:
# 선형 변환 (wx + b)
linear = nn.Linear(input_dim, hidden_dim)
h_linear = linear(x)

In [19]:
print("선형 변환 결과 : ", h_linear)

선형 변환 결과 :  tensor([[0.2165, 0.3851, 0.2727],
        [0.1281, 0.6987, 0.1606]], grad_fn=<AddmmBackward0>)


In [20]:
# 활설화 함수 적용
h_sigmoid = torch.sigmoid(h_linear)
h_tanh = torch.tanh(h_linear)
h_relu = F.relu(h_linear)

### ReLU 함수 : f(x)=max(0,x)
- 입력이 0보다 크면 그대로 출력, 입력이 0이하면 0 출력
- 샘플 입력에 대한 nn.Linear()연산 결과가 모두 0이하 이므로 ReLu 결과 값이 모두 0이 되엇음

In [21]:
print("\n Sigmoid:", h_sigmoid)
print("\n Tanh:", h_tanh)
print("\n Relu:", h_relu)


 Sigmoid: tensor([[0.5539, 0.5951, 0.5678],
        [0.5320, 0.6679, 0.5401]], grad_fn=<SigmoidBackward0>)

 Tanh: tensor([[0.2132, 0.3671, 0.2662],
        [0.1274, 0.6035, 0.1593]], grad_fn=<TanhBackward0>)

 Relu: tensor([[0.2165, 0.3851, 0.2727],
        [0.1281, 0.6987, 0.1606]], grad_fn=<ReluBackward0>)


- 학습 수행 후, 은닉층 활성화 함수 출력 확인
  - x : 임의로 가정한 임베딩 값
  - y : 임의로 가정한 각 샘플의 정답 클래스

In [22]:
import torch.optim as optim

In [23]:
x = torch.tensor([[1.0,2.0,3.0,4.0],
                  [4.0,3.0,2.0,1.0]])
y = torch.tensor([0,2]) # 학습을 위한 정답 클래스

In [24]:
# 모델 정의 : 입력층(4) -> 은닉층(5) -> 출력층(3)

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(4,5)
        self.fc2 = nn.Linear(5,3)

    def forward(self, x):
        hidden = F.relu(self.fc1(x)) # ReLU 활성화
        output = self.fc2(hidden)
        return output, hidden

In [25]:
model = SimpleNN()

- fc2는 출력층의 선형 변환이고, nn.CrossEntropyLoss()에서 Softmax 함수가 암묵적으로 적용됨

In [27]:
# 손실함수와 옵티마이저
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [30]:
# 학습 수행
for epoch in range(200):
    optimizer.zero_grad()
    output, hidden = model(x)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

In [31]:
# 학습 후  출력
output, hidden = model(x)

In [32]:
print("은닉층 출력(ReLU 적용 후):\n", hidden)

은닉층 출력(ReLU 적용 후):
 tensor([[0.0000, 0.0000, 5.2137, 0.0000, 0.0000],
        [0.0000, 6.8489, 1.8178, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)


### 1. 데이터 준비

In [33]:
sentences = [
    ['I','love','NLP','.'],
    ['You','hate','AI',','],
    ['We','study','AI','.'],
    ['I','enjoy','learning','.']
]

단어 사전 / 단어 인덱싱

In [34]:
vocab = list(set(word for sent in sentences for word in sent))
word2idx = {w:i for i, w in enumerate(vocab)}
idx2word = {i:w for w,i in word2idx.items()}
vocab_size = len(vocab)

In [35]:
print(vocab) # 작업 확인용 출력

['study', 'enjoy', 'NLP', 'learning', '.', 'You', 'We', 'I', 'AI', ',', 'hate', 'love']


In [36]:
print(word2idx) # 작업 확인용 출력

{'study': 0, 'enjoy': 1, 'NLP': 2, 'learning': 3, '.': 4, 'You': 5, 'We': 6, 'I': 7, 'AI': 8, ',': 9, 'hate': 10, 'love': 11}


문장 -> 인덱스 시퀀스 변환

In [37]:
def sentence_to_indices(sentence):
    return torch.tensor([word2idx[word] for word in sentence], dtype=torch.long)

In [45]:
X = [sentence_to_indices(sent)[:-1] for sent in sentences] # 입력
Y = [sentence_to_indices(sent)[1:] for sent in sentences] # 다른 단어 예측

In [46]:
print(X)

[tensor([ 7, 11,  2]), tensor([ 5, 10,  8]), tensor([6, 0, 8]), tensor([7, 1, 3])]


In [47]:
print(Y)

[tensor([11,  2,  4]), tensor([10,  8,  9]), tensor([0, 8, 4]), tensor([1, 3, 4])]


In [48]:
# 배치 처리용
def get_batch(X,Y):
    return torch.stack(X), torch.stack(Y) # [batch_size, seq_Len]

In [54]:
batch_X, batch_Y = get_batch(X,Y)

In [55]:
print(batch_X) 

tensor([[ 7, 11,  2],
        [ 5, 10,  8],
        [ 6,  0,  8],
        [ 7,  1,  3]])


In [56]:
print(batch_Y) 

tensor([[11,  2,  4],
        [10,  8,  9],
        [ 0,  8,  4],
        [ 1,  3,  4]])


### 2. 언어 모델 정의

In [59]:
# 기본 AI 기반 언어모델
class MinLM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size):
        super(MinLM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.hidden = nn.Linear(embed_size, hidden_size)
        self.output = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        # x: [batch, seq_Len] -> [batch, seq_Len, embed_size]
        x = self.embedding(x)
        h = F.relu(self.hidden(x))
        out = self.output(h) # Logits
        return out

In [60]:
embed_size = 8 #16
hidden_size = 18 #32

In [64]:
model = MinLM(vocab_size, embed_size, hidden_size)

optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [66]:
epochs = 500 # 반복 횟수

for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(batch_X) # [batch, seq_Len, vocab_size]
    loss = criterion(output.view(-1, vocab_size), batch_Y.view(-1))
    loss.backward()
    optimizer.step()

    if (epoch+1) % 20 == 0:
        print(f"Epoch {epoch+1}/{epoch}, Loss: {loss.item():.4f}")

Epoch 20/19, Loss: 1.1106
Epoch 40/39, Loss: 0.3610
Epoch 60/59, Loss: 0.2396
Epoch 80/79, Loss: 0.2340
Epoch 100/99, Loss: 0.2330
Epoch 120/119, Loss: 0.2326
Epoch 140/139, Loss: 0.2323
Epoch 160/159, Loss: 0.2321
Epoch 180/179, Loss: 0.2319
Epoch 200/199, Loss: 0.2318
Epoch 220/219, Loss: 0.2317
Epoch 240/239, Loss: 0.2316
Epoch 260/259, Loss: 0.2316
Epoch 280/279, Loss: 0.2315
Epoch 300/299, Loss: 0.2315
Epoch 320/319, Loss: 0.2314
Epoch 340/339, Loss: 0.2314
Epoch 360/359, Loss: 0.2314
Epoch 380/379, Loss: 0.2313
Epoch 400/399, Loss: 0.2313
Epoch 420/419, Loss: 0.2313
Epoch 440/439, Loss: 0.2313
Epoch 460/459, Loss: 0.2313
Epoch 480/479, Loss: 0.2312
Epoch 500/499, Loss: 0.2312


### 4. 출력1 : 에측 다음 단어

In [69]:
# 1) 테스트 문장 준비
test_sentence = ['I','love','NLP']
# 2) 테스트 문장 인덱싱
test_idx = torch.tensor([word2idx[w] for w in test_sentence], dtype=torch.long)
# 3) 언어모델 입력 -> 다음 단어 확률 출력
with torch.no_grad():
    logits = model(test_idx)
    pred_indices = torch.argmax(logits, dim=1)
    pred_words = [idx2word[idx.item()] for idx in pred_indices]

In [70]:
print("입력 문장:", test_sentence)
print("예측 다음 단어:", pred_words)

입력 문장: ['I', 'love', 'NLP']
예측 다음 단어: ['love', 'NLP', '.']


### 5. 출력2 : 문장 생성

In [72]:
def generate_sentence(model, seed_words, max_len=5):
    model.eval()
    generated = seed_words[:]
    idx_seq = torch.tensor([word2idx[w] for w in seed_words], dtype=torch.long).unsqueeze(0) #[1, seq_len]

    for _ in range(max_len):
        with torch.no_grad():
            logits = model(idx_seq) # [1,seq_len, vocab_size]
            next_word_logits = logits[0,-1] # 마지막 단어 예측
            next_idx = torch.argmax(next_word_logits).item()
            next_word = idx2word[next_idx]
            generated.append(next_word)

            # 다음 입력에 추가
            idx_seq = torch.cat([idx_seq, torch.tensor([[next_idx]])], dim=1)

        return generated

시드 단어로 문장 생성

In [73]:
seed = ['I']

In [76]:
generated_sentence = generate_sentence(model, seed, max_len=5)
print("Seed:", seed)
print("Generated:", generated_sentence)

Seed: ['I']
Generated: ['I', 'love']


bi-gram 기반 단어 예측

In [80]:
# 학습 데이터 (Bigram -> Next word)
def make_bigram_data(sentences):
    inputs, targets = [], []
    for sent in sentences:
        for i in range(len(sent) - 2):
            print("bigram :", sent[i], sent[i+1])
            bigram = [word2idx[sent[i]], word2idx[sent[i+1]]] # 직전 2단어
            print("target :", sent[i+2])
            target = word2idx[sent[i+2]] # 다음 단어

            inputs.append(bigram)
            targets.append(target)
        print("inputs : ", inputs)
        print("target : ", targets)
        return torch.tensor(inputs), torch.tensor(targets)

In [81]:
X, Y = make_bigram_data(sentences)

bigram : I love
target : NLP
bigram : love NLP
target : .
inputs :  [[7, 11], [11, 2]]
target :  [2, 4]


In [82]:
print("X (bigram): ", X)
print("Y (target): ", Y)

X (bigram):  tensor([[ 7, 11],
        [11,  2]])
Y (target):  tensor([2, 4])


In [86]:
# 모델 정의 
class BigramNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(BigramNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fc1 = nn.Linear(embed_dim * 2, hidden_dim) # 두 단어 벡터 contact
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, vocab_size) # 출력층(단어 분포)

    def forward(self,x):
        embeds = self.embedding(x) # (batch, 2, embed_dim)
        embeds = embeds.view(x.size(0), -1) # 펼치기 (batch, 2*embed_dim)
        hidden = self.relu(self.fc1(embeds))
        out = self.fc2(hidden)
        return out

In [88]:
# 하이퍼파라미터
embed_dim = 8
hidden_dim = 16
Bimodel = BigramNN(vocab_size, embed_dim, hidden_dim)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Bimodel.parameters(), lr=0.01)

In [89]:
# 학습 루프
for epoch in range(200):
    optimizer.zero_grad()
    output = Bimodel(X)
    loss = criterion(output, Y)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 50 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 50, Loss: 0.0001
Epoch 100, Loss: 0.0001
Epoch 150, Loss: 0.0001
Epoch 200, Loss: 0.0000


In [90]:
# 테스트 : 다음 단어 예측
def predict_next(w1,w2):
    Bimodel.eval()
    x = torch.tensor([[word2idx[w1], word2idx[w2]]])
    with torch.no_grad():
        output = Bimodel(x)
        pred_idx = torch.argmax(output, dim=1).item()
    return idx2word[pred_idx]

print("\n=== Bigram Prediction ===")
print("Input: (I, love) -> " , predict_next("I","love"))
print("Input: (You, hate) -> ", predict_next("You","hate"))


=== Bigram Prediction ===
Input: (I, love) ->  NLP
Input: (You, hate) ->  .
