<a href="https://colab.research.google.com/github/elangbijak4/AI-General/blob/main/PoC_Protokol_Komunikasi_AutoEnkoder_dengan_Layer_Embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from gensim.models import Word2Vec
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
import numpy as np

In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences  # Import pad_sequences

In [13]:
# Sample sentences
sentences = [
    ['hello', 'world'],
    ['how', 'are', 'you'],
    ['autoencoder', 'communication', 'protocol']
]

# Train a Word2Vec model
w2v_model = Word2Vec(sentences, vector_size=100, min_count=1, window=3, sg=1)

# Function to convert sentence to embedding with padding
def sentence_to_embedding(sentence, model, max_length):
    embedding = []
    for word in sentence:
        if word in model.wv:
            embedding.append(model.wv[word])
        else:
            print(f"Warning: '{word}' is not predefined in the protocol. Skipping.")
            # Optionally, append a zero vector or a special "unknown" vector here

    if len(embedding) < max_length:
        # Pad with zeros if shorter than max_length
        padding = [np.zeros(model.vector_size)] * (max_length - len(embedding))
        embedding.extend(padding)
    elif len(embedding) > max_length:
        # Trim to max_length if longer
        embedding = embedding[:max_length]
    return np.array(embedding)

# Determine maximum sentence length
max_length = max(len(sent) for sent in sentences)

# Prepare training data with padding
x_train = np.array([sentence_to_embedding(sent, w2v_model, max_length) for sent in sentences])

# Flatten the input for the autoencoder
x_train_flat = x_train.reshape((x_train.shape[0], -1))

# Define dimensions
input_dim = x_train_flat.shape[1]
encoding_dim = 50  # Dimension of latent space

# Input placeholder
input_img = Input(shape=(input_dim,))

# Encoder layers
encoded = Dense(encoding_dim, activation='relu')(input_img)

# Decoder layers
decoded = Dense(input_dim, activation='linear')(encoded)

# Autoencoder model
autoencoder = Model(input_img, decoded)

# Compile the autoencoder
autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder
autoencoder.fit(x_train_flat, x_train_flat, epochs=100, batch_size=2, shuffle=True)

# Example of communication protocol steps
def communicate(sentence):
    # Step 1: Tokenize and convert to embeddings
    tokens = sentence.split()
    sentence_embedding = sentence_to_embedding(tokens, w2v_model, max_length).flatten().reshape(1, -1)

    # Step 2: Encode with autoencoder
    encoded_sentence = autoencoder.predict(sentence_embedding).flatten().reshape(1, -1)

    # Simulate transmission (here we skip actual transmission for simplicity)
    received_encoded_sentence = encoded_sentence

    # Step 3: Decode with autoencoder
    decoded_sentence = autoencoder.predict(received_encoded_sentence).reshape(max_length, -1)

    # Step 4: Find closest vectors in embedding space
    closest_words = [w2v_model.wv.similar_by_vector(vec, topn=1)[0][0] for vec in decoded_sentence]

    # Step 5: Reconstruct sentence
    reconstructed_sentence = ' '.join(closest_words)

    return reconstructed_sentence

# Example usage
input_sentence = "hello world"
reconstructed_sentence = communicate(input_sentence)
print("Input sentence:", input_sentence)
print("Reconstructed sentence:", reconstructed_sentence)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [14]:
# Example usage
input_sentence = "i see"
reconstructed_sentence = communicate(input_sentence)
print("Input sentence:", input_sentence)
print("Reconstructed sentence:", reconstructed_sentence)

Input sentence: i see
Reconstructed sentence: hello world protocol
