In [9]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from sklearn.metrics import *

In [10]:
df = pd.read_csv('../data/Language.csv')
df.head()

Unnamed: 0,Text,Language
0,"Nature, in the broadest sense, is the natural...",English
1,"""Nature"" can refer to the phenomena of the phy...",English
2,"The study of nature is a large, if not the onl...",English
3,"Although humans are part of nature, human acti...",English
4,[1] The word nature is borrowed from the Old F...,English


In [11]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Text'])
sequences = tokenizer.texts_to_sequences(df['Text'])

MAX_LEN = 200
padded_sequences = pad_sequences(sequences, maxlen=MAX_LEN)


In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=10000, embedding_dim=128)
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        
    def forward_once(self, x):
        x = self.embedding(x)
        x = torch.mean(x, dim=1)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        return x
    
    def forward(self, x1, x2):
        x1 = self.forward_once(x1)
        x2 = self.forward_once(x2)
        distance = torch.sqrt(torch.sum(torch.pow((x1 - x2), 2), dim=1))
        return distance
        
class LanguageDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels
        
    def __getitem__(self, index):
        text1 = self.texts[index]
        label1 = self.labels[index]
        positive_indices = np.where(self.labels == label1)[0]
        negative_indices = np.where(self.labels != label1)[0]
        np.random.shuffle(positive_indices)
        np.random.shuffle(negative_indices)
        text2_positive = self.texts[positive_indices[0]]
        text2_negative = self.texts[negative_indices[0]]
        return (text1, text2_positive, 0), (text1, text2_negative, 1)
        
    def __len__(self):
        return len(self.texts)

def train_siamese_network(train_loader, val_loader, model, criterion, optimizer, device, num_epochs):
    for epoch in range(num_epochs):
        train_loss = 0.0
        model.train()
        for i, (batch_pos, batch_neg) in enumerate(train_loader):
            x1_pos, x2_pos, y_pos = batch_pos
            x1_neg, x2_neg, y_neg = batch_neg
            
            x1 = torch.cat([x1_pos, x1_neg]).to(device)
            x2 = torch.cat([x2_pos, x2_neg]).to(device)
            y = torch.cat([y_pos, y_neg]).to(device)

            optimizer.zero_grad()

            output = model(x1, x2)

            loss = criterion(output, y)
            train_loss += loss.item()

            loss.backward()
            optimizer.step()

        train_loss /= len(train_loader)
        val_loss, val_acc = evaluate_siamese_network(val_loader, model, criterion, device)

        print(f"Epoch {epoch+1} -- Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

def evaluate_siamese_network(val_loader, model, criterion, device):
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    with torch.no_grad():
        for batch_pos, batch_neg in val_loader:
            x1_pos, x2_pos, y_pos = batch_pos
            x1_neg, x2_neg, y_neg = batch_neg
            
            x1 = torch.cat([x1_pos, x1_neg]).to(device)
            x2 = torch.cat([x2_pos, x2_neg]).to(device)
            y = torch.cat([y_pos, y_neg]).to(device)

            outputs = model(x1, x2)
            loss = criterion(outputs, y)
            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            val_acc += torch.sum(preds == y).item()

    val_loss /= len(val_loader)
    val_acc /= len(val_loader.dataset)

    return val_loss, val_acc


In [12]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=MAX_LEN))
model.add(LSTM(units=64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

2023-03-31 12:34:43.145977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-03-31 12:34:43.147932: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-31 12:34:43.148162: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (n00b): /proc/driver/nvidia/version does not exist
2023-03-31 12:34:43.152857: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 64)           3117248   
                                                                 
 lstm (LSTM)                 (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 3,150,337
Trainable params: 3,150,337
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Encode the Language column
le = LabelEncoder()
df['Language'] = le.fit_transform(df['Language'])


In [15]:
model.fit(padded_sequences, df['Language'], epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2349181520>

In [26]:
def clean_text(text):
    # Remove all the special characters
    text = re.sub(r'\W', ' ', str(text)) 
    # remove numbers
    text = re.sub(r'\d', ' ', str(text))      
    text = text.lower()
    return text

In [1]:
df['cleaned_text'] = df['Text'].apply(clean_text)
df.tail()

NameError: name 'df' is not defined

In [2]:
X = df1['cleaned_text']
y = df1['Language']

le = LabelEncoder()
y = le.fit_transform(y)

cv = CountVectorizer()
X = cv.fit_transform(X).toarray()

NameError: name 'df1' is not defined

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [28]:
# # find the number of Texts in each language
# df.groupby('Language').count()

# # ['English', 'Malayalam', 'Hindi', 'Tamil', 'Portugeese', 'French','Dutch', 'Spanish', 'Greek', 'Russian', 'Danish', 'Italian', 'Turkish', 'Sweedish', 'Arabic', 'German', 'Kannada']
# English = df[df['Language'] == 'English']
# Malayalam = df[df['Language'] == 'Malayalam']
# Hindi = df[df['Language'] == 'Hindi']
# Tamil = df[df['Language'] == 'Tamil']
# Portugeese = df[df['Language'] == 'Portugeese']
# French = df[df['Language'] == 'French']
# Dutch = df[df['Language'] == 'Dutch']
# Spanish = df[df['Language'] == 'Spanish']
# Greek = df[df['Language'] == 'Greek']
# Russian = df[df['Language'] == 'Russian']
# Danish = df[df['Language'] == 'Danish']
# Italian = df[df['Language'] == 'Italian']
# Turkish = df[df['Language'] == 'Turkish']
# Sweedish = df[df['Language'] == 'Sweedish']
# Arabic = df[df['Language'] == 'Arabic']
# German = df[df['Language'] == 'German']
# Kannada = df[df['Language'] == 'Kannada']

# sizeEnglish = len(English)
# sizeMalayalam = len(Malayalam)
# sizeHindi = len(Hindi)
# sizeTamil = len(Tamil)
# sizePortugeese = len(Portugeese)
# sizeFrench = len(French)
# sizeDutch = len(Dutch)
# sizeSpanish = len(Spanish)
# sizeGreek = len(Greek)
# sizeRussian = len(Russian)
# sizeDanish = len(Danish)
# sizeItalian = len(Italian)
# sizeTurkish = len(Turkish)
# sizeSweedish = len(Sweedish)
# sizeArabic = len(Arabic)
# sizeGerman = len(German)
# sizeKannada = len(Kannada)


In [29]:
# # divide the df into df1 df2 and df3 with 3 equal parts of each language

# import random
# df1 = pd.DataFrame()
# df2 = pd.DataFrame()
# df3 = pd.DataFrame()

# df1 = pd.concat([df1, English.sample(n = sizeEnglish//3, random_state = 1)])
# df1 = pd.concat([df1, Malayalam.sample(n = sizeMalayalam//3, random_state = 1)])
# df1 = pd.concat([df1, Hindi.sample(n = sizeHindi//3, random_state = 1)])
# df1 = pd.concat([df1, Tamil.sample(n = sizeTamil//3, random_state = 1)])
# df1 = pd.concat([df1, Portugeese.sample(n = sizePortugeese//3, random_state = 1)])
# df1 = pd.concat([df1, French.sample(n = sizeFrench//3, random_state = 1)])
# df1 = pd.concat([df1, Dutch.sample(n = sizeDutch//3, random_state = 1)])
# df1 = pd.concat([df1, Spanish.sample(n = sizeSpanish//3, random_state = 1)])
# df1 = pd.concat([df1, Greek.sample(n = sizeGreek//3, random_state = 1)])
# df1 = pd.concat([df1, Russian.sample(n = sizeRussian//3, random_state = 1)])
# df1 = pd.concat([df1, Danish.sample(n = sizeDanish//3, random_state = 1)])
# df1 = pd.concat([df1, Italian.sample(n = sizeItalian//3, random_state = 1)])
# df1 = pd.concat([df1, Turkish.sample(n = sizeTurkish//3, random_state = 1)])
# df1 = pd.concat([df1, Sweedish.sample(n = sizeSweedish//3, random_state = 1)])
# df1 = pd.concat([df1, Arabic.sample(n = sizeArabic//3, random_state = 1)])
# df1 = pd.concat([df1, German.sample(n = sizeGerman//3, random_state = 1)])
# df1 = pd.concat([df1, Kannada.sample(n = sizeKannada//3, random_state = 1)])

# df2 = pd.concat([df2, English.sample(n = sizeEnglish//3, random_state = 2)])
# df2 = pd.concat([df2, Malayalam.sample(n = sizeMalayalam//3, random_state = 2)])
# df2 = pd.concat([df2, Hindi.sample(n = sizeHindi//3, random_state = 2)])
# df2 = pd.concat([df2, Tamil.sample(n = sizeTamil//3, random_state = 2)])
# df2 = pd.concat([df2, Portugeese.sample(n = sizePortugeese//3, random_state = 2)])
# df2 = pd.concat([df2, French.sample(n = sizeFrench//3, random_state = 2)])
# df2 = pd.concat([df2, Dutch.sample(n = sizeDutch//3, random_state = 2)])
# df2 = pd.concat([df2, Spanish.sample(n = sizeSpanish//3, random_state = 2)])
# df2 = pd.concat([df2, Greek.sample(n = sizeGreek//3, random_state = 2)])
# df2 = pd.concat([df2, Russian.sample(n = sizeRussian//3, random_state = 2)])
# df2 = pd.concat([df2, Danish.sample(n = sizeDanish//3, random_state = 2)])
# df2 = pd.concat([df2, Italian.sample(n = sizeItalian//3, random_state = 2)])
# df2 = pd.concat([df2, Turkish.sample(n = sizeTurkish//3, random_state = 2)])
# df2 = pd.concat([df2, Sweedish.sample(n = sizeSweedish//3, random_state = 2)])
# df2 = pd.concat([df2, Arabic.sample(n = sizeArabic//3, random_state = 2)])
# df2 = pd.concat([df2, German.sample(n = sizeGerman//3, random_state = 2)])
# df2 = pd.concat([df2, Kannada.sample(n = sizeKannada//3, random_state = 2)])

# df3 = pd.concat([df3, English.sample(n = sizeEnglish//3, random_state = 3)])
# df3 = pd.concat([df3, Malayalam.sample(n = sizeMalayalam//3, random_state = 3)])
# df3 = pd.concat([df3, Hindi.sample(n = sizeHindi//3, random_state = 3)])
# df3 = pd.concat([df3, Tamil.sample(n = sizeTamil//3, random_state = 3)])
# df3 = pd.concat([df3, Portugeese.sample(n = sizePortugeese//3, random_state = 3)])
# df3 = pd.concat([df3, French.sample(n = sizeFrench//3, random_state = 3)])
# df3 = pd.concat([df3, Dutch.sample(n = sizeDutch//3, random_state = 3)])
# df3 = pd.concat([df3, Spanish.sample(n = sizeSpanish//3, random_state = 3)])
# df3 = pd.concat([df3, Greek.sample(n = sizeGreek//3, random_state = 3)])
# df3 = pd.concat([df3, Russian.sample(n = sizeRussian//3, random_state = 3)])
# df3 = pd.concat([df3, Danish.sample(n = sizeDanish//3, random_state = 3)])
# df3 = pd.concat([df3, Italian.sample(n = sizeItalian//3, random_state = 3)])
# df3 = pd.concat([df3, Turkish.sample(n = sizeTurkish//3, random_state = 3)])
# df3 = pd.concat([df3, Sweedish.sample(n = sizeSweedish//3, random_state = 3)])
# df3 = pd.concat([df3, Arabic.sample(n = sizeArabic//3, random_state = 3)])
# df3 = pd.concat([df3, German.sample(n = sizeGerman//3, random_state = 3)])
# df3 = pd.concat([df3, Kannada.sample(n = sizeKannada//3, random_state = 3)])

# # df1.to_csv('../data/df1.csv', index = False)
# # df2.to_csv('../data/df2.csv', index = False)
# # df3.to_csv('../data/df3.csv', index = False)


In [31]:
# split df1 into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2752, 19296) (688, 19296) (2752,) (688,)


In [32]:
def eval(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print('Accuracy:', accuracy_score(y_test, y_pred))
    print('Precision:', precision_score(y_test, y_pred, average = 'weighted'))
    print('Recall:', recall_score(y_test, y_pred, average = 'weighted'))
    print('F1:', f1_score(y_test, y_pred, average = 'weighted'))
    print('Confusion Matrix:')
    print(confusion_matrix(y_test, y_pred))
    
    # model = SVC()
    # eval(model, X_train, X_test, y_train, y_test)



In [34]:
from transformers import BartTokenizer, BartForSequenceClassification
import torch

tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-mnli')
model = BartForSequenceClassification.from_pretrained('facebook/bart-large-mnli')

def get_bart_embeddings(text):
    input_ids = tokenizer.encode(text, return_tensors='pt')
    outputs = model(input_ids)
    last_hidden_state = outputs.last_hidden_state
    return last_hidden_state

def create_siamese_model(embedding_dim):
    # Define the two input layers
    input_a = Input(shape=(embedding_dim,))
    input_b = Input(shape=(embedding_dim,))

    # Define the shared embedding layer
    shared_embedding_layer = Sequential([
        Dense(512, activation='relu'),
        Dropout(0.2),
        Dense(256, activation='relu'),
        Dropout(0.2),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(embedding_dim)
    ])

    # Compute the embeddings for the two inputs
    embedding_a = shared_embedding_layer(input_a)
    embedding_b = shared_embedding_layer(input_b)

    # Define the lambda layer to compute the absolute difference between the two embeddings
    difference_layer = Lambda(lambda x: K.abs(x[0] - x[1]))

    # Compute the absolute difference between the two embeddings
    difference = difference_layer([embedding_a, embedding_b])

    # Define the output layer to predict if the two embeddings are similar or not
    output_layer = Dense(1, activation='sigmoid')(difference)

    # Define the siamese model with the two input layers and the output layer
    siamese_model = Model(inputs=[input_a, input_b], outputs=output_layer)

    return siamese_model

# Get the BART embeddings for the text data
X = df1['cleaned_text'].apply(get_bart_embeddings)
X = np.array(X.tolist())

# Reshape the embeddings to a 2D shape
X = X.reshape(X.shape[0], -1)

# Use siamese network to learn a mapping of the embeddings to a 17D space
siamese_net = create_siamese_model(embedding_dim=X.shape[1])
siamese_net.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy')
siamese_net.fit([X, X], y, batch_size=32, epochs=10, verbose=1)

# Use k-means clustering to cluster the embeddings into 17 groups
kmeans = KMeans(n_clusters=17, n_init=10, max_iter=300)
kmeans.fit(siamese_net.predict([X, X]))

# Map each text to its corresponding cluster label
df1['cluster'] = kmeans.labels_

# Print the language for each cluster
for i in range(17):
    cluster_df = df1[df1['cluster'] == i]
    language = cluster_df['Language'].value_counts().idxmax()
    print(f"Cluster {i}: {language}")


Widget Javascript not detected.  It may not be installed or enabled properly. Reconnecting the current kernel may help.


AttributeError: 'FloatProgress' object has no attribute 'style'

In [None]:
models = [LinearRegression(), RandomForestClassifier(), LogisticRegression(), DecisionTreeClassifier(), GaussianNB(), KNeighborsClassifier(), SVC(), XGBClassifier()]
for model in models:
    eval(model, X_train, X_test, y_train, y_test)

ValueError: Classification metrics can't handle a mix of multiclass and continuous targets

In [None]:
def eval(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f'{model}:')
    print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
    print(f'Precision: {precision_score(y_test, y_pred, average="macro")}')
    print(f'Recall: {recall_score(y_test, y_pred, average="macro")}')
    print(f'F1: {f1_score(y_test, y_pred, average="macro")}')
    # use cm from sklearn.metrics
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(10, 10))
    # greys 
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()
    plt.clf()

In [None]:
from bert_embedding import BertEmbedding


bert_abstract = """We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers.
 Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations by jointly conditioning on both left and right context in all layers.
 As a result, the pre-trained BERT representations can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. 
BERT is conceptually simple and empirically powerful. 
It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE benchmark to 80.4% (7.6% absolute improvement), MultiNLI accuracy to 86.7 (5.6% absolute improvement) and the SQuAD v1.1 question answering Test F1 to 93.2 (1.5% absolute improvement), outperforming human performance by 2.0%."""
sentences = bert_abstract.split('\n')
bert_embedding = BertEmbedding()
result = bert_embedding(sentences)

# number of input in given model
models = []

ModuleNotFoundError: No module named 'bert_embedding'

In [1]:
mport os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from tensorflow import keras
from keras import layers

dataset = pd.read_csv('Language Detection.csv')

# Preprocess the text data
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(dataset['Text'])

# Split the dataset into training and validation sets
train_size = int(0.8 * len(dataset))
train_dataset = dataset[:train_size]
val_dataset = dataset[train_size:]

# contrastive learning
# take embedding of each sentence and use principle component analysis to reduce dimensionality
# plot the embeddings in 3D space and see if the clusters are well separated
# use k-means clustering to cluster the embeddings into 17 groups


class LanguageDetectionModel(keras.Model):
    def __init__(self, num_classes=17):
        super(LanguageDetectionModel, self).__init__()
        self.embedding = layers.Embedding(input_dim=10000, output_dim=128)
        self.gru = layers.GRU(32)
        self.classifier = layers.Dense(num_classes)

    def call(self, inputs):
        x = self.embedding(inputs)
        x = self.gru(x)
        return self.classifier(x)

# Create the model
model = LanguageDetectionModel()
model.compile(

SyntaxError: invalid syntax (3761547776.py, line 1)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt

# define a dataset for pairs of embeddings
class EmbeddingPairsDataset(Dataset):
    def __init__(self, pairs, labels):
        self.pairs = pairs
        self.labels = labels
        
    def __len__(self):
        return len(self.pairs)
    
    def __getitem__(self, idx):
        x1, x2, label = self.pairs[idx], self.labels[idx]
        return x1, x2, label

# create positive and negative pairs of embeddings for language detection
def create_language_pairs(embeddings, labels):
    pairs = []
    pair_labels = []
    for i in range(len(embeddings)):
        for j in range(i+1, len(embeddings)):
            if labels[i] == labels[j]:
                pairs.append((embeddings[i], embeddings[j]))
                pair_labels.append(1)
            else:
                pairs.append((embeddings[i], embeddings[j]))
                pair_labels.append(0)
    return pairs, pair_labels

# define a Siamese network for contrastive learning
class SiameseNetwork(nn.Module):
    def __init__(self, embedding_dim=768):
        super(SiameseNetwork, self).__init__()
        self.embedding_dim = embedding_dim
        self.fc1 = nn.Linear(embedding_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        
    def forward_once(self, x):
        x = self.fc1(x)
        x = nn.ReLU()(x)
        x = self.fc2(x)
        x = nn.ReLU()(x)
        x = self.fc3(x)
        x = nn.ReLU()(x)
        x = self.fc4(x)
        x = nn.ReLU()(x)
        return x
        
    def forward(self, x1, x2):
        x1 = self.forward_once(x1)
        x2 = self.forward_once(x2)
        return x1, x2
    
# define a contrastive loss function
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin
        
    def forward(self, x1, x2, label):
        distance = nn.functional.pairwise_distance(x1, x2)
        loss = torch.mean((label.float()) * torch.pow(distance, 2) + 
                          (1 - label.float()) * torch.pow(torch.clamp(self.margin - distance, min=0.0), 2))
        return loss

# create a function to train the Siamese network
def train_siamese_network(siamese_network, train_loader, optimizer, criterion, device):
    siamese_network.train()
    running_loss = 0.0
    for batch_idx, (data1, data2, label) in enumerate(train_loader):
        data1, data2, label = data1.to(device), data2.to(device), label.to(device)
        optimizer.zero_grad()
        output1, output2 = siamese_network(data1, data2)
        loss = criterion(output1, output2, label)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader)

# create a function to evaluate the Siamese network
def evaluate_siamese_network(siamese_network, test_loader, criterion, device):
    siamese_network.eval()
    running_loss = 0.0
    with torch.no_grad():
        for batch_idx, (data1, data2, label) in enumerate(test_loader):
            data1, data2, label = data1.to(device), data2.to(device), label.to(device)
            output1, output2 = siamese_network(data1, data2)
            loss = criterion(output1, output2, label)
            running_loss += loss.item()
    return running_loss / len(test_loader)

# create a function to plot the loss
def plot_loss(train_loss, test_loss):
    plt.plot(train_loss, label='train loss')
    plt.plot(test_loss, label='test loss')
    plt.legend()
    plt.show()

# create a function to plot the embeddings
def plot_embeddings(embeddings, labels):
    plt.figure(figsize=(10, 10))
    for i in range(len(embeddings)):
        x, y = embeddings[i, 0], embeddings[i, 1]
        plt.scatter(x, y)
        plt.annotate(labels[i], xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
    plt.show()

# create a function to plot the embeddings in 3D space
def plot_embeddings_3d(embeddings, labels):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    for i in range(len(embeddings)):
        x, y, z = embeddings[i, 0], embeddings[i, 1], embeddings[i, 2]
        ax.scatter(x, y, z)
        ax.text(x, y, z, labels[i])
    plt.show()

# create a function evaluate the Siamese network
def evaluate_siamese_network(siamese_network, test_loader, criterion, device):
    siamese_network.eval()
    running_loss = 0.0
    with torch.no_grad():
        for batch_idx, (data1, data2, label) in enumerate(test_loader):
            data1, data2, label = data1.to(device), data2.to(device), label.to(device)
            output1, output2 = siamese_network(data1, data2)
            loss = criterion(output1, output2, label)
            running_loss += loss.item()
    return running_loss / len(test_loader)
