In [None]:
pip install numpy matplotlib gensim sentence-transformers pillow torch torchvision networkx node2vec librosa

 # Word Embeddings
 Word2Vec is a popular method for generating word embeddings.
 It learns vector representations of words that capture semantic relationships.
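 This function turns words into numbers. It takes simple tokenised sentences like "cat say meow" and "dog say woof" and creates a list of 10 numbers (a vector) for each word. These numbers represent the meaning of the word in a way that the computer can work with. With enough training text, the vectors for "cat" and "dog" would end up similar because both words appear in similar contexts; with only two toy sentences, as here, the model is barely trained and the printed vectors mainly illustrate the output format.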

 
# Parameters:
- vector_size=10: Dimensionality of the word vectors
- window=5: Maximum distance between current and predicted word within a sentence
- min_count=1: Ignores all words with total frequency lower than this
- workers=4: Number of CPU cores to use for training

In [2]:
from gensim.models import Word2Vec
import numpy as np

In [3]:
def word_embeddings():
    """Train a tiny Word2Vec model and print the vectors for 'cat' and 'dog'.

    The corpus consists of two tokenised three-word sentences, and every
    word is mapped to a 10-dimensional vector (vector_size=10).
    Nothing is returned; the two vectors are printed.
    """
    corpus = [
        ['cat', 'say', 'meow'],
        ['dog', 'say', 'woof'],
    ]
    w2v = Word2Vec(
        corpus,
        vector_size=10,
        window=5,
        min_count=1,
        workers=4,
    )

    # model.wv is the word -> vector lookup of the trained model.
    vectors = w2v.wv
    print("Word Embedding for 'cat':", vectors['cat'])
    print("Word Embedding for 'dog':", vectors['dog'])


word_embeddings()

Word Embedding for 'cat': [-0.0960355   0.05007293 -0.08759586 -0.04391825 -0.000351   -0.00296181
 -0.0766124   0.09614743  0.04982058  0.09233143]
Word Embedding for 'dog': [ 0.07311766  0.05070262  0.06757693  0.00762866  0.06350891 -0.03405366
 -0.00946401  0.05768573 -0.07521638 -0.03936104]


# Sentence Embeddings
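Sentence Transformers (the `sentence-transformers` package) is a library for state-of-the-art sentence embeddings; `SentenceTransformer` is the class used to load a model.
Its models are based on BERT-style transformer architectures and fine-tuned for generating sentence embeddings.
'paraphrase-MiniLM-L6-v2' is the name of the pre-trained model being used; it maps each sentence to a 384-dimensional vector.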

This function does the same thing, but for entire sentences. It takes sentences like "This is an example sentence" and turns them into a list of numbers. These numbers represent the meaning of the whole sentence.

In [5]:

from sentence_transformers import SentenceTransformer
import matplotlib.pyplot as plt

In [6]:
def sentence_embeddings():
    """Encode two example sentences and print the shape plus a short preview.

    Loads the 'paraphrase-MiniLM-L6-v2' SentenceTransformer model, encodes
    both sentences in a single call, then prints the shape of the result
    and the first five values of the first sentence's embedding.
    Nothing is returned.
    """
    texts = [
        "This is an example sentence",
        "Each sentence is converted to a vector",
    ]
    encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    vectors = encoder.encode(texts)

    first_vector = vectors[0]
    print("Sentence Embedding shape:", vectors.shape)
    print("First sentence embedding:", first_vector[:5])  # preview: first 5 dimensions only


sentence_embeddings()

Sentence Embedding shape: (2, 384)
First sentence embedding: [0.06735881 0.783936   0.27001837 0.0958027  0.38993028]


In [7]:
from PIL import Image
import torch
import torchvision.models as models
import torchvision.transforms as transforms

In [8]:
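# ResNet18 is a convolutional neural network architecture.
# Its residual (skip) connections are designed to mitigate the vanishing gradient problem in deep networks.
# pretrained=True: Uses weights pre-trained on ImageNet
# Image transformation pipeline: resize to 256, center-crop to 224x224, convert to a tensor,
# then normalize each channel with the ImageNet mean and standard deviation.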


In [9]:

def image_embeddings(image_path="car_img.jpg"):
    """Print a ResNet18 feature embedding for a single image.

    The final fully connected layer is removed, so the network outputs the
    features that would normally be fed to the classifier.

    Args:
        image_path: Path of the image file to embed. Defaults to
            "car_img.jpg", the file this notebook was written against.

    Returns:
        None. The embedding's shape and its first five values are printed.
    """
    # NOTE: `pretrained=True` is deprecated in newer torchvision releases in
    # favour of the `weights=` argument; kept so older installs keep working.
    model = models.resnet18(pretrained=True)

    # Keep every child module except the last one (the fully connected layer).
    model = torch.nn.Sequential(*(list(model.children())[:-1]))
    model.eval()  # evaluation mode for inference

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Per-channel ImageNet mean / std, matching the pretrained weights.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # The context manager closes the file handle once the pixels are read.
    # convert("RGB") guarantees three channels: grayscale, palette, RGBA or
    # CMYK files would otherwise not match the 3-channel Normalize step.
    with Image.open(image_path) as img:
        img_tensor = transform(img.convert("RGB")).unsqueeze(0)  # add batch dimension

    with torch.no_grad():  # no gradients needed for feature extraction
        embedding = model(img_tensor).squeeze()

    print("Image Embedding shape:", embedding.shape)
    print("First few dimensions of image embedding:", embedding[:5])


image_embeddings()



Image Embedding shape: torch.Size([512])
First few dimensions of image embedding: tensor([0.3197, 2.5957, 1.1854, 1.3997, 1.1710])


# Graph Embeddings
Node2Vec is an algorithmic framework for learning continuous feature representations for nodes in networks.
# Parameters:
- dimensions=64: Dimensionality of the node embeddings
- walk_length=30: Number of nodes in each random walk
- num_walks=200: Number of random walks started from each node
- workers=4: Number of CPU cores to use

Additional parameters for fit:
- window=10: Maximum distance between the current and predicted node in the random walk
- min_count=1: Ignores all nodes with total frequency lower than this
- batch_words=4: Number of words to be processed in a single batch

In [11]:
from node2vec import Node2Vec
from tqdm import tqdm
import networkx as nx


In [12]:
def graph_embeddings():
    """Fit Node2Vec on the karate club graph and print node 0's embedding.

    Builds the graph with networkx, configures Node2Vec with 64 dimensions
    and 200 walks of length 30, fits the embedding model, and prints the
    vector stored under the key '0'. Nothing is returned.
    """
    karate_graph = nx.karate_club_graph()

    # quiet=True disables tqdm.notebook
    walker = Node2Vec(
        karate_graph,
        dimensions=64,
        walk_length=30,
        num_walks=200,
        workers=4,
        quiet=True,
    )
    fitted = walker.fit(window=10, min_count=1, batch_words=4)

    # The vector is looked up with the string key '0', not the integer 0.
    node_vectors = fitted.wv
    print("Graph Embedding for node 0:", node_vectors['0'])


graph_embeddings()

Graph Embedding for node 0: [ 2.80140370e-01 -4.83708419e-02  1.10683531e-01  5.47424797e-03
 -3.56500149e-02  5.77721186e-02 -2.94917077e-02  2.22904339e-01
  1.14278786e-01 -2.09032863e-01  1.72905222e-01 -1.77412242e-01
  1.81383133e-01  1.32051751e-01 -6.11430146e-02  3.27189639e-03
 -1.27715796e-01 -4.02447172e-02 -6.64173737e-02  3.43594939e-01
  8.59349594e-02  6.46189377e-02  2.36072257e-01 -2.51780897e-02
 -1.30023420e-01  1.97087064e-01 -9.22510307e-03 -1.71496764e-01
  1.42524719e-01 -4.09370810e-02  2.30351612e-02 -5.56365959e-02
 -1.49412230e-01 -2.85502791e-01 -2.87243966e-02  1.11532755e-01
 -2.01834161e-02  8.94406140e-02  2.18878105e-01  2.12365031e-01
  1.30990177e-01 -1.60938129e-01  1.24140881e-01 -1.17639706e-01
 -6.62390068e-02 -7.92074949e-02 -1.15524307e-02 -3.65299702e-01
 -6.13898635e-02  5.93779162e-02  9.47233886e-02  1.95790544e-01
 -2.49083241e-04  2.34282553e-01  6.25202134e-02  1.16826773e-01
  1.99904129e-01 -1.85254037e-01  3.18258643e-01 -3.49126816e-

# Audio Embeddings
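This uses Mel-frequency cepstral coefficients (MFCCs).
MFCCs are commonly used features in speech and audio processing.
The MFCCs computed for each frame are averaged over time, so the whole clip is summarized by a single vector with one value per coefficient.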
# Parameters:
 - y: The input audio time series
 - sr: The sampling rate of y
 - n_mfcc=13: The number of MFCCs to return

In [15]:
import librosa

In [17]:
def audio_embeddings():
    """Print a fixed-length MFCC summary of the sample audio file.

    Loads "sample_auadio.wav" with librosa, computes 13 MFCCs, and averages
    the transposed MFCC matrix along axis 0 to obtain one value per
    coefficient. Nothing is returned; the resulting vector is printed.
    """
    wav_file = "sample_auadio.wav"
    signal, sample_rate = librosa.load(wav_file)

    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)

    # Transpose, then average down axis 0: one mean per MFCC coefficient.
    mfcc_embedding = coeffs.T.mean(axis=0)

    print("Audio Embedding (MFCC):", mfcc_embedding)


audio_embeddings()

  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


Audio Embedding (MFCC): [-4.6306921e+02  1.4450919e+02  4.0705360e+01  4.2874289e+00
 -7.7825103e+00 -1.4643743e+01 -1.3492144e+01 -8.2774878e+00
 -1.9758449e+00  1.6375064e+00  2.8817804e+00  2.0075905e+00
  1.2477791e-01]
