<a href="https://colab.research.google.com/github/haracharan/Home_Assignment-4_CNN/blob/main/Untitled14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Fetch every NLTK data package this notebook downloads, quietly and in a fixed order
for resource in ("punkt", "stopwords", "punkt_tab"):
    nltk.download(resource, quiet=True)

def nlp_preprocessing(sentence):
    """Tokenize a sentence, drop stopwords/non-alphabetic tokens, and stem the rest.

    Each intermediate stage is printed, and the final stemmed tokens are
    returned so callers can reuse the result instead of only reading the
    printed output.

    Parameters
    ----------
    sentence : str
        Text to preprocess.

    Returns
    -------
    list of str
        Porter-stemmed tokens that survived the stopword/alphabetic filter,
        in their original order.
    """
    # 1. Tokenization
    tokens = word_tokenize(sentence)
    print("Original Tokens:", tokens)

    # 2. Stopword Removal
    # The stopword comparison is case-insensitive (word.lower()); isalpha()
    # additionally discards punctuation and any token containing digits.
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [
        word
        for word in tokens
        if word.lower() not in stop_words and word.isalpha()
    ]
    print("Tokens Without Stopwords:", filtered_tokens)

    # 3. Stemming
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
    print("Stemmed Words:", stemmed_tokens)

    return stemmed_tokens

# Demo: push one sample sentence through the preprocessing pipeline
sentence = (
    "NLP techniques are used in virtual assistants like Alexa and Siri."
)
# The trailing ';' keeps the cell from echoing whatever the call returns
nlp_preprocessing(sentence);

Original Tokens: ['NLP', 'techniques', 'are', 'used', 'in', 'virtual', 'assistants', 'like', 'Alexa', 'and', 'Siri', '.']
Tokens Without Stopwords: ['NLP', 'techniques', 'used', 'virtual', 'assistants', 'like', 'Alexa', 'Siri']
Stemmed Words: ['nlp', 'techniqu', 'use', 'virtual', 'assist', 'like', 'alexa', 'siri']


In [2]:
import spacy

# Text to analyse
sentence = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."

# Load the "en_core_web_sm" pipeline and run it over the text
nlp = spacy.load("en_core_web_sm")
doc = nlp(sentence)

# Report every entity found, with its label and character offsets in the sentence
print("Named Entities:")
for ent in doc.ents:
    print(f"Text: {ent.text}, Label: {ent.label_}, Start: {ent.start_char}, End: {ent.end_char}")


Named Entities:
Text: Barack Obama, Label: PERSON, Start: 0, End: 12
Text: 44th, Label: ORDINAL, Start: 27, End: 31
Text: the United States, Label: GPE, Start: 45, End: 62
Text: the Nobel Peace Prize, Label: WORK_OF_ART, Start: 71, End: 92
Text: 2009, Label: DATE, Start: 96, End: 100


In [3]:
import numpy as np

def softmax(x, axis=-1):
    """Compute a numerically stable softmax along ``axis``.

    Parameters
    ----------
    x : array_like
        Input scores.
    axis : int, optional
        Axis along which the values are normalised. Defaults to ``-1``
        (the last axis), which is row-wise for a 2-D input.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries are non-negative
        and sum to 1 along ``axis``.
    """
    # Subtracting the per-slice maximum does not change the result
    # mathematically, but keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

def scaled_dot_product_attention(Q, K, V):
    """Compute scaled dot-product attention: ``softmax(Q K^T / sqrt(d_k)) V``.

    Works for plain 2-D inputs and for inputs with extra leading (batch)
    dimensions: only the last two axes of ``K`` are transposed and the
    products are taken with ``np.matmul``, which broadcasts over leading
    axes.

    Parameters
    ----------
    Q : array_like, shape (..., n_queries, d_k)
        Query vectors.
    K : array_like, shape (..., n_keys, d_k)
        Key vectors.
    V : array_like, shape (..., n_keys, d_v)
        Value vectors.

    Returns
    -------
    attention_weights : numpy.ndarray, shape (..., n_queries, n_keys)
        Softmax-normalised scores; each row sums to 1 over the keys.
    output : numpy.ndarray, shape (..., n_queries, d_v)
        Attention-weighted combination of the value vectors.
    """
    # Accept nested lists as well as arrays.
    Q, K, V = np.asarray(Q), np.asarray(K), np.asarray(V)
    d_k = Q.shape[-1]  # feature dimension shared by queries and keys

    # Step 1: dot product of every query with every key.
    # Swap only the last two axes so leading batch axes stay in place
    # (K.T would reverse *all* axes). A 1-D K has no second axis to swap
    # and is used as-is, which is what K.T yields for a 1-D array.
    K_t = np.swapaxes(K, -1, -2) if K.ndim >= 2 else K
    scores = np.matmul(Q, K_t)

    # Step 2: scale by sqrt(d_k)
    scaled_scores = scores / np.sqrt(d_k)

    # Step 3: softmax over the key axis gives the attention weights
    attention_weights = softmax(scaled_scores)

    # Step 4: weight the value vectors
    output = np.matmul(attention_weights, V)

    return attention_weights, output

# Toy inputs: two queries and two keys over 4 features (Q and K are identical,
# and their two rows are orthogonal), plus one 4-d value vector per key
Q = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
K = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
V = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

# Compute the attention weights and the attended output
weights, output = scaled_dot_product_attention(Q=Q, K=K, V=V)

# Show both results
print("Attention Weights:\n", weights)
print("\nOutput:\n", output)


Attention Weights:
 [[0.73105858 0.26894142]
 [0.26894142 0.73105858]]

Output:
 [[2.07576569 3.07576569 4.07576569 5.07576569]
 [3.92423431 4.92423431 5.92423431 6.92423431]]
