In [12]:
#Case Study 1 (Word Analysis)
import spacy

nlp = spacy.load("en_core_web_sm")

customer_feedback = [
    "The product is amazing! I love the quality",
    "The customer service is terrible, very bad",
    "Great experience overall, highly recommended.",
    "The delivery was late, very frustrating."
]

def analyze_feedback(feedback):
    for idx, text in enumerate(feedback, start=1):
        doc = nlp(text)
        print(f"\nFeedback {idx}: '{text}'")
        print("Tokens: ", [token.text for token in doc])
        print("Lemmas: ", [token.lemma_ for token in doc])
        print("Dependencies:")
        for token in doc:
            print(f"{token.text} -> {token.dep_} (head: {token.head.text})")

analyze_feedback(customer_feedback)



Feedback 1: 'The product is amazing! I love the quality'
Tokens:  ['The', 'product', 'is', 'amazing', '!', 'I', 'love', 'the', 'quality']
Lemmas:  ['the', 'product', 'be', 'amazing', '!', 'I', 'love', 'the', 'quality']
Dependencies:
The -> det (head: product)
product -> nsubj (head: is)
is -> ROOT (head: is)
amazing -> acomp (head: is)
! -> punct (head: is)
I -> nsubj (head: love)
love -> ROOT (head: love)
the -> det (head: quality)
quality -> dobj (head: love)

Feedback 2: 'The customer service is terrible, very bad'
Tokens:  ['The', 'customer', 'service', 'is', 'terrible', ',', 'very', 'bad']
Lemmas:  ['the', 'customer', 'service', 'be', 'terrible', ',', 'very', 'bad']
Dependencies:
The -> det (head: service)
customer -> compound (head: service)
service -> nsubj (head: is)
is -> ROOT (head: is)
terrible -> acomp (head: is)
, -> punct (head: bad)
very -> advmod (head: bad)
bad -> acomp (head: is)

Feedback 3: 'Great experience overall, highly recommended.'
Tokens:  ['Great', 'experie

In [5]:
!pip install transformers



In [6]:
#Case Study 2(Word Generation)
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

class EmailAutocompleteSystem:
    def __init__(self):
        self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        self.model = GPT2LMHeadModel.from_pretrained("gpt2")

    def generate_suggestions(self, user_input, context):
        input_text = f"{context} {user_input}"
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt")
        with torch.no_grad():
            output = self.model.generate(input_ids, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2)
        generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return generated_text[len(input_text):]  # This will give you only the generated part.

if __name__ == "__main__":
    autocomplete_system = EmailAutocompleteSystem()
    email_context = "Subject: Discussing Project Proposal\nHi [Recipient],"
    while True:
        user_input = input("Enter your sentence (type 'exit' to end): ")
        if user_input.lower() == 'exit':
            break
        suggestions = autocomplete_system.generate_suggestions(user_input, email_context)
        print("Autocomplete Suggestions:", suggestions.strip() if suggestions.strip() else "No suggestions available.")


Enter your sentence (type 'exit' to end):  The world is


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Autocomplete Suggestions: ['Proposal', 'Hi', '[Recipient],', 'The', 'world', 'is', 'full', 'of', 'ideas.', "I'm", 'interested', 'in', 'your', 'ideas,', 'and', "I'd", 'like', 'to', 'hear', 'from', 'you', 'about', 'your', 'project.', 'Please', 'send', 'me', 'a', 'message', 'if', 'you', 'have', 'any', 'questions.']


Enter your sentence (type 'exit' to end):  exit


In [13]:
#Case Study-3(Text Classification)
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report

# Load dataset and split
categories = ['comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'rec.autos', 'rec.motorcycles']
data = fetch_20newsgroups(subset='all', categories=categories)
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# Vectorize data
vectorizer = TfidfVectorizer(stop_words='english', max_features=10000)
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Train and predict
classifier = LinearSVC().fit(X_train, y_train)
predictions = classifier.predict(X_test)

# Output results
print(f"Accuracy: {classifier.score(X_test, y_test):.2f}")
print("\nClassification Report:")
print(classification_report(y_test, predictions, target_names=data.target_names))


Accuracy: 0.95

Classification Report:
                          precision    recall  f1-score   support

comp.sys.ibm.pc.hardware       0.92      0.92      0.92       183
   comp.sys.mac.hardware       0.93      0.92      0.93       205
               rec.autos       0.97      0.97      0.97       210
         rec.motorcycles       0.97      0.98      0.97       189

                accuracy                           0.95       787
               macro avg       0.95      0.95      0.95       787
            weighted avg       0.95      0.95      0.95       787





In [8]:
#Case Study 4(Semantic_Analysis)
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Initialize NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Function to perform semantic analysis
def semantic_analysis(text):
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]
    synonyms = set()
    for token in lemmatized_tokens:
        for syn in wordnet.synsets(token):
            for lemma in syn.lemmas():
                synonyms.add(lemma.name())
    return list(synonyms)

# Example customer queries
customer_queries = [
    "I received a damaged product. Can I get a refund?",
    "I'm having trouble accessing my account.",
    "How can I track my order status?",
    "The item I received doesn't match the description.",
    "Is there a discount available for bulk orders?"
]

# Semantic analysis for each query
for query in customer_queries:
    print("Customer Query:", query)
    synonyms = semantic_analysis(query)
    print("Semantic Analysis (Synonyms):", synonyms)
    print("\n")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Customer Query: I received a damaged product. Can I get a refund?
Semantic Analysis (Synonyms): ['draw', 'bugger_off', 'encounter', 'stupefy', 'obtain', 'engender', 'contract', 'pay_off', 'start_out', 'ware', 'grow', 'incur', 'start', 'merchandise', 'scram', 'damaged', 'bewilder', 'sire', 'meet', 'find', 'experience', 'product', 'take_in', 'acquire', 'buzz_off', 'damage', 'set_out', 'bring', 'fetch', 'have', 'discredited', 'return', 'baffle', 'amaze', 'refund', 'suffer', 'welcome', 'repay', 'develop', 'puzzle', 'induce', 'catch', 'stimulate', 'gravel', 'arrest', 'fuck_off', 'bring_forth', 'intersection', 'vex', 'receive', 'repayment', 'begin', 'let', 'production', 'sustain', 'mystify', 'make', 'arrive', 'beget', 'dumbfound', 'pay_back', 'cause', 'capture', 'Cartesian_product', 'go', 'pose', 'take', 'pick_up', 'aim', 'stick', 'become', 'invite', 'produce', 'get', 'flummox', 'come', 'fix', 'give_back', 'drive', 'beat', 'mother', "get_under_one's_skin", 'nonplus', 'set_about', 'convey', '

In [9]:
#Case Study 5(Sentiment Analysis)
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download NLTK resources (only required once)
nltk.download('vader_lexicon')

# Sample reviews
reviews = [
    "This product is amazing! I love it.",
    "The product was good, but the packaging was damaged.",
    "Very disappointing experience. Would not recommend.",
    "Neutral feedback on the product.",
]

# Initialize Sentiment Intensity Analyzer
sid = SentimentIntensityAnalyzer()

# Analyze sentiment for each review
for review in reviews:
    print("Review:", review)
    scores = sid.polarity_scores(review)
    print("Sentiment:", end=' ')
    if scores['compound'] > 0.05:
        print("Positive")
    elif scores['compound'] < -0.05:
        print("Negative")
    else:
        print("Neutral")
    print()

Review: This product is amazing! I love it.
Sentiment: Positive

Review: The product was good, but the packaging was damaged.
Sentiment: Negative

Review: Very disappointing experience. Would not recommend.
Sentiment: Negative

Review: Neutral feedback on the product.
Sentiment: Neutral



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [10]:
#Case Study 6(POS Tagging)
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize

# Download NLTK resources (if not already downloaded)
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def pos_tagging(text):
    sentences = sent_tokenize(text)
    tagged_tokens = []
    for sentence in sentences:
        tokens = word_tokenize(sentence)
        tagged_tokens.extend(nltk.pos_tag(tokens))
    return tagged_tokens

def main():
    article_text = """Manchester United secured a 3-1 victory over Chelsea in yesterday's match.
    Goals from Rashford, Greenwood, and Fernandes sealed the win for United.
    Chelsea's only goal came from Pulisic in the first half.
    The victory boosts United's chances in the Premier League title race.
    """
    tagged_tokens = pos_tagging(article_text)
    print("Original Article Text:\n", article_text)
    print("\nParts of Speech Tagging:")
    for token, pos_tag in tagged_tokens:
        print(f"{token}: {pos_tag}")

if __name__ == "__main__":
    main()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Original Article Text:
 Manchester United secured a 3-1 victory over Chelsea in yesterday's match.
    Goals from Rashford, Greenwood, and Fernandes sealed the win for United.
    Chelsea's only goal came from Pulisic in the first half.
    The victory boosts United's chances in the Premier League title race.
    

Parts of Speech Tagging:
Manchester: NNP
United: NNP
secured: VBD
a: DT
3-1: JJ
victory: NN
over: IN
Chelsea: NNP
in: IN
yesterday: NN
's: POS
match: NN
.: .
Goals: NNS
from: IN
Rashford: NNP
,: ,
Greenwood: NNP
,: ,
and: CC
Fernandes: NNP
sealed: VBD
the: DT
win: NN
for: IN
United: NNP
.: .
Chelsea: NN
's: POS
only: JJ
goal: NN
came: VBD
from: IN
Pulisic: NNP
in: IN
the: DT
first: JJ
half: NN
.: .
The: DT
victory: NN
boosts: VBZ
United: NNP
's: POS
chances: NNS
in: IN
the: DT
Premier: NNP
League: NNP
title: NN
race: NN
.: .


In [11]:
#Case Study 7(Chunking)
import nltk
import os

# Set NLTK data path
nltk.data.path.append("/usr/local/share/nltk_data")

# Download the 'punkt' tokenizer model
nltk.download('punkt')

# Download the 'averaged_perceptron_tagger' model
nltk.download('averaged_perceptron_tagger')

# Sample text
text = "The quick brown fox jumps over the lazy dog."

# Tokenize the text into words
words = nltk.word_tokenize(text)

# Perform part-of-speech tagging
pos_tags = nltk.pos_tag(words)

# Define chunk grammar
chunk_grammar = r"""
NP: {<DT>?<JJ>*<NN>} # Chunk sequences of DT, JJ, NN
"""

# Create chunk parser
chunk_parser = nltk.RegexpParser(chunk_grammar)

# Apply chunking
chunked_text = chunk_parser.parse(pos_tags)

# Extract noun phrases
noun_phrases = []
for subtree in chunked_text.subtrees(filter=lambda t: t.label() == 'NP'):
    noun_phrases.append(' '.join(word for word, tag in subtree.leaves()))

# Output
print("Original Text:", text)
print("Noun Phrases:")
for phrase in noun_phrases:
    print("-", phrase)

Original Text: The quick brown fox jumps over the lazy dog.
Noun Phrases:
- The quick brown
- fox
- the lazy dog


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
