<a href="https://colab.research.google.com/github/ayushiiii28/Meta-Scifor/blob/main/NLP2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
#Text Blob: Simplified Text Processing
from textblob import TextBlob

In [9]:
wiki = TextBlob("I love Natural Language Processing, not you!")
wiki.tags

[('I', 'PRP'),
 ('love', 'VBP'),
 ('Natural', 'JJ'),
 ('Language', 'NNP'),
 ('Processing', 'NNP'),
 ('not', 'RB'),
 ('you', 'PRP')]

In [10]:
wiki.noun_phrases

WordList(['language processing'])

In [11]:
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [12]:
testimonial.sentiment.subjectivity

0.4357142857142857

In [13]:
#Tokenisation
zen = TextBlob("Beautiful is better than ugly. "
               "Explicit is better than implicit. "
               "Simple is better than complex.")
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [14]:
#Get Sentences:

zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

In [15]:
for sentence in zen.sentences:
    print(sentence)

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.


In [16]:
#Word Inflection and Lemmatization
sentence = TextBlob('Use 4 spaces per indentation level.')
sentence.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [17]:

sentence.words[2].singularize()

'space'

In [18]:
sentence.words[0].pluralize()

'Uses'

In [19]:
from textblob import Word
w = Word("lions")
w.lemmatize()

'lion'

In [20]:

q = Word("went")
q.lemmatize("v") # Pass in WordNet part of speech

'go'

In [21]:
#WordNet Integration
Word("length").definitions

['the linear extent in space from one end to the other; the longest dimension of something that is fixed in place',
 'continuance in time',
 'the property of being the extent of something from beginning to end',
 'size of the gap between two places',
 'a section of something that is long and narrow']

In [22]:
animals = TextBlob("cow sheep octopus")
animals.words

WordList(['cow', 'sheep', 'octopus'])

In [23]:
# Spelling Correction

g = TextBlob(" Can you pronounce czechuslovakia?")
print(g.correct())

 An you pronounce czechoslovakia?


In [24]:

from textblob import Word
w = Word('longitude')
w.spellcheck()

[('longitude', 1.0)]

In [25]:
#Get Word and Noun Phrase Frequencies
sent = TextBlob('She sales sea shells at the sea shore')
sent.word_counts['sea']


2

In [26]:
#Translation and Language Detection

blob = TextBlob("hello")
blob.translate(from_lang='en', to='fr')

TextBlob("Bonjour")

In [27]:

d = TextBlob("Bonjour")
d.detect_language

In [28]:
# n-grams
# The TextBlob.ngrams() method returns a list of tuples of n successive words.


blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

In [29]:
# Get Start and End Indices of Sentences
# Use sentence.start and sentence.end to get the indices where a sentence starts and ends within a TextBlob


for k in zen.sentences:
    print(k)
    print("---- Starts at index {}, Ends at index {}".format(k.start, k.end))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95


In [30]:
# Text Classification system
# The textblob.classifiers module makes it simple to create custom classifiers.

train = [
       ('I love this sandwich.', 'pos'),
       ('this is an amazing place!', 'pos'),
       ('I feel very good about these beers.', 'pos'),
       ('this is my best work.', 'pos'),
       ("what an awesome view", 'pos'),
       ('I do not like this restaurant', 'neg'),
       ('I am tired of this stuff.', 'neg'),
       ("I can't deal with this", 'neg'),
       ('he is my sworn enemy!', 'neg'),
]
test = [
       ('the beer was good.', 'pos'),
       ('I do not enjoy my job', 'neg'),
       ("I ain't feeling dandy today.", 'neg'),
       ("I feel amazing!", 'pos'),
       ('Gary is a friend of mine.', 'pos'),
       ("I can't believe I'm doing this.", 'neg')
]

In [31]:

from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

In [32]:
cl.classify("This is an amazing library!")


'pos'

In [33]:
prob_dist = cl.prob_classify("This one's a doozy.")
prob_dist.max()

'pos'

In [34]:
prob_dist = cl.prob_classify("I am suffering from cold")
prob_dist.max()


round(prob_dist.prob("pos"), 2)



round(prob_dist.prob("neg"), 2)

0.69

In [35]:
# Classifying TextBlobs
# Another way to classify text is to pass a classifier into the constructor of TextBlob and call its classify() method.


from textblob import TextBlob
blob = TextBlob("Alcohal is good. But the hangover is horrible.", classifier=cl)
blob.classify()


for s in blob.sentences:
    print(s)
    print(s.classify())

Alcohal is good.
pos
But the hangover is horrible.
pos


In [36]:
# Evaluating Classifiers
# To compute the accuracy on our test set, use the accuracy(test_data) method.


cl.accuracy(test)

1.0