# TextBlob is a Python (2 and 3) library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, translation, and more.

In [3]:
import pandas as pd
from textblob import TextBlob
from nltk.tokenize.toktok import ToktokTokenizer
import re
tokenizer = ToktokTokenizer()
import spacy
nlp = spacy.load('en_core_web_sm', disable=['ner'])

In [4]:
### Data Loading
tweet=pd.read_csv("Elon_musk.csv",encoding='latin1')
tweet = tweet.drop('Unnamed: 0',axis = 1)
tweet

Unnamed: 0,Text
0,@kunalb11 Im an alien
1,@ID_AA_Carmack Ray tracing on Cyberpunk with H...
2,@joerogan @Spotify Great interview!
3,@gtera27 Doge is underestimated
4,@teslacn Congratulations Tesla China for amazi...
...,...
1994,"@flcnhvy True, it sounds so surreal, but the n..."
1995,@PPathole Make sure to read ur terms &amp; con...
1996,@TeslaGong @PPathole Samwise Gamgee
1997,@PPathole Altho Dumb and Dumber is <U+0001F525...


In [5]:
from sklearn.utils import shuffle
tweet = shuffle(tweet)

In [6]:
tweet

Unnamed: 0,Text
1511,@SpaceIntellige3 @guidodecaso @cnunezimages My...
74,@Erdayastronaut @josh_bickett @ajtourville @Sp...
281,@TheBabylonBee A lot of people are going to be...
1207,RT @SpaceX: Completed a full duration test fir...
488,@PPathole @ErcXspace @SpaceX *cooling haha
...,...
1986,Best use of the term Full Stack?
1646,@PPathole @TrevorMahlmann @arstechnica Well d...
234,@Wikipedia Happy birthday Wikipedia! So glad y...
349,@flabellina212 @RationalEtienne @biogirl09 @Er...


In [7]:
tweet.isnull().sum()

Text    0
dtype: int64

In [8]:
import numpy as np
tweet.replace(r'^\s*$', np.nan, regex=True,inplace=True)
tweet.dropna(axis = 0, how = 'any', inplace = True)

In [9]:
tweet.replace(to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], value=["",""], regex=True, inplace=True)
print('escape seq removed')

escape seq removed


In [10]:
import numpy as np
tweet.replace(r'^\s*$', np.nan, regex=True,inplace=True)
tweet.dropna(axis = 0, how = 'any', inplace = True)

In [11]:
tweet

Unnamed: 0,Text
1511,@SpaceIntellige3 @guidodecaso @cnunezimages My...
74,@Erdayastronaut @josh_bickett @ajtourville @Sp...
281,@TheBabylonBee A lot of people are going to be...
1207,RT @SpaceX: Completed a full duration test fir...
488,@PPathole @ErcXspace @SpaceX *cooling haha
...,...
1986,Best use of the term Full Stack?
1646,@PPathole @TrevorMahlmann @arstechnica Well d...
234,@Wikipedia Happy birthday Wikipedia! So glad y...
349,@flabellina212 @RationalEtienne @biogirl09 @Er...


In [12]:
tweet['Text']=tweet['Text'].str.encode('ascii', 'ignore').str.decode('ascii')
print('non-ascii data removed')

non-ascii data removed


In [13]:
import string
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [14]:
def remove_punctuations(Text):
    import string
    for punctuation in string.punctuation:
        Text = Text.replace(punctuation, '')
    return Text
tweet['Text']=tweet['Text'].apply(remove_punctuations)

In [15]:
tweet

Unnamed: 0,Text
1511,SpaceIntellige3 guidodecaso cnunezimages My fa...
74,Erdayastronaut joshbickett ajtourville SpaceX ...
281,TheBabylonBee A lot of people are going to be ...
1207,RT SpaceX Completed a full duration test fire ...
488,PPathole ErcXspace SpaceX cooling haha
...,...
1986,Best use of the term Full Stack
1646,PPathole TrevorMahlmann arstechnica Well do se...
234,Wikipedia Happy birthday Wikipedia So glad you...
349,flabellina212 RationalEtienne biogirl09 ErcXsp...


In [16]:
!python -m spacy download en_core_web_sm

2022-10-31 14:12:37.824657: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en-core-web-sm==3.4.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl (12.8 MB)
[K     |████████████████████████████████| 12.8 MB 4.1 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [17]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
print(stopwords.words('english'))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [18]:
stopword_list = nltk.corpus.stopwords.words('english')
stopword_list.remove('no')
stopword_list.remove('not')

In [19]:
def custom_remove_stopwords(Text, is_lower_case=False):
    tokens = tokenizer.tokenize(Text)
    tokens = [token.strip() for token in tokens]
    if is_lower_case:
        filtered_tokens = [token for token in tokens if token not in stopword_list]
    else:
        filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]
    filtered_text = ' '.join(filtered_tokens)    
    return filtered_text

In [20]:
tweet['Text']=tweet['Text'].apply(custom_remove_stopwords)

In [21]:
tweet

Unnamed: 0,Text
1511,SpaceIntellige3 guidodecaso cnunezimages favor...
74,Erdayastronaut joshbickett ajtourville SpaceX ...
281,TheBabylonBee lot people going super unhappy W...
1207,RT SpaceX Completed full duration test fire Ra...
488,PPathole ErcXspace SpaceX cooling haha
...,...
1986,Best use term Full Stack
1646,PPathole TrevorMahlmann arstechnica Well sever...
234,Wikipedia Happy birthday Wikipedia glad exist
349,flabellina212 RationalEtienne biogirl09 ErcXsp...


In [22]:
def remove_special_characters(Text):
    Text = re.sub('[^a-zA-z0-9\s]', '', Text)
    return Text

In [23]:
tweet['Text']=tweet['Text'].apply(remove_special_characters)

In [24]:
def remove_html(Text):
    import re
    html_pattern = re.compile('<.*?>')
    return html_pattern.sub(r' ', Text)

In [25]:
tweet['Text']=tweet['Text'].apply(remove_html)

In [26]:
def cleanse(word):
    rx = re.compile(r'\D*\d')
    if rx.match(word):
        return ''
    return word
def remove_alphanumeric(strings):
    nstrings = [" ".join(filter(None, (
    cleanse(word) for word in string.split()))) 
    for string in strings.split()]
    str1 = ' '.join(nstrings)
    return str1

In [27]:
tweet['Text']=tweet['Text'].apply(remove_alphanumeric)

In [28]:
tweet

Unnamed: 0,Text
1511,guidodecaso cnunezimages favorite one tension...
74,Erdayastronaut joshbickett ajtourville SpaceX ...
281,TheBabylonBee lot people going super unhappy W...
1207,RT SpaceX Completed full duration test fire Ra...
488,PPathole ErcXspace SpaceX cooling haha
...,...
1986,Best use term Full Stack
1646,PPathole TrevorMahlmann arstechnica Well sever...
234,Wikipedia Happy birthday Wikipedia glad exist
349,RationalEtienne ErcXspace Yes


# Polarity and Subjectivity
Polarity is a float value which helps in identifying whether a sentence is positive or negative. Its values ranges in [-1,1] where 1 means positive statement and -1 means a negative statement.

On the other side, Subjective sentences generally refer to personal opinion, emotion or judgment whereas objective refers to factual information. Subjectivity is also a float which lies in the range of [0,1]. Closer the value to 1, more likly it is public opinion.

In [29]:
tweet['sentiment'] = tweet['Text'].apply(lambda tweet: TextBlob(tweet).sentiment)

In [30]:
tweet

Unnamed: 0,Text,sentiment
1511,guidodecaso cnunezimages favorite one tension...,"(0.3181818181818182, 0.7272727272727273)"
74,Erdayastronaut joshbickett ajtourville SpaceX ...,"(0.25, 0.5)"
281,TheBabylonBee lot people going super unhappy W...,"(0.07333333333333333, 0.7266666666666666)"
1207,RT SpaceX Completed full duration test fire Ra...,"(0.1708333333333333, 0.275)"
488,PPathole ErcXspace SpaceX cooling haha,"(0.2, 0.3)"
...,...,...
1986,Best use term Full Stack,"(0.675, 0.42500000000000004)"
1646,PPathole TrevorMahlmann arstechnica Well sever...,"(0.14, 0.33499999999999996)"
234,Wikipedia Happy birthday Wikipedia glad exist,"(0.65, 1.0)"
349,RationalEtienne ErcXspace Yes,"(0.0, 0.0)"


In [31]:
sentiment_series = tweet['sentiment'].tolist()
sentiment_series

[Sentiment(polarity=0.3181818181818182, subjectivity=0.7272727272727273),
 Sentiment(polarity=0.25, subjectivity=0.5),
 Sentiment(polarity=0.07333333333333333, subjectivity=0.7266666666666666),
 Sentiment(polarity=0.1708333333333333, subjectivity=0.275),
 Sentiment(polarity=0.2, subjectivity=0.3),
 Sentiment(polarity=-0.4166666666666667, subjectivity=0.7777777777777778),
 Sentiment(polarity=0.0, subjectivity=0.0),
 Sentiment(polarity=0.2, subjectivity=0.3),
 Sentiment(polarity=0.8, subjectivity=0.75),
 Sentiment(polarity=0.0, subjectivity=0.1),
 Sentiment(polarity=0.0, subjectivity=1.0),
 Sentiment(polarity=0.2, subjectivity=0.2),
 Sentiment(polarity=0.13333333333333333, subjectivity=0.6666666666666666),
 Sentiment(polarity=0.3, subjectivity=0.45),
 Sentiment(polarity=0.17916666666666667, subjectivity=0.4666666666666666),
 Sentiment(polarity=0.0, subjectivity=0.0),
 Sentiment(polarity=-0.09722222222222222, subjectivity=0.48055555555555546),
 Sentiment(polarity=0.0, subjectivity=0.0),
 

In [32]:
columns = ['polarity', 'subjectivity']
df1 = pd.DataFrame(sentiment_series, columns=columns, index=tweet.index)
df1

Unnamed: 0,polarity,subjectivity
1511,0.318182,0.727273
74,0.250000,0.500000
281,0.073333,0.726667
1207,0.170833,0.275000
488,0.200000,0.300000
...,...,...
1986,0.675000,0.425000
1646,0.140000,0.335000
234,0.650000,1.000000
349,0.000000,0.000000


In [33]:
result = pd.concat([tweet,df1],axis=1)
result

Unnamed: 0,Text,sentiment,polarity,subjectivity
1511,guidodecaso cnunezimages favorite one tension...,"(0.3181818181818182, 0.7272727272727273)",0.318182,0.727273
74,Erdayastronaut joshbickett ajtourville SpaceX ...,"(0.25, 0.5)",0.250000,0.500000
281,TheBabylonBee lot people going super unhappy W...,"(0.07333333333333333, 0.7266666666666666)",0.073333,0.726667
1207,RT SpaceX Completed full duration test fire Ra...,"(0.1708333333333333, 0.275)",0.170833,0.275000
488,PPathole ErcXspace SpaceX cooling haha,"(0.2, 0.3)",0.200000,0.300000
...,...,...,...,...
1986,Best use term Full Stack,"(0.675, 0.42500000000000004)",0.675000,0.425000
1646,PPathole TrevorMahlmann arstechnica Well sever...,"(0.14, 0.33499999999999996)",0.140000,0.335000
234,Wikipedia Happy birthday Wikipedia glad exist,"(0.65, 1.0)",0.650000,1.000000
349,RationalEtienne ErcXspace Yes,"(0.0, 0.0)",0.000000,0.000000


In [34]:
result.loc[result['polarity']>=0.3, 'Sentiment'] = "Positive"
result.loc[result['polarity']<0.3, 'Sentiment'] = "Negative"

In [35]:
result[0:20]

Unnamed: 0,Text,sentiment,polarity,subjectivity,Sentiment
1511,guidodecaso cnunezimages favorite one tension...,"(0.3181818181818182, 0.7272727272727273)",0.318182,0.727273,Positive
74,Erdayastronaut joshbickett ajtourville SpaceX ...,"(0.25, 0.5)",0.25,0.5,Negative
281,TheBabylonBee lot people going super unhappy W...,"(0.07333333333333333, 0.7266666666666666)",0.073333,0.726667,Negative
1207,RT SpaceX Completed full duration test fire Ra...,"(0.1708333333333333, 0.275)",0.170833,0.275,Negative
488,PPathole ErcXspace SpaceX cooling haha,"(0.2, 0.3)",0.2,0.3,Negative
1273,TJLK eerie parallels Boring Company amp VaultT...,"(-0.4166666666666667, 0.7777777777777778)",-0.416667,0.777778,Negative
1629,BernieSanders,"(0.0, 0.0)",0.0,0.0,Negative
1227,tobyliiiiiiiiii CARandDRIVER Tesla Haha,"(0.2, 0.3)",0.2,0.3,Negative
442,hajekmiloslav Great nails amp lipstick,"(0.8, 0.75)",0.8,0.75,Positive
1990,kenyanwalstreet Not actually payout vesting st...,"(0.0, 0.1)",0.0,0.1,Negative


In [36]:
TextBlob("Any crypto wallet that won’t give you your private keys should be avoided at all costs").sentiment

Sentiment(polarity=0.0, subjectivity=0.375)

In [37]:
TextBlob("he is good good good boy").sentiment

Sentiment(polarity=0.6999999999999998, subjectivity=0.6000000000000001)

In [38]:
TextBlob("he is very good good boy").sentiment

Sentiment(polarity=0.8049999999999999, subjectivity=0.6900000000000002)