In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/laxmimerit/twitter-data/master/twitt30k.csv')
df.head()

Unnamed: 0,twitts,sentiment
0,@robbiebronniman Sounds like a great night.,1
1,Damn the person who stolde my wallet !!!!! Ma...,1
2,Greetings from the piano bench (photo) http:/...,1
3,@drewryanscott i love it!! i love you!! haha f...,1
4,"@kissthestars Pretty pretty pretty please, pak...",0


In [4]:
df['sentiment'].value_counts()

1    15000
0    15000
Name: sentiment, dtype: int64

## SVM Model and Data Preparation 

In [5]:
def run_svm(df):
    X = df['twitts']
    y = df['sentiment']

    tfidf = TfidfVectorizer()
    X = tfidf.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)

    print('shape of X: ', X.shape)

    clf = LinearSVC()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    print()
    print('Printing Report')
    print(classification_report(y_test, y_pred))
    
    return tfidf, clf

In [6]:
%%time
tfidf, clf = run_svm(df)

shape of X:  (30000, 40854)

Printing Report
              precision    recall  f1-score   support

           0       0.75      0.74      0.75      3000
           1       0.74      0.75      0.75      3000

    accuracy                           0.75      6000
   macro avg       0.75      0.75      0.75      6000
weighted avg       0.75      0.75      0.75      6000

Wall time: 1.79 s


In [7]:
x = ['i am really happy. thanks a lot for coming with me']

In [8]:
clf.predict(tfidf.transform(x))

array([1], dtype=int64)

In [9]:
x = ['i am very sad']
clf.predict(tfidf.transform(x))

array([0], dtype=int64)

## Data Cleaning and Retraining SVM 

# Use our preprocess python package

In [14]:
pip install git+https://github.com/laxmimerit/preprocess_kgptalkie.git --upgrade --force-reinstall


Collecting git+https://github.com/laxmimerit/preprocess_kgptalkie.git
  Cloning https://github.com/laxmimerit/preprocess_kgptalkie.git to c:\users\icluster\appdata\local\temp\pip-req-build-2l9x0508
  Resolved https://github.com/laxmimerit/preprocess_kgptalkie.git to commit 9ca68d37027af9f6a30d54640347ce3b2e2694b3
Building wheels for collected packages: preprocess-kgptalkie
  Building wheel for preprocess-kgptalkie (setup.py): started
  Building wheel for preprocess-kgptalkie (setup.py): finished with status 'done'
  Created wheel for preprocess-kgptalkie: filename=preprocess_kgptalkie-0.1.3-py3-none-any.whl size=7691 sha256=0e8889f0faa5045323357bedd0c282812f0ce7d5e49d18f892d347ade2cef65a
  Stored in directory: C:\Users\ICLUSTER\AppData\Local\Temp\pip-ephem-wheel-cache-p7n4qf94\wheels\d1\c3\bb\559fe93e652b51cbc532f17e9693f3b70055f8560cf06c1fb3
Successfully built preprocess-kgptalkie
Installing collected packages: preprocess-kgptalkie
Successfully installed preprocess-kgptalkie-0.1.3
Not

  Running command git clone -q https://github.com/laxmimerit/preprocess_kgptalkie.git 'C:\Users\ICLUSTER\AppData\Local\Temp\pip-req-build-2l9x0508'


In [21]:
conda install -c conda-forge spacy

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\ICLUSTER\anaconda3

  added / updated specs:
    - spacy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    catalogue-2.0.6            |   py39hcbf5309_2          32 KB  conda-forge
    cymem-2.0.6                |   py39h415ef7b_3          34 KB  conda-forge
    cython-blis-0.7.7          |   py39h5d4886f_1         5.6 MB  conda-forge
    langcodes-3.3.0            |     pyhd8ed1ab_0         156 KB  conda-forge
    murmurhash-1.0.7           |   py39h415ef7b_0          24 KB  conda-forge
    pathy-0.6.1                |     pyhd8ed1ab_0          37 KB  conda-forge
    preshed-3.0.6              |   py39h415ef7b_2          83 KB  conda-forge
    pydantic-1.8.2             |   py39hb82d6ee_2         1.6 MB  conda-forge
    she

In [23]:
pip install -U textblob

Collecting textblob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
Installing collected packages: textblob
Successfully installed textblob-0.17.1
Note: you may need to restart the kernel to use updated packages.


In [27]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl (12.8 MB)

2022-05-13 16:09:02.581041: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2022-05-13 16:09:02.581143: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.



Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.3.0
[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [28]:
import spacy
spacy.load('en_core_web_sm')
from spacy.lang.en import English

import preprocess_kgptalkie as pp

In [29]:
df['twitts'] = df['twitts'].apply(lambda x: x.lower())

In [30]:
df['twitts'] = df['twitts'].apply(lambda x: pp.cont_exp(x))

In [31]:
df

Unnamed: 0,twitts,sentiment
0,@robbiebronniman sounds like a great night.,1
1,damn the person who stolde my wallet !!!!! ma...,1
2,greetings from the piano bench (photo) http:/...,1
3,@drewryanscott i love it!! i love you!! haha f...,1
4,"@kissthestars pretty pretty pretty please, pak...",0
...,...,...
29995,@calumfan1 is it in any way related to photosh...,0
29996,@swiz_nz really? wow thats crap,0
29997,"at the 2010 lexus hs250h press event. again, ...",0
29998,@karmicunderpath ooooh now there is a nice tho...,1


In [32]:
run_svm(df)

shape of X:  (30000, 40753)

Printing Report
              precision    recall  f1-score   support

           0       0.75      0.74      0.75      3000
           1       0.75      0.76      0.75      3000

    accuracy                           0.75      6000
   macro avg       0.75      0.75      0.75      6000
weighted avg       0.75      0.75      0.75      6000



(TfidfVectorizer(), LinearSVC())

In [33]:
# remove emails and urls

df['twitts'] = df['twitts'].apply(lambda x: pp.remove_emails(x))
df['twitts'] = df['twitts'].apply(lambda x: pp.remove_urls(x))
df['twitts'] = df['twitts'].apply(lambda x: pp.remove_rt(x))
df['twitts'] = df['twitts'].apply(lambda x: pp.remove_html_tags(x))
df['twitts'] = df['twitts'].apply(lambda x: pp.remove_special_chars(x))


In [34]:
tfidf, clf = run_svm(df)

shape of X:  (30000, 42855)

Printing Report
              precision    recall  f1-score   support

           0       0.75      0.74      0.74      3000
           1       0.74      0.75      0.75      3000

    accuracy                           0.75      6000
   macro avg       0.75      0.75      0.75      6000
weighted avg       0.75      0.75      0.75      6000



In [35]:
x

['i am very sad']

In [36]:
clf.predict(tfidf.transform(x))

array([0], dtype=int64)

## Fine Tuning Model

## Saving and Loading ML Model 

In [37]:
import pickle

In [38]:
pickle.dump(clf, open('clf.pkl', 'wb'))
pickle.dump(tfidf, open('tfidf.pkl', 'wb'))

In [39]:
del clf
del tfidf

In [40]:
clf = pickle.load(open('clf.pkl', 'rb'))
tfidf = pickle.load(open('tfidf.pkl', 'rb'))

In [41]:
clf

LinearSVC()

In [42]:
x

['i am very sad']

In [43]:
clf.predict(tfidf.transform(x))

array([0], dtype=int64)

# Real-Time Twitter Sentiment Analysis 

In [3]:
consumer_key = 'R7DGimRNkT11sbngA0MRqLmNE'
consumer_secret = 'w5Axtw43feejwgmPIhqPhPOt1aHso1Guw1yuFwlmijtlh0vguK'
access_token = '1279486577656295425-l3gaKqKuHQdKl44rPXUc0WYcc26wgq'
access_token_secret = '80dGAdcx6LuoWM1mSt669V5NESP0EOuX1dK8Mianjqxi2'

In [1]:
!pip install tweepy

Collecting tweepy
  Downloading tweepy-4.9.0-py3-none-any.whl (77 kB)
Installing collected packages: tweepy
Successfully installed tweepy-4.9.0


In [4]:
import tweepy

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

public_tweets = api.home_timeline()

In [5]:
type(public_tweets)

tweepy.models.ResultSet

In [6]:
public_tweets[0].text

'Matching Indiaâ€™s massive #talent base effectively to support the current demand environment for the $220 billion Inâ€¦ https://t.co/pnNX0TTcOF'

In [7]:
for tweet in public_tweets:
    print(tweet.text)

Matching Indiaâ€™s massive #talent base effectively to support the current demand environment for the $220 billion Inâ€¦ https://t.co/pnNX0TTcOF
RT @KirkDBorne: A Survey of #MachineLearning and #DeepLearning Applications in #Mobile &amp; Wireless Networking: https://t.co/65q0ya2G3M
â€”â€”â€”â€”â€”â€¦
RT @avikumart_: Day 15 #100DaysOfCode ML seriesðŸ¤–ðŸ‘‡

&gt;&gt;Naive Bayes is an algorithm based on Bayesian probability. it is used for classificatiâ€¦
RT @KirkDBorne: 50 Shades of Data
#AI
#Analytics
#BI
#BigData
#Database
#DataEngineering
#DataLake
#DataScience
#DataWarehouse
#DeepLearninâ€¦
RT @Paula_Piccard: Top 100 AI Unicorns and their Success Stories: A Legendary Path

Know more: https://t.co/M2pMKzLUTp

#MachineLearning #Aâ€¦
The shooting was reported shortly before 1.30 pm at Geneva Presbyterian Church in the city of Laguna Woods, the Oraâ€¦ https://t.co/kYVE6EJYvn
RT @SourabhSKatoch: 120 Python Projects with Source Code solved and explained for free.

https://t.co/HQ8RjgNMuk

