In [None]:
!pip install tflite
!pip install tensorflowjs
!pip install tflite-model-maker
!pip install tweet-preprocessor

In [None]:
import os
import re

import tensorflow as tf
import tflite
import tensorflowjs
import tflite_model_maker
import pandas as pd
import tweepy, json
import preprocessor as p

In [None]:
# List the devices TensorFlow can currently see.
tf.config.get_visible_devices()

In [None]:
# Run the `nvidia-smi` command-line tool in a shell (IPython `!` escape).
!nvidia-smi

In [None]:
# Twitter API credentials are read from environment variables so that secrets
# are never stored in (or committed with) the notebook. Each value falls back
# to an empty string when its variable is unset.
# register our client application with Twitter.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
# request tokens
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")

# Auth handler shared by the REST client and the tweet stream.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

In [None]:
# Build the REST client from the auth handler, then print the text of every
# tweet returned by the home timeline (presumably a quick check that the
# credentials work). `api` is reused later to authenticate the stream.
api = tweepy.API(auth)
public_tweets = api.home_timeline()
for tweet in public_tweets:
    print(tweet.text)

In [None]:
class MyTweetsStreamListener(tweepy.StreamListener):
    """Stream listener that buffers raw tweet payloads up to a target count.

    The collected payloads are exposed through the ``tweets`` list.
    """

    def __init__(self, max_tweets = 100):
        """Start with an empty buffer and remember the target count."""
        super().__init__()
        self.tweets = []
        self._max_tweets = max_tweets

    def on_status(self, status):
        """Buffer the status' JSON payload.

        Returns False once the buffer has reached ``max_tweets`` entries and
        True otherwise; False is the signal used to stop receiving.
        """
        self.tweets.append(status._json)
        buffer_full = len(self.tweets) >= self._max_tweets
        return not buffer_full

    def on_error(self, status_code):
        """Return False for status code 420 so the stream disconnects.

        Every other status code yields None.
        """
        return False if status_code == 420 else None

In [None]:
def fetch_tweets(topics, max_tweets = 100):
    """Stream English-language tweets for ``topics`` and return what was collected.

    Args:
        topics: Keywords/phrases passed to the stream as its ``track`` filter.
        max_tweets: Target count handed to ``MyTweetsStreamListener``.

    Returns:
        The listener's ``tweets`` list once the stream's ``filter`` call returns.
    """
    collector = MyTweetsStreamListener(max_tweets)
    stream = tweepy.Stream(auth=api.auth, listener=collector)
    stream.filter(track=topics, languages=["en"])
    return collector.tweets

In [None]:
# Gathering the data
# One stream per class, run one after the other; each call asks for 200 tweets
# matching its keyword list and returns the listener's buffer.
covid_tweets = fetch_tweets(['covid','covid19','facemask','social distancing'], max_tweets=200)
nasa_tweets = fetch_tweets(['Nasa','SpaceX', "Moon"], max_tweets=200)

In [None]:
# Words dropped from every cleaned tweet.
stopwords = ["for", "on", "an", "a", "of", "and", "in", "the", "to", "from"]

def clean_tweet(tweet):
    """Normalize raw tweet text into a lowercase, space-separated token string.

    Steps, in order: lowercase; delete apostrophes; delete @mentions and
    http(s) links; blank out ``( ) ! ?``; blank out ``[bracketed]`` spans;
    replace every remaining character outside ``a-z0-9`` with a space; drop
    stopwords; re-join the tokens with single spaces.

    Args:
        tweet: Raw tweet text (str).

    Returns:
        The cleaned text; an empty string when nothing survives cleaning.
    """
    text = tweet.lower()
    # Delete apostrophes instead of replacing them with a space so that
    # contractions stay one token ("don't" -> "dont"). The typographic
    # apostrophe (U+2019) is handled as well as the ASCII one.
    text = re.sub("['\u2019]", "", text)
    text = re.sub(r"@[A-Za-z0-9_]+", "", text)
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"[()!?]", " ", text)
    # Raw string: "\[" in a plain string literal is an invalid escape sequence.
    text = re.sub(r"\[.*?\]", " ", text)
    text = re.sub(r"[^a-z0-9]", " ", text)
    return " ".join(word for word in text.split() if word not in stopwords)

def create_labeled_df(tweets_list, label):
    """Build a DataFrame of cleaned tweet texts, all tagged with one label.

    Args:
        tweets_list: Iterable of tweet dicts; each must contain a "text" key.
        label: Value stored in the "label" column of every row.

    Returns:
        DataFrame with columns "sentence" (the output of ``clean_tweet``) and
        "label", one row per input tweet, indexed 0..n-1. Empty input yields
        an empty frame that still has both columns.
    """
    # Collect plain records and construct the frame once: growing a DataFrame
    # row by row is quadratic, and DataFrame.append was removed in pandas 2.0.
    records = [
        {"sentence": clean_tweet(tweet["text"]), "label": label}
        for tweet in tweets_list
    ]
    return pd.DataFrame(records, columns=["sentence", "label"])

In [None]:
# Turn each raw tweet list into a DataFrame of cleaned sentences tagged with
# its class label.
covid_tweets_df = create_labeled_df(covid_tweets, label="covid")
nasa_tweets_df = create_labeled_df(nasa_tweets, label="nasa")

In [None]:
# Stack both labeled frames and write them to tweets.csv, the file the model
# data loader reads. index=False (the documented boolean, rather than relying
# on None being falsy) keeps the row index out of the file.
pd.concat([covid_tweets_df, nasa_tweets_df]).to_csv("tweets.csv", index=False)

In [None]:
from tflite_model_maker import model_spec
from tflite_model_maker import text_classifier
from tflite_model_maker.config import ExportFormat
from tflite_model_maker.text_classifier import AverageWordVecSpec
from tflite_model_maker.text_classifier import DataLoader

In [None]:
# Model spec shared by data loading, training and prediction below; the
# word-vector dimension is set to 45.
spec = AverageWordVecSpec(wordvec_dim=45)
# Alternative specs, left disabled:
#spec = model_spec.get('average_word_vec')
#spec = model_spec.get('mobilebert_classifier')

In [None]:
# Load tweets.csv for the chosen spec: text comes from the 'sentence' column,
# the class from the 'label' column, and shuffling is enabled.
dataset = DataLoader.from_csv(
      filename='tweets.csv',
      text_column='sentence',
      label_column='label',
      shuffle=True,
      model_spec=spec
)

In [None]:
# Split the loaded data in two with a 0.9 fraction (presumably ~90% for
# training and the rest for validation; confirm against DataLoader.split).
train_ds, val_ds = dataset.split(0.9)

In [None]:
# Train a text classifier on the training split for 20 epochs using `spec`.
model = text_classifier.create(train_ds, model_spec=spec, epochs=20)

In [None]:
# Evaluate the trained model on the validation split.
model.evaluate(val_ds)

In [None]:
def predict_label(text):
    """Classify one piece of raw text with the trained model.

    The text goes through ``clean_tweet`` and ``spec.preprocess``, is turned
    into a tensor with a leading axis of size 1 added, and is then passed to
    ``model.predict_top_k``.

    Args:
        text: Raw, uncleaned text.

    Returns:
        Whatever ``model.predict_top_k`` returns for ``k=2``.
    """
    encoded = spec.preprocess(clean_tweet(text))
    batch = tf.expand_dims(tf.convert_to_tensor(encoded), 0)
    return model.predict_top_k(batch, k=2)

In [None]:
# Mixed-topic probe: the text mentions both the moon and a mask.
predict_label("Look at the moon!, do you see that guy with mask?")

In [None]:
# Probe whose text contains none of the keywords used to collect the tweets.
predict_label("I got my first moderna shot today?")

In [None]:
# Export the trained model to the 'model' directory in each listed format:
# label file, vocabulary, TensorFlow.js and TFLite.
model.export(export_dir='model',  
             export_format=[
                    ExportFormat.LABEL, 
                    ExportFormat.VOCAB, 
                    ExportFormat.TFJS, 
                    ExportFormat.TFLITE]
             )

In [None]:
# Show a summary of the trained model.
model.summary()