# Tweets Classifier

## Setup

In [None]:
!pip install tflite
!pip install tflite-model-maker
!pip install git+https://github.com/tweepy/tweepy.git

In [None]:
import tensorflow as tf
import tflite
import tflite_model_maker
import pandas as pd
import tweepy, json


from tflite_model_maker import model_spec
from tflite_model_maker import text_classifier
from tflite_model_maker.config import ExportFormat
from tflite_model_maker.text_classifier import AverageWordVecSpec
from tflite_model_maker.text_classifier import DataLoader

In [None]:
# List the devices (e.g. CPU/GPU) that TensorFlow can see in this runtime.
tf.config.get_visible_devices()

In [None]:
!nvidia-smi

## Create Dataset

In [None]:
def get_tweets(query_str, auth_token, max_results=100):
    """Fetch recent English-language tweets matching a search query.

    Args:
        query_str: Twitter API v2 search query string.
        auth_token: Bearer token used to authenticate the tweepy client.
        max_results: Upper bound on the number of tweets pulled from the
            API. The language filter is applied afterwards, so the
            returned list may contain fewer items.

    Returns:
        A list of tweet objects whose ``lang`` field equals ``"en"``.
    """
    client = tweepy.Client(bearer_token=auth_token)

    # The recent-search endpoint accepts page sizes from 10 to 100; do not
    # request more per page than the caller wants overall.
    page_size = int(max(10, min(100, max_results)))

    paginator = tweepy.Paginator(
        client.search_recent_tweets,
        query=query_str,
        tweet_fields=["context_annotations", "created_at", "lang"],
        max_results=page_size,
    )
    return [
        tweet
        for tweet in paginator.flatten(limit=max_results)
        if tweet.lang == "en"
    ]

In [None]:
import os
from getpass import getpass

# Credentials must not live in the notebook source: read the bearer token from
# the TWITTER_BEARER_TOKEN environment variable, or prompt for it (hidden input).
# NOTE(review): any token that was ever committed in this cell is exposed and
# should be revoked in the Twitter developer portal.
auth_token = os.environ.get("TWITTER_BEARER_TOKEN") or getpass("Twitter bearer token: ")

# Pull up to 1000 recent tweets per topic; get_tweets keeps only English ones.
covid_tweets = get_tweets('covid OR "social distancing"', auth_token, max_results=1000)

nasa_tweets = get_tweets('NASA OR spaceX OR Moon', auth_token, max_results=1000)

In [None]:
# Number of covid tweets that were collected.
len(covid_tweets)

In [None]:
# Number of NASA/space tweets that were collected.
len(nasa_tweets)

In [None]:
# Peek at the text of the first five covid tweets.
for t in covid_tweets[:5]:
    print(t.text)

In [None]:
# Peek at the text of the first five NASA/space tweets.
for t in nasa_tweets[:5]:
    print(t.text)

In [None]:
def create_labeled_df(tweets_list, label):
    """Build a two-column DataFrame that tags every tweet with ``label``.

    Args:
        tweets_list: Iterable of objects exposing a ``text`` attribute.
        label: Value stored in the ``label`` column of every row.

    Returns:
        A DataFrame with columns ``sentence`` (the tweet text) and
        ``label``, one row per tweet. An empty input yields an empty
        frame that still has both columns.
    """
    # Build all rows first and construct the frame once: DataFrame.append
    # copied the whole frame on every call and was removed in pandas 2.0.
    records = [{"sentence": tweet.text, "label": label} for tweet in tweets_list]
    return pd.DataFrame(records, columns=["sentence", "label"])

In [None]:
# Tag each tweet collection with its topic label.
covid_tweets_df = create_labeled_df(covid_tweets, label="covid")
nasa_tweets_df = create_labeled_df(nasa_tweets, label="nasa")

In [None]:
# Stack both labelled sets and save them as tweets.csv for the data loader.
# NOTE(review): index=None is falsy, so no index column should be written;
# index=False is the documented spelling - confirm and consider switching.
pd.concat([covid_tweets_df, nasa_tweets_df]).to_csv("tweets.csv", index=None)

## Train Model

In [None]:
# Average-word-vector model spec configured with wordvec_dim=32.
# Alternative kept for reference: model_spec.get('average_word_vec')
spec = AverageWordVecSpec(wordvec_dim=32)

In [None]:
# Build the Model Maker dataset from tweets.csv: text comes from the
# 'sentence' column, the class from the 'label' column, rows are shuffled.
dataset = DataLoader.from_csv(
      filename='tweets.csv',
      text_column='sentence',
      label_column='label',
      shuffle=True,
      model_spec=spec
)

In [None]:
# Split the dataset with fraction 0.8: the first part is used for training,
# the remainder for validation.
train_ds, val_ds = dataset.split(0.8)

In [None]:
# Train the text classifier on the training split for 50 epochs.
model = text_classifier.create(train_ds, model_spec=spec, epochs=50)

In [None]:
# Evaluate the trained model on the validation split.
model.evaluate(val_ds)

In [None]:
def predict_label(text):
    """Return the model's top-2 predictions for a single raw text string.

    The text is run through ``spec.preprocess``, converted to a tensor, given
    a leading axis of size one, and handed to ``model.predict_top_k``.
    """
    features = spec.preprocess(text)
    # Add a leading axis so the single example is passed as a batch of one.
    batch = tf.expand_dims(tf.convert_to_tensor(features), 0)
    return model.predict_top_k(batch, k=2)

In [None]:
# Mixed example: the sentence mentions both the moon and a mask.
predict_label("Look at the moon!, do you see that guy with mask?")

In [None]:
# Example with a vaccine reference and no space-related words.
predict_label("I got my first moderna shot today?")

In [None]:
# Example that mentions only the moon.
predict_label("Should I be able to life in the moon?")

In [None]:
# Show a summary of the trained model.
model.summary()

### Export model to TFLite

In [None]:
# Export the label, vocabulary and TFLite artifacts into the 'model' folder.
# The ExportFormat.TFJS option is currently disabled.
model.export(
    export_dir='model',
    export_format=[
        ExportFormat.LABEL,
        ExportFormat.VOCAB,
        ExportFormat.TFLITE,
    ],
)

In [None]:
import shutil
from pathlib import Path
from google.colab import files
import sys

def folder_to_zip(folder_path):
    """Zip the contents of a folder into ``<folder name>.zip`` in the CWD.

    Args:
        folder_path: Path (``str`` or ``Path``) of the folder to archive.

    Returns:
        A ``Path`` to the archive that was written, or ``None`` when
        ``folder_path`` does not exist.
    """
    folder_path = Path(folder_path)
    if not folder_path.exists():
        return None
    # Use the path reported by make_archive instead of rebuilding it with
    # with_suffix(): the archive is written to the current directory (not next
    # to the folder), and with_suffix() would replace the tail of a dotted
    # folder name such as "model.v1".
    archive = shutil.make_archive(folder_path.name, 'zip', folder_path)
    return Path(archive)

def zip_and_download_folder_content(model_folder):
    """Zip ``model_folder`` and hand the archive to Colab's ``files.download``.

    Args:
        model_folder: Path of the folder to archive.

    Raises:
        FileNotFoundError: If ``model_folder`` does not exist, so there is
            nothing to zip.
    """
    zip_file = folder_to_zip(model_folder)
    if zip_file is None:
        # folder_to_zip signals a missing folder with None; fail with a clear
        # message instead of passing None on to files.download.
        raise FileNotFoundError(f"Folder not found: {model_folder}")
    files.download(str(zip_file))


In [None]:
# Zip the exported 'model' folder and download the archive.
zip_and_download_folder_content("model")