In [40]:
import warnings
warnings.filterwarnings('ignore')
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
import pandas as pd
from tensorflow.keras import models, preprocessing #, layers, callbacks
import plotly.express as px
from geopy.geocoders import Nominatim
import plotly.graph_objects as go
import numpy as np
from dash import dcc, html, Input, Output, State
# Nuevos tweets
import re
import os
import nltk
import requests
import unidecode
import unicodedata
import contractions
from dotenv import load_dotenv
from collections import Counter
from nltk.corpus import stopwords
import collecting_tweets
from geopy.geocoders import Nominatim

In [41]:
# Shared Nominatim client used to turn place names into coordinates
geolocator = Nominatim(user_agent="bmartin")

In [42]:
# Environment used when fetching new tweets: values from the file replace
# variables that are already set in the process (override=True).
load_dotenv(dotenv_path="data/envs/kafka.env", override=True)

# Twitter credentials; each one is None when its variable is not defined
twitter_key, twitter_secret_key, bearer_token = (
    os.getenv(variable) for variable in ("api_key", "secret_key", "bearer_token")
)

In [43]:
class PreProcessor:
    """Cleaning and tokenising pipeline for tweets held in a pandas DataFrame.

    Every public method expects a DataFrame with a ``text`` column of strings,
    writes its result to the ``clean_tweet`` column of that same frame (the
    frame is modified in place) and returns the frame.
    """

    # Matches, in order: @usernames, #hashtags, the punctuation marks - . , : _ ;
    # and links (from "http://" / "https://" up to the end of the line).
    # Raw string so the regex escapes are not read as (invalid) string escapes.
    _NOISE_PATTERN = re.compile(
        r"(@[A-Za-z0-9]+)|(#[A-Za-z0-9]+)|([-.,:_;])|(https?:\/\/.*[\r\n]*)"
    )

    def __init__(self, regex_dict = None):
        """
        Build the stemmer, the lemmatizer and the word-replacement table.

        Inputs: regex_dict, an optional dict of extra replacements. It is
        merged with the default table; on a key conflict the default entry
        is the one that is kept.
        """

        # stem
        self.sb = nltk.stem.SnowballStemmer('english')

        # lemmatize
        self.lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()

        # declare a default regex dict
        self.default_regex_dict = {'goo[o]*d':'good', '2morrow':'tomorrow', 'b4':'before', 'otw':'on the way',
                                   'idk':"i don't know", ':)':'smile', 'bc':'because', '2nite':'tonight',
                                   'yeah':'yes', 'yeshhhhhhhh':'yes', ' yeeeee':'yes', 'btw':'by the way', 
                                   'fyi':'for your information', 'gr8':'great', 'asap':'as soon as possible', 
                                   'yummmmmy':'yummy', 'gf':'girlfriend', 'thx':'thanks','nowwwwwww':'now', 
                                   ' ppl ':' people ', 'yeiii':'yes'}

        # If no regex_dict is given use the default one, otherwise merge both.
        # The default table is an instance attribute, so it has to be reached
        # through ``self`` (a bare name raises NameError here).
        if regex_dict:
            self.regex_dict = {**regex_dict, **self.default_regex_dict}
        else:
            self.regex_dict = self.default_regex_dict

    def removeNoise(self, pdf):
        """
        Remove noise from the raw tweets.

        Inputs: A pandas dataframe with a ``text`` column of raw strings.

        Output: The same dataframe with a ``clean_tweet`` column holding the
        lower-cased text with accents transliterated, html tags, usernames,
        hashtags, links, the punctuation marks - . , : _ ; and double quotes
        removed, whitespace collapsed and non-ASCII characters dropped.
        """

        # to lower case
        pdf["clean_tweet"] = pdf.text.apply(lambda x: x.lower())

        # remove accented characters from string
        # e.g. canción --> cancion
        pdf["clean_tweet"] = pdf.clean_tweet.apply(unidecode.unidecode)

        # remove html tags
        pdf["clean_tweet"] = pdf.clean_tweet.str.replace(r'<[^<>]*>', '', regex=True)

        # Replace usernames | hashtags | punctuation marks | links by a space,
        # collapse the runs of whitespace and finally drop double quotes
        # (single quotes are kept so contractions can be expanded later).
        pdf["clean_tweet"] = pdf.clean_tweet.apply(
            lambda x: ' '.join(self._NOISE_PATTERN.sub(" ", x).split()).replace('"', ''))

        # Dropping the quotes can leave a space at either end of the string
        pdf['clean_tweet'] = pdf.clean_tweet.apply(lambda x: x.strip(' '))

        # Compatibility-normalise (NFKC) and discard whatever is still not ASCII
        pdf["clean_tweet"] = pdf.clean_tweet.apply(
            lambda x: unicodedata.normalize('NFKC', x).encode('ASCII', 'ignore').decode("utf-8"))

        return pdf

    def textNormalization(self, pdf):
        """
        Normalize the tweets.

        Inputs: A pandas dataframe with a ``text`` column of raw strings.

        Outputs: The same dataframe whose ``clean_tweet`` column holds the
        noise-free text with contractions expanded and English stopwords
        removed.
        """

        # remove noise first
        pdf = self.removeNoise(pdf)

        # expand contractions
        # e.g. don't --> do not
        pdf['clean_tweet'] = pdf.clean_tweet.apply(contractions.fix)

        # Normalize words.
        # NOTE(review): without regex=True, Series.replace only swaps cells whose
        # WHOLE value equals a key, so patterns such as 'goo[o]*d' or ' ppl '
        # never match inside a tweet. Left as is because changing it alters the
        # text fed to the trained model (and ':)' is not a valid regex) - confirm
        # what the training pipeline did before enabling regex replacement.
        pdf['clean_tweet'] = pdf.clean_tweet.replace(self.regex_dict)

        # English stopwords, as a set for constant-time membership tests
        stop_words = frozenset(stopwords.words('english'))

        # remove stopwords from string
        pdf["clean_tweet"] = pdf.clean_tweet.apply(
            lambda x: ' '.join(word for word in x.split() if word not in stop_words))

        return pdf

    def wordTokenize(self, pdf):
        """
        Tokenize the tweets into words.

        Inputs: A pandas dataframe with a ``text`` column of raw strings.

        Outputs: The same dataframe whose ``clean_tweet`` column holds, per
        row, the list of distinct tokens of the normalized tweet. Duplicates
        are dropped through a set, so the order of the tokens is not
        preserved.
        """
        pdf = self.textNormalization(pdf)

        # word_tokenize splits the string into individual words (a list)
        pdf["clean_tweet"] = pdf.clean_tweet.apply(nltk.word_tokenize)

        # Keep only unique elements
        pdf["clean_tweet"] = pdf.clean_tweet.apply(lambda x: list(set(x)))

        return pdf

    def phraseTokenize(self, pdf):
        """
        Tokenize the tweets into sentences.

        Inputs: A pandas dataframe with a ``text`` column of raw strings.

        Outputs: The same dataframe whose ``clean_tweet`` column holds, per
        row, the list of sentences of the normalized tweet.
        """
        pdf = self.textNormalization(pdf)

        # sent_tokenize splits the string into sentences (a list)
        pdf["clean_tweet"] = pdf.clean_tweet.apply(nltk.sent_tokenize)

        return pdf

    def stemWords(self, pdf):
        """
        Stem the tweets: reduce every token to its stem with the Snowball
        (English) stemmer.

        Inputs: A pandas dataframe with a ``text`` column of raw strings.

        Output: The same dataframe whose ``clean_tweet`` column holds, per
        row, the list of stemmed tokens.
        """
        # wordTokenize normalizes the text itself, so no separate
        # normalization pass is needed before it.
        pdf = self.wordTokenize(pdf)

        # reduce words to their stem
        pdf["clean_tweet"] = pdf.clean_tweet.apply(lambda x: [self.sb.stem(word) for word in x])

        return pdf

    def lemmatizeWords(self, pdf):
        """
        Lemmatize the tweets: map every token to its lemma with the WordNet
        lemmatizer.

        Inputs: A pandas dataframe with a ``text`` column of raw strings.

        Output: The same dataframe whose ``clean_tweet`` column holds, per
        row, the list of lemmatized tokens.
        """
        # Tokenize by words rather than by sentences so each word can be
        # lemmatized on its own; wordTokenize already normalizes the text.
        pdf = self.wordTokenize(pdf)

        # lemmatize every token of the row
        pdf["clean_tweet"] = pdf.clean_tweet.apply(
            lambda x: [self.lemmatizer.lemmatize(word) for word in x])

        return pdf

In [44]:
# Mapbox access token required by the plotly express mapbox figures.
# The file is read inside a context manager so its handle is closed, and the
# content is stripped so a trailing newline is not passed as part of the token.
with open("data/mapbox_token/.mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

In [45]:
# Plotly theme shared by every figure of the dashboard
template = "plotly_dark"

In [46]:
# Historical tweets with their stored predictions, plus the trained Keras
# classifier; both live under the 'trained_model' folder.
hist = pd.read_csv(filepath_or_buffer="trained_model/preds/tweets_preds.csv")
keras_model = models.load_model(filepath="trained_model/tf_keras_model.h5")

In [47]:
# Locations already resolved for the historical tweets
geoplaces = pd.read_csv(filepath_or_buffer="data/places/geoplaces/geoplaces.csv")

In [48]:
# Figure 1 - distribution of the predicted classification scores

# Overlaid histograms of the scores (one colour per predicted class) with a
# marginal box plot on top
fig_1 = px.histogram(
    hist,
    x="pred_scores",
    color="toxic",
    nbins=50,
    opacity=0.8,
    marginal="box",  # "violin" and "rug" are the other options
    labels={"pred_scores": "Score"},
    color_discrete_sequence=px.colors.qualitative.G10,
    template=template,
)

fig_1.update_layout(
    barmode="overlay",
    legend_title_text="Toxic",
    yaxis_title="Frequency",
    title={"text": "Tweets Classification Scores", "x": 0.5},
)

In [49]:
# Figure 2 - density map of the saved tweet locations
fig_2 = px.density_mapbox(
    geoplaces,
    lat=geoplaces["lat"],
    lon=geoplaces["lon"],
    hover_name="full_name",
    hover_data={"counts": True},
    radius=15,
    zoom=1.5,
    color_continuous_scale=px.colors.sequential.Jet,
    template=template,
)
fig_2.update_layout(
    margin={"t": 45, "b": 20},
    coloraxis_showscale=False,
    title={"text": "Tweets Density", "x": 0.5},
)

In [50]:
# One random value in [0, 1) per place, used only to colour the map markers
geoplaces["random_color"] = np.random.rand(geoplaces.shape[0])

In [51]:
# Figure 3 - one marker per saved tweet location, coloured at random
fig_3 = px.scatter_mapbox(
    geoplaces,
    lat=geoplaces["lat"],
    lon=geoplaces["lon"],
    color="random_color",
    color_continuous_scale=px.colors.cyclical.HSV,
    hover_name="full_name",
    # the random colour value is hidden from the hover box, the count is shown
    hover_data={"random_color": False, "counts": True},
    zoom=1.5,
    template=template,
)
fig_3.update_layout(
    margin={"t": 45, "b": 20},
    coloraxis_showscale=False,
    coloraxis_colorbar={"title": "Tweets<br>Number"},
    title={"text": "Tweets Location", "x": 0.5},
)

In [52]:
#fig_3.data[0]['lat'], fig_3.data[0]['lon'], fig_3.data[0]['marker']['color'], fig_3.data[0]['hovertext'], fig_3.data[0]['customdata']=  np.append(fig_3.data[0]['lat'],20.400417), np.append(fig_3.data[0]['lon'],-89.134857), np.append(fig_3.data[0]['marker']['color'],np.random.rand()), np.append(fig_3.data[0]['hovertext'],'Yucatán'), np.vstack([fig_3.data[0]['customdata'],np.array([0,1])])

In [53]:
# Dash application styled with the bootstrap CYBORG theme
external_stylesheets = [dbc.themes.CYBORG]
app = dash.Dash(external_stylesheets=external_stylesheets[:1])

In [54]:
# Parse the creation timestamps so they can be grouped by date
hist["created_at"] = pd.to_datetime(hist.created_at)

In [55]:
# Number of tweets per day (the grouper uses freq='D', i.e. daily buckets)
a = (
    hist.groupby([pd.Grouper(key="created_at", freq="D")])["tweet_id"]
    .count()
    .reset_index()
)

# Number of tweets per day and per predicted class
b = (
    hist.groupby([pd.Grouper(key="created_at", freq="D"), "toxic"])["tweet_id"]
    .count()
    .reset_index()
)

# Attach the daily total to every (day, class) row:
# tweet_id_x is the per-class count, tweet_id_y the daily total
c = b.merge(a, on="created_at", how="left")

In [56]:
# Split the daily counts by predicted class
toxic = c.loc[c["toxic"].eq(1)]
no_toxic = c.loc[c["toxic"].eq(0)]

In [57]:
# Figure 4 - daily timeline: a line with the total number of tweets and
# stacked bars with the count of each predicted class
fig_4 = go.Figure(
    data=[
        go.Scatter(
            name="Total Tweets",
            x=c.created_at,
            y=c.tweet_id_y,
            mode="markers+lines",
            line={"color": "white", "width": 3},
        ),
        go.Bar(
            name="Non Toxic",
            x=no_toxic.created_at,
            y=no_toxic.tweet_id_x,
            opacity=.5,
            marker={"color": px.colors.qualitative.G10[0]},
        ),
        go.Bar(
            name="Toxic",
            x=toxic.created_at,
            y=toxic.tweet_id_x,
            opacity=.5,
            marker={"color": px.colors.qualitative.G10[1]},
        ),
    ]
)

fig_4.update_layout(
    template=template,
    barmode="stack",
    yaxis_title="Number of Tweets",
    title={"text": "Tweets Frequency Timeline", "x": 0.5},
)

In [58]:
# Generate random timestamps
from random import randrange
import datetime 

# Formatted random timestamps are collected in this list
random_dates = []


def random_date(start, l):
    """Yield ``l + 1`` timestamps that never go backwards.

    Each yielded value is the previous one (``start`` for the first) plus a
    random whole number of minutes between 0 and 9. Nothing is yielded when
    ``l`` is negative.
    """
    moment = start
    remaining = l
    while remaining >= 0:
        moment += datetime.timedelta(minutes=randrange(10))
        yield moment
        remaining -= 1

startDate = datetime.datetime(2021, 12, 2, 0, 0)

# One random timestamp per place, stored newest first as "dd/mm/yy HH:MM" text
random_dates.extend(
    stamp.strftime("%d/%m/%y %H:%M")
    for stamp in reversed(list(random_date(startDate, len(geoplaces) - 1)))
)

geoplaces['created_at'] = random_dates
# Parse with the exact day-first format the strings were written in: without
# it pandas reads "02/12/21" month-first (12 February instead of 2 December),
# which silently swaps day and month in everything derived from this column.
geoplaces['created_at'] = pd.to_datetime(geoplaces['created_at'], format="%d/%m/%y %H:%M")

# Hour-resolution label, used as the animation frame of the map
geoplaces['dt_str'] = geoplaces['created_at'].dt.strftime("%d/%m/%y %H")

geoplaces = geoplaces.sort_values('created_at')

In [59]:
# Figure 5 - animated map with one frame per value of 'dt_str'
fig_5 = px.scatter_mapbox(
    geoplaces,
    lat=geoplaces["lat"],
    lon=geoplaces["lon"],
    color="random_color",
    color_continuous_scale=px.colors.cyclical.HSV,
    hover_name="full_name",
    animation_frame="dt_str",
    zoom=1.5,
    template=template,
)
fig_5.update_layout(
    height=750,
    margin={"t": 45},
    coloraxis_showscale=False,
    title={"text": "Today's Tweets Location per hour", "x": 0.5},
)

In [60]:
# "Overview" card: update controls and manual-location form on the first row,
# the two maps on the second row, the histogram and the timeline on the third.
card = html.Div(children=[
    dbc.Card(children=[
        dbc.CardBody(children=[
            html.H5(
                children="Tweets from nov 21st to nov 30: {}".format(len(hist)),
                id='title-counter',
                className="card-title",
            ),

            # Row 1: dashboard update (left) and manual location entry (right)
            dbc.Row(children=[
                dbc.Col(width=6, children=[
                    html.P(children="Click the button to update the dashboard with today's tweets"),
                    html.Button(children='Update', id='submit-val', n_clicks=0),
                    html.Div(children='', id='container-button-basic'),
                    # Stores that carry the refreshed tweets / places between callbacks
                    dcc.Store(id='intermediate-value'),
                    dcc.Store(id='intermediate-value-places'),
                    html.Br(),
                    html.Div(children='output prueba', id='test-output'),
                    html.Br(),
                    html.Div(children='Sin nuevas locaciones', id='test-output-places'),
                    html.Br(),
                ]),
                dbc.Col(children=[
                    html.H5(children="Add a new Tweet location to the map", className="card-title"),
                    dcc.Input(id='input-1-state', type='number', placeholder='Insert latitude'),
                    dcc.Input(id='input-2-state', type='number', placeholder='Insert longitude'),
                    dcc.Input(id='input-3-state', type='text', placeholder='Insert label name'),
                    html.Button(children='Submit', id='submit-button-state', n_clicks=0),
                    html.Div(id='output-state'),
                ]),
            ]),

            # Row 2: density map and location map
            dbc.Row(align='center', children=[
                dbc.Col(width=6, children=[dcc.Graph(id='fig_2', figure=fig_2)]),
                dbc.Col(width=6, children=[dcc.Graph(id='fig_3', figure=fig_3)]),
            ]),
            html.Br(),

            # Row 3: score histogram and frequency timeline
            dbc.Row(align='center', children=[
                dbc.Col(width=6, children=[dcc.Graph(id='fig_1', figure=fig_1)]),
                dbc.Col(width=6, children=[dcc.Graph(id='fig_4', figure=fig_4)]),
            ]),
        ]),
    ]),
])


In [61]:
# Card holding the animated map, which spans the full width
card_2 = html.Div(children=[
    dbc.Card(children=[
        dbc.CardBody(children=[
            dbc.Row(align='center', children=[
                dbc.Col(width=12, children=[dcc.Graph(id='fig_5', figure=fig_5)]),
            ]),
        ]),
    ]),
])

In [62]:
# One tab per card
tab_1 = dcc.Tab(children=[card], label="Overview")
tab_2 = dcc.Tab(children=[card_2], label="Tweets Animation Frame", style={"height": "750"})

In [63]:
# Page layout: a title followed by the two tabs
app.layout = html.Div(
    children=[
        html.H1(children='COVID-Tweets Dashboard'),
        dcc.Tabs(children=[tab_1, tab_2]),
    ]
)

In [64]:
def loc(x):
    """Return the second item of the geocoder result for ``x``, or None.

    ``x`` is passed to ``geolocator.geocode``. None is returned when the
    geocoder finds nothing or when the lookup fails for any ordinary reason,
    so a single bad place does not abort a whole batch.
    # NOTE(review): item [1] of a geopy Location is presumably the
    # (latitude, longitude) pair - callers index it as [0] / [1].
    """
    try:
        location = geolocator.geocode(x)
        return None if location is None else location[1]
    except Exception:
        # Best effort, but only for regular errors: catching Exception (not a
        # bare except) lets KeyboardInterrupt / SystemExit stop the program.
        return None

In [65]:
def _append_map_points(fig, lats, lons, names, customdata_row, colors=None):
    """Append points to the first trace of a mapbox figure, in place.

    ``lats``, ``lons`` and ``names`` are scalars or equally long sequences.
    ``customdata_row`` is the hover row repeated for every new point and
    ``colors``, when given, is appended to the marker colours of the trace.
    """
    trace = fig.data[0]
    n_points = len(np.atleast_1d(lats))
    trace['lat'] = np.append(trace['lat'], lats)
    trace['lon'] = np.append(trace['lon'], lons)
    trace['hovertext'] = np.append(trace['hovertext'], names)
    trace['customdata'] = np.vstack([trace['customdata'], np.tile(customdata_row, (n_points, 1))])
    if colors is not None:
        trace['marker']['color'] = np.append(trace['marker']['color'], colors)


@app.callback([Output('output-state', 'children'), Output('fig_2', 'figure'), Output('fig_3', 'figure'), Output('test-output-places', 'children')],
              [Input('submit-button-state', 'n_clicks'),Input('intermediate-value-places', 'data')],
              State('input-1-state', 'value'),
              State('input-2-state', 'value'),
              State('input-3-state', 'value'),)
def update_output(n_clicks, new_places, input1, input2, input3):
    """Add new locations to the density map (fig_2) and the location map (fig_3).

    Runs when the 'Submit' button is clicked or when the places store changes.
    ``input1`` / ``input2`` / ``input3`` are the latitude, longitude and label
    typed by the user; ``new_places`` is the JSON ('split' orientation) of the
    freshly fetched places, or None when the store is still empty.

    Returns the status message, both figures and the 'PLACES ADDED' text.
    The module-level figures are modified in place, so added points are
    visible to every later call.
    """
    # Only react to what actually fired the callback. Otherwise the manual
    # point would be appended again on every store update (and as None values
    # on page load), and the stored places again on every button click.
    triggered_ids = {item['prop_id'].split('.')[0] for item in dash.callback_context.triggered}

    # Single location typed by hand; latitude and longitude are mandatory
    if 'submit-button-state' in triggered_ids and input1 is not None and input2 is not None:
        _append_map_points(fig_2, input1, input2, input3, [1])
        _append_map_points(fig_3, input1, input2, input3, [0, 1], colors=np.random.rand())

    # Many new locations coming from the 'Update' button
    places_names = 'None yet'
    if new_places is not None:
        new_places = pd.read_json(new_places, orient='split')

        if new_places.shape[0] > 0:
            places_names = new_places.full_name.values
            if 'intermediate-value-places' in triggered_ids:
                _append_map_points(fig_2, new_places.lat.values, new_places.lon.values,
                                   new_places.full_name.values, [1])
                _append_map_points(fig_3, new_places.lat.values, new_places.lon.values,
                                   new_places.full_name.values, [0, 1],
                                   colors=np.random.rand(len(new_places)))

    return u'''
        The Button has been pressed {} times,
        new place "{}" added, 
        latitude is "{}",
        and longitude is "{}"
    '''.format(n_clicks, input3, input1, input2), fig_2, fig_3, 'PLACES ADDED: {}'.format(places_names)

In [66]:
# Callbacks for real-time
def _fetch_tweet_frames():
    """Query Twitter for 'covid' tweets and return the (tweets, places) frames."""
    search_tweet = collecting_tweets.search_tweets(query='covid', bearer_token = bearer_token, next_token = None)

    # The places frame only exists when the response carries tweet locations
    if "places" in search_tweet['includes'].keys():
        main_tweets, main_users, main_places = collecting_tweets.create_dataframes(search_tweet)
    else:
        main_tweets, main_users = collecting_tweets.create_dataframes(search_tweet)
        main_places = pd.DataFrame()

    # more_tweets is called ONCE and unpacked according to how many frames it
    # returned; calling it a second time only to change the unpacking would
    # repeat the whole download.
    # NOTE(review): assumes it returns 2 frames (tweets, users) or 3 frames
    # (tweets, users, places) - confirm in collecting_tweets.
    frames = collecting_tweets.more_tweets(20, "covid", search_tweet, main_tweets,
                                           main_users, main_places)
    if len(frames) == 3:
        main_tweets, main_users, main_places = frames
    else:
        main_tweets, main_users = frames

    return main_tweets, main_places


def _geocode_places(main_places):
    """Keep the places that can be geocoded and add their 'lat' / 'lon' columns."""
    if main_places.empty:
        return main_places

    # Work on copies so columns are never assigned on a filtered slice
    main_places = main_places.copy()
    main_places["coords"] = main_places.full_name.apply(loc)
    main_places = main_places[main_places.coords.notna()].copy()

    if main_places.shape[0] != 0:
        main_places["lat"] = main_places.coords.apply(lambda x: x[0])
        main_places["lon"] = main_places.coords.apply(lambda x: x[1])

    return main_places


def _score_tweets(main_tweets):
    """Lemmatize the tweets and add the model's 'toxic' and 'pred_scores' columns."""
    # Clean the text and keep only the root of each word
    main_tweets = PreProcessor().lemmatizeWords(main_tweets)

    # NOTE(review): the tokenizer is fitted on the new tweets, so its word
    # indices are not guaranteed to match the vocabulary the model was trained
    # with - the tokenizer saved at training time should probably be loaded.
    tokenizer = preprocessing.text.Tokenizer(num_words = 500000, filters=r'!"#$%&()*+,-./:;<=>?@[\]^_`{|}~',
                                             lower = True)

    # Updates the internal vocabulary. Each entry of 'clean_tweet' is a list,
    # so every element of the list is taken as one token.
    tokenizer.fit_on_texts(main_tweets['clean_tweet'].values)

    X = tokenizer.texts_to_sequences(main_tweets["clean_tweet"].values)
    X = preprocessing.sequence.pad_sequences(X, maxlen = 392)

    # Predict
    y_pred_scores = keras_model.predict(X)
    y_pred = np.round(y_pred_scores).astype(int)

    # Flatten so a (n, 1) model output is stored as a plain column
    main_tweets["toxic"] = y_pred.ravel()
    main_tweets["pred_scores"] = y_pred_scores.ravel()

    return main_tweets


@app.callback(
    [Output('container-button-basic', 'children'),Output('intermediate-value', 'data'),Output('intermediate-value-places', 'data')],
    Input('submit-val', 'n_clicks')
)
def update_output(n_clicks):
    """Fetch, score and publish new tweets when the 'Update' button is clicked.

    Returns the status message, the historical tweets (plus the new ones after
    a click) as JSON and the geocoded new places as JSON, both in 'split'
    orientation. With no click yet, the unchanged history and an empty places
    frame are returned.
    """
    message = 'You have updated the dashboard {} times'.format(n_clicks)

    if n_clicks > 0:
        main_tweets, main_places = _fetch_tweet_frames()
        main_places = _geocode_places(main_places)
        main_tweets = _score_tweets(main_tweets)

        # DataFrame.append no longer exists in pandas 2; concat is the equivalent.
        # The module-level 'hist' is left untouched: the merged frame is only
        # published through the store.
        cleaned_df = pd.concat([hist, main_tweets], ignore_index=True)
    else:
        cleaned_df = hist
        main_places = pd.DataFrame()

    return (message,
            cleaned_df.to_json(date_format='iso', orient='split'),
            main_places.to_json(date_format='iso', orient='split'))

In [67]:
def update_output(n_clicks):
    """Fetch new 'covid' tweets, score them and merge them with the history.

    Returns ``(cleaned_df, main_places)`` when ``n_clicks`` is positive:
    the historical tweets followed by the newly scored ones, and the new
    places that could be geocoded (with 'lat' / 'lon' columns).
    Returns None when ``n_clicks`` is not positive.
    """
    if not n_clicks > 0:
        return None

    # Get data
    search_tweet = collecting_tweets.search_tweets(query='covid', bearer_token = bearer_token, next_token = None)

    # The places frame only exists when the response carries tweet locations
    if "places" in search_tweet['includes'].keys():
        main_tweets, main_users, main_places = collecting_tweets.create_dataframes(search_tweet)
    else:
        main_tweets, main_users = collecting_tweets.create_dataframes(search_tweet)
        main_places = pd.DataFrame()

    # more_tweets is called ONCE and unpacked according to how many frames it
    # returned; calling it a second time only to change the unpacking would
    # repeat the whole download.
    # NOTE(review): assumes it returns 2 frames (tweets, users) or 3 frames
    # (tweets, users, places) - confirm in collecting_tweets.
    frames = collecting_tweets.more_tweets(20, "covid", search_tweet, main_tweets,
                                           main_users, main_places)
    if len(frames) == 3:
        main_tweets, main_users, main_places = frames
    else:
        main_tweets, main_users = frames

    # Geocode the places and keep only the ones that could be resolved
    if not main_places.empty:
        # Work on copies so columns are never assigned on a filtered slice
        main_places = main_places.copy()
        main_places["coords"] = main_places.full_name.apply(lambda x: loc(x))
        main_places = main_places[main_places.coords.notna()].copy()

        if main_places.shape[0] != 0:
            main_places["lat"] = main_places.coords.apply(lambda x: x[0])
            main_places["lon"] = main_places.coords.apply(lambda x: x[1])

    # Clean the text and keep only the root of each word
    pre_processor = PreProcessor()
    main_tweets = pre_processor.lemmatizeWords(main_tweets)

    # NOTE(review): the tokenizer is fitted on the new tweets, so its word
    # indices are not guaranteed to match the vocabulary the model was trained
    # with - the tokenizer saved at training time should probably be loaded.
    tokenizer = preprocessing.text.Tokenizer(num_words = 500000, filters=r'!"#$%&()*+,-./:;<=>?@[\]^_`{|}~',
                                             lower = True)

    # Updates the internal vocabulary. Each entry of 'clean_tweet' is a list,
    # so every element of the list is taken as one token.
    tokenizer.fit_on_texts(main_tweets['clean_tweet'].values)

    X = tokenizer.texts_to_sequences(main_tweets["clean_tweet"].values)
    X = preprocessing.sequence.pad_sequences(X, maxlen = 392)

    # Predict
    y_pred_scores = keras_model.predict(X)
    y_pred = np.round(y_pred_scores).astype(int)

    # Flatten so a (n, 1) model output is stored as a plain column
    main_tweets["toxic"] = y_pred.ravel()
    main_tweets["pred_scores"] = y_pred_scores.ravel()

    # DataFrame.append no longer exists in pandas 2; concat is the equivalent
    cleaned_df = pd.concat([hist, main_tweets], ignore_index=True)

    return cleaned_df, main_places

In [68]:
@app.callback(
    Output('test-output', 'children'),
    Input('intermediate-value', 'data'),
)
def update_output(data):
    """Report how many tweets the stored frame holds.

    ``data`` is the JSON ('split' orientation) kept in the
    'intermediate-value' store.
    """
    tweet_count = pd.read_json(data, orient='split').shape[0]
    return 'The new df has {} tweets'.format(tweet_count)

In [69]:
# figures callbacks
def _score_histogram(frame):
    """Build the overlaid histogram of 'pred_scores', coloured by 'toxic'."""
    figure = px.histogram(
        frame,
        x="pred_scores",
        color="toxic",
        nbins=50,
        opacity=0.8,
        marginal="box",  # "violin" and "rug" are the other options
        labels={"pred_scores": "Score"},
        color_discrete_sequence=px.colors.qualitative.G10,
        template=template,
    )
    figure.update_layout(
        barmode='overlay',
        legend_title_text='Toxic',
        yaxis_title="Frequency",
        title={"text": "Tweets Classification Scores", "x": 0.5},
    )
    return figure


def _frequency_timeline(frame):
    """Build the daily timeline: total tweets as a line, class counts as stacked bars."""
    # Tweets per day (the grouper uses freq='D', i.e. daily buckets)
    per_day = (
        frame.groupby([pd.Grouper(freq='D', key='created_at')])['tweet_id']
        .count()
        .reset_index()
    )
    # Tweets per day and per predicted class
    per_day_class = (
        frame.groupby([pd.Grouper(freq='D', key='created_at'), 'toxic'])['tweet_id']
        .count()
        .reset_index()
    )
    # tweet_id_x is the per-class count, tweet_id_y the daily total
    merged = per_day_class.merge(per_day, on='created_at', how='left')

    toxic_rows = merged.loc[merged['toxic'].eq(1)]
    non_toxic_rows = merged.loc[merged['toxic'].eq(0)]

    figure = go.Figure(
        data=[
            go.Scatter(
                name='Total Tweets',
                x=merged.created_at,
                y=merged.tweet_id_y,
                mode='markers+lines',
                line={'color': 'white', 'width': 3},
            ),
            go.Bar(
                name='Non Toxic',
                x=non_toxic_rows.created_at,
                y=non_toxic_rows.tweet_id_x,
                opacity=.5,
                marker={'color': px.colors.qualitative.G10[0]},
            ),
            go.Bar(
                name='Toxic',
                x=toxic_rows.created_at,
                y=toxic_rows.tweet_id_x,
                opacity=.5,
                marker={'color': px.colors.qualitative.G10[1]},
            ),
        ]
    )
    figure.update_layout(
        template=template,
        barmode='stack',
        yaxis_title="Number of Tweets",
        title={"text": "Tweets Frequency Timeline", "x": 0.5},
    )
    return figure


@app.callback(
    [
        Output('fig_1', "figure"),
        Output('fig_4', "figure"),
    ],
    Input('intermediate-value', 'data'),
)
def update_output(data):
    """Redraw the score histogram and the timeline from the stored tweets.

    ``data`` is the JSON ('split' orientation) kept in the
    'intermediate-value' store.
    """
    new_hist = pd.read_json(data, orient='split')
    return _score_histogram(new_hist), _frequency_timeline(new_hist)

In [70]:
if __name__ == "__main__":
    # Start the development server in debug mode with the auto-reloader turned off
    app.run_server(use_reloader=False, debug=True)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
