## Import Libraries

In [13]:
# import dash libraries
import dash
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from dash import dash_table as dt

# import plotly libraries
import plotly.express as px
import plotly.graph_objects as go

# import misc libraries
import pandas as pd
import numpy as np
import datetime
from datetime import date, datetime
import base64
from io import BytesIO

# import NLP libraries
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.sentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud

## Upload Raw Data

In [14]:
# read tweet data and reorder columns
df = pd.read_csv('/Users/colincunningham/holocron/archive/cleandata.csv')
df = df[['Date', 'Retweets', 'Likes', 'Cleaned_Tweets']]
df.head()

Unnamed: 0,Date,Retweets,Likes,Cleaned_Tweets
0,2022-10-27 16:17:39,209,7021,thanks
1,2022-10-27 13:19:25,755,26737,Absolutely
2,2022-10-27 13:08:00,55927,356623,Dear Twitter Advertisers
3,2022-10-26 21:39:32,9366,195546,Meeting a lot of cool people at Twitter today!
4,2022-10-26 18:45:58,145520,1043592,Entering Twitter HQ – let that sink in!


## Data Preprocessing

In [15]:
# convert all text to lowercase
df['Text'] = df['Cleaned_Tweets'].apply(str.lower)


# tokenize text data
regexp = RegexpTokenizer('\w+')
df['text_token']=df['Text'].apply(regexp.tokenize)


# remove stopwords
stopwords = nltk.corpus.stopwords.words("english")
df['text_token'] = df['text_token'].apply(lambda x: [item for item in x if item not in stopwords])


# remove 1/2 letter words
df['text_string'] = df['text_token'].apply(lambda x: ' '.join([item for item in x if len(item)>2]))


# perform lemmatization
wordnet_lem = WordNetLemmatizer()
df['text_string_lem'] = df['text_string'].apply(wordnet_lem.lemmatize)


# perform sentiment analysis using polarity score
analyzer = SentimentIntensityAnalyzer()
df['polarity'] = df['text_string_lem'].apply(lambda x: analyzer.polarity_scores(x))

df = pd.concat(
    [df.drop('polarity', axis=1), 
     df['polarity'].apply(pd.Series)], axis=1)


# create sentiment variable
df['Sentiment'] = df['compound'].apply(lambda x: 'positive' if x >0 else 'neutral' if x==0 else 'negative')


# remove intermediate variables
df.drop(['Cleaned_Tweets', 'text_token', 'text_string', 
         'neg', 'neu', 'pos', 'compound'], axis=1, inplace=True)


# reorder columns in df
df = df[['Date', 'Retweets', 'Likes', 'Sentiment', 'Text', 'text_string_lem']]

df.head()

Unnamed: 0,Date,Retweets,Likes,Sentiment,Text,text_string_lem
0,2022-10-27 16:17:39,209,7021,positive,thanks,thanks
1,2022-10-27 13:19:25,755,26737,neutral,absolutely,absolutely
2,2022-10-27 13:08:00,55927,356623,positive,dear twitter advertisers,dear twitter advertisers
3,2022-10-26 21:39:32,9366,195546,positive,meeting a lot of cool people at twitter today!,meeting lot cool people twitter today
4,2022-10-26 18:45:58,145520,1043592,neutral,entering twitter hq – let that sink in!,entering twitter let sink


## Application Style / Layout

In [16]:
# set sidebar style
sidebar_style = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "20%",
    "padding": "20px 10px",
    "background-color": "#f8f9fa",
}

# set content style
content_style = {
    "position": "fixed",
    "text-align": "left",
    "top": 0,
    "right": 0,
    "bottom": 0,
    "width": "75%",
    "padding": "20px 10px"
}

## Dash Web Application

In [17]:
# create app and add theme
app = JupyterDash(__name__)
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])


# define sidebar layout
sidebar = html.Div(
    [
        html.H6("Elon's Tweets", className="display-4"),
        html.Hr(),
        html.P("This app is built to analyze Elon Musk's tweets from Jan 1 2022 to Dec 31 2022"),
        
        html.Br(),
        
        html.H5('Select Date Range',
                style={'textAlign': 'left'}
               ),
        dcc.DatePickerRange(
        id='date-picker',
        min_date_allowed=date(2022, 1, 1),
        max_date_allowed=date(2022, 12, 31),
        initial_visible_month=date(2022, 1, 1),
        ),
        
        html.Br(),
        html.Br(),
        html.Br(),
        
        html.H5('Min. # of RTs',
                style={'textAlign': 'left'}
        ),
        dcc.Input(
            id='min_rt',
            type='number',
            placeholder=0,
            
        ),
        
        html.Br(),
        html.Br(),
        html.Br(),
        
        html.H5('Min. # of Likes',
                style={'textAlign': 'left'}
        ),
        dcc.Input(
            id='min_likes',
            type='number',
            placeholder=0,
            
        ),
        
        html.Br(),
        html.Br(),
        
        dbc.Button('Generate Report',
                   id='submit-button',
                   n_clicks=0
        )
        

    ],
    style=sidebar_style,
)

# set empty div to hold content on callback
content = html.Div(id="page-content", style=content_style)


# define callback function to upload content
@app.callback(
    Output('page-content', 'children'),
    Input('submit-button', 'n_clicks'),
    State('date-picker', 'start_date'),
    State('date-picker', 'end_date'),
    State('min_rt', 'value'),
    State('min_likes', 'value')
)
def load_content(n_clicks, st_dt, e_dt, rts, likes):
    if not n_clicks:
        return dash.no_update
    else:
        # filter data based on user parameters
        df_upd = df[(df['Date'] > st_dt) & (df['Date'] < e_dt)]
        df_upd = df_upd[df_upd['Retweets'] > rts]
        df_upd = df_upd[df_upd['Likes'] > likes]

        
        # create word cloud
        all_words_lem = ' '.join([word for word in df_upd['text_string_lem']])
        wordcloud = WordCloud().generate(all_words_lem)
        image_bytes = BytesIO()
        wordcloud.to_image().save(image_bytes, format='PNG')
        image_bytes.seek(0)
        image_base64 = base64.b64encode(image_bytes.read()).decode("utf-8")
        
        
        # group by sentiment score and get count
        sent_df = df_upd.groupby(['Sentiment']).size().reset_index(name='count')
        
        color_mapping = {'positive': 'rgb(144, 238, 144)',
                 'negative': 'rgb(255,153,153)',
                 'neutral': 'rgb(50, 133, 190)'}

        sent_fig = go.Figure(data=go.Pie(labels=sent_df['Sentiment'], values=sent_df['count']))
        sent_fig.update_traces(textposition='inside', textinfo='percent+label', marker=dict(colors=[color_mapping[label] for label in sent_df['Sentiment']]))
        sent_fig.update_layout(showlegend=False, height=200, margin=dict(t=0, b=0, l=0, r=0))
        
        
        # group by date and get count
        avg_rt = int(df_upd['Retweets'].mean().round(0))
        avg_like = int(df_upd['Likes'].mean().round(0))
        avg_data = [['Avg. RTs', avg_rt], ['Avg. Likes', avg_like]]  
        avg_df = pd.DataFrame(avg_data, columns=['Avgs', 'Count'])
        
        avg_fig = go.Figure(go.Bar(
            x=avg_df['Count'],
            y=avg_df['Avgs'],
            orientation='h'))
        avg_fig.update_layout(height=200, margin=dict(t=0, b=0, l=0, r=0))

        
        # display df as dashtable
        dash_df = dt.DataTable(
                            id='dash_table',
                            data = df_upd.to_dict('records'),
                            columns=[{'name': i, 'id': i} for i in df_upd.columns],
                            style_table={'overflowX': 'auto'},
                            editable=False,
                            page_current=0,
                            page_size=10,
                            sort_action='native',
                            export_format='csv',
                            style_cell={'textAlign': 'left'}
        )
        
        ttl_tweets = len(df_upd)
        
        # return the content that contains the wordcloud, pie chart, bar chart, and raw tweet data
        return dbc.Container(
            dbc.Card([
            dbc.CardHeader(f"Tweet Analysis - Total Tweets: {ttl_tweets}"),
            dbc.CardBody(
                [
                    dbc.Row(
                        [
                            dbc.Col(
                                    [
                                        html.H5('Word Cloud', style={'text-align': 'center'}),
                                        dbc.CardBody(html.Img(src=f"data:image/png;base64,{image_base64}")),
                                    ],
                                    className="subcard-1",
                            ),
                            dbc.Col(
                                    [
                                        html.H5('Sentiment Analysis', style={'text-align': 'center'}),
                                        dbc.CardBody(dcc.Graph(id='sent_graph', figure=sent_fig)),
                                    ],
                                    className="subcard-2",
                                
                            ),
                            dbc.Col(
                                    [
                                        html.H5('Tweet Metrics', style={'text-align': 'center'}),
                                        dbc.CardBody(dcc.Graph(id='time_graph', figure=avg_fig)),
                                    ],
                                    className="subcard-3",
                            ),
                        ],
                        className="align-items-center",
                    ),
                    
                    html.Br(),
                    html.Br(),
                    
                    html.H5("Raw Tweet Data"),
                    html.Div(dash_df),
                ]
            ),
        ],
        className="main-card",
    )
)    

# create app layout
app.layout = html.Div([dcc.Location(id="url"), sidebar, content])

# build app
app.run_server(mode='jupyterlab')