### Library import and code configuration

In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_file, save
from bokeh.models import ColumnDataSource, Circle, Label, FactorRange
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
from bokeh.tile_providers import STAMEN_TONER
from bokeh.layouts import gridplot, row, column, widgetbox
from pyproj import Proj, transform
from math import pi

In [2]:
# Render every float in DataFrame/Series output with exactly two decimals.
pd.set_option('display.float_format', '{:.2f}'.format)

### Dataframe

In [3]:
# Location of the tweets CSV read by the next cell.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact directory layout exists.
file = (
    '/media/dedsresende/BackUpDeds/Github_UB/CapstoneProject/Feelings/'
    'kagle_capstone_project/presentacion/data/tweets_public.csv'
)

In [4]:
# Read the tweets CSV into the working DataFrame, then preview the first
# five rows as the cell's displayed output.
df = pd.read_csv(filepath_or_buffer=file)
df.head(5)

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,569237160886276096,negative,1.0,Can't Tell,0.65,Delta,,venkatesh_cr,,0,@JetBlue I've been in pricing for 8 years to k...,,2015-02-21 12:48:09 -0800,Austin Texas,Central Time (US & Canada)
1,569267194028298241,negative,1.0,Customer Service Issue,1.0,Southwest,,ChristineFlores,,0,"@SouthwestAir AH - did DM, no reply. On hold n...",,2015-02-21 14:47:30 -0800,,Central Time (US & Canada)
2,569506670189137920,negative,0.65,Lost Luggage,0.65,United,,szymanski_t,,0,@united if you lost my belongings then BE HONEST!,,2015-02-22 06:39:05 -0800,,Eastern Time (US & Canada)
3,570293957739081728,negative,1.0,Customer Service Issue,1.0,United,,nate2482,,0,@United the internet is a great thing. I am e...,,2015-02-24 10:47:29 -0800,"Parkersburg, WV",Eastern Time (US & Canada)
4,570212129313316864,neutral,1.0,,,Delta,,elias_rubin,,0,@JetBlue I believe that the website said I cou...,,2015-02-24 05:22:20 -0800,"New York, NY",Pacific Time (US & Canada)


In [5]:
# Print column dtypes and non-null counts, which shows how sparsely some
# columns (e.g. tweet_coord) are populated.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8784 entries, 0 to 8783
Data columns (total 15 columns):
tweet_id                        8784 non-null int64
airline_sentiment               8784 non-null object
airline_sentiment_confidence    8784 non-null float64
negativereason                  5531 non-null object
negativereason_confidence       6325 non-null float64
airline                         8784 non-null object
airline_sentiment_gold          24 non-null object
name                            8784 non-null object
negativereason_gold             20 non-null object
retweet_count                   8784 non-null int64
text                            8784 non-null object
tweet_coord                     608 non-null object
tweet_created                   8784 non-null object
tweet_location                  5936 non-null object
user_timezone                   5973 non-null object
dtypes: float64(2), int64(2), object(11)
memory usage: 1.0+ MB


In [6]:
# Target dtype for every column that is converted with `astype`.
# Label-like columns become pandas categoricals; `text` and `tweet_coord`
# become plain strings (the lat/lon cell below compares `tweet_coord`
# against the string 'nan'); numeric columns are cast to 64-bit types.
column_dtypes = {
    'tweet_id': 'category',
    'airline_sentiment': 'category',
    'airline_sentiment_confidence': 'float64',
    'negativereason': 'category',
    'negativereason_confidence': 'float64',
    'airline': 'category',
    'airline_sentiment_gold': 'category',
    'name': 'category',
    'negativereason_gold': 'category',
    'retweet_count': 'int64',
    'text': 'str',
    'tweet_coord': 'str',
    'tweet_location': 'category',
    'user_timezone': 'category',
}

for column_name, target_dtype in column_dtypes.items():
    df[column_name] = df[column_name].astype(target_dtype)

# Timestamps are parsed separately because they need a parser, not a cast.
df['tweet_created'] = pd.to_datetime(df['tweet_created'])

In [7]:
def _parse_tweet_coord(raw):
    """Split one `tweet_coord` value into a `(lat, lon)` pair of floats.

    `raw` is expected to look like '[<first>, <second>]'; the first number
    is stored as latitude and the second as longitude, matching how the
    columns were filled before this helper existed.

    Missing coordinates yield `(nan, nan)`. Both the literal string 'nan'
    (what `astype('str')` produces for a missing value) and a genuine
    missing value are accepted, so the cell does not depend on how the
    string cast represents NaN.

    Raises ValueError / IndexError for a non-missing value that does not
    contain two comma-separated numbers.
    """
    if pd.isnull(raw) or raw == 'nan':
        return (np.nan, np.nan)
    parts = raw.replace('[', '').replace(']', '').replace(' ', '').split(',')
    return (float(parts[0]), float(parts[1]))


# Parse every coordinate string once and fan the pairs out into two columns.
_tweet_coords = [_parse_tweet_coord(raw) for raw in df['tweet_coord']]
df['lat'] = [lat for lat, _ in _tweet_coords]
df['lon'] = [lon for _, lon in _tweet_coords]

In [8]:
# Reproject the tweet coordinates from EPSG:4326 (lon/lat degrees) to
# EPSG:3857, the projection used by the map tiles added further down.
#
# The two projection objects are built once and the whole lon/lat columns
# are transformed in a single call. Building both `Proj` objects and calling
# `transform` twice for every row inside `iterrows()` gives the same values
# but is needlessly slow.
wgs84 = Proj(init='epsg:4326')
web_mercator = Proj(init='epsg:3857')

# `transform` takes x (longitude) first, then y (latitude), and returns the
# projected x and y arrays in the same order.
df['lon3857'], df['lat3857'] = transform(
    wgs84, web_mercator, df['lon'].values, df['lat'].values
)

### Dashboard

Colors

In [9]:
# Dashboard palette as hex RGB strings. The short names are referenced by
# the label, map, bar and line-chart cells.
rd, dkbl, ltbl = '#E4523B', '#0A454D', '#3DB296'
ltyl, dkyl, cz = '#ECC417', '#E8931E', '#969696'

In [10]:
# Sentiment label -> palette colour. Also reused for the bar chart colours.
colors = dict(negative=rd, positive=ltbl, neutral=cz)

# Per-tweet colour column, looked up from each tweet's sentiment label.
df['sent_colors'] = df['airline_sentiment'].map(colors)

Control configs

In [11]:
# Comma-separated Bokeh tool names passed to the map figure's `tools` argument.
TOOLS = ",".join([
    "tap",
    "box_select",
    "lasso_select",
    "pan",
    "wheel_zoom",
    "box_zoom",
    "reset",
])

Labels

In [12]:
# Dashboard title banner: an empty figure with axes, grid and outline hidden
# that only hosts a large text label. No glyphs are added to it.
tltPlot = figure(
    plot_width=1200,
    plot_height=100,
    x_range=(1, 9),
    y_range=(20, 100),
)
tltPlot.outline_line_color = None
tltPlot.axis.visible = False
tltPlot.xgrid.visible = False
tltPlot.ygrid.visible = False

# The label is positioned in data coordinates of the ranges declared above.
label = Label(
    text='Capstone Project: Feelings',
    x=1,
    y=25,
    text_color='black',
    text_font_size='36pt',
)
tltPlot.add_layout(label)

In [13]:
# Counter tile for negative tweets: a glyph-less figure whose title names
# the metric and whose single label shows the count.
lblPlot1 = figure(
    title='Negative tweets',
    plot_width=400,
    plot_height=100,
    x_range=(1, 9),
    y_range=(20, 100),
)
lblPlot1.title.text_color = "black"
lblPlot1.outline_line_color = None
lblPlot1.axis.visible = False
lblPlot1.xgrid.visible = False
lblPlot1.ygrid.visible = False

# Number of rows whose sentiment is 'negative', rendered as text.
negative_total = len(df[df['airline_sentiment'] == 'negative'])
label1 = Label(
    text=str(negative_total),
    x=1,
    y=18,
    text_color=rd,
    text_font_size='36pt',
)
lblPlot1.add_layout(label1)

In [14]:
# Counter tile for positive tweets: a glyph-less figure whose title names
# the metric and whose single label shows the count.
lblPlot2 = figure(
    title='Positive tweets',
    plot_width=400,
    plot_height=100,
    x_range=(1, 9),
    y_range=(20, 100),
)
lblPlot2.title.text_color = "black"
lblPlot2.outline_line_color = None
lblPlot2.axis.visible = False
lblPlot2.xgrid.visible = False
lblPlot2.ygrid.visible = False

# Number of rows whose sentiment is 'positive', rendered as text.
positive_total = len(df[df['airline_sentiment'] == 'positive'])
label2 = Label(
    text=str(positive_total),
    x=1,
    y=18,
    text_color=ltbl,
    text_font_size='36pt',
)
lblPlot2.add_layout(label2)

In [15]:
# Counter tile for neutral tweets: a glyph-less figure whose title names
# the metric and whose single label shows the count.
lblPlot3 = figure(
    title='Neutral tweets',
    plot_width=400,
    plot_height=100,
    x_range=(1, 9),
    y_range=(20, 100),
)
lblPlot3.title.text_color = "black"
lblPlot3.outline_line_color = None
lblPlot3.axis.visible = False
lblPlot3.xgrid.visible = False
lblPlot3.ygrid.visible = False

# Number of rows whose sentiment is 'neutral', rendered as text.
neutral_total = len(df[df['airline_sentiment'] == 'neutral'])
label3 = Label(
    text=str(neutral_total),
    x=1,
    y=18,
    text_color=cz,
    text_font_size='36pt',
)
lblPlot3.add_layout(label3)

Map configs

In [16]:
# Margin added on every side of the data extent so markers at the edge of
# the point cloud are not drawn on the plot border. Expressed in the units
# of the projected EPSG:3857 coordinates.
MAP_PADDING = 2500000

# Base map figure: its ranges span the projected tweet coordinates plus
# the padding above.
plotMapBase = figure(
    x_range=(df['lon3857'].min() - MAP_PADDING, df['lon3857'].max() + MAP_PADDING),
    y_range=(df['lat3857'].min() - MAP_PADDING, df['lat3857'].max() + MAP_PADDING),
    plot_width=1200,
    plot_height=600,
    tools = TOOLS,
    title = 'Where are the tweets coming from'
)

plotMapBase.title.text_color = "black"
# Tile layer drawn underneath the tweet markers.
plotMapBase.add_tile(STAMEN_TONER)
# Hide grid and axes; the tiles provide the geographic context.
plotMapBase.xgrid.visible = False
plotMapBase.ygrid.visible = False
plotMapBase.axis.visible = False

Data sources

In [17]:
# Columns consumed by the map markers: projected coordinates, the
# sentiment label and the colour mapped from it.
source = ColumnDataSource(data={
    'lat': df['lat3857'],
    'lng': df['lon3857'],
    'sent': df['airline_sentiment'],
    'cl': df['sent_colors'],
})

Map markers

In [18]:
# One semi-transparent circle per tweet; position and colours are read from
# the 'lng', 'lat' and 'cl' columns of `source`.
mapMarker = Circle(
    x='lng',
    y='lat',
    size=5,
    line_color='cl',
    fill_color='cl',
    fill_alpha=0.5,
)
plotMapBase.add_glyph(source, mapMarker)

Bar chart: sentiment by airline

In [19]:
# Count tweets per (airline, sentiment) pair.
# `.size()` counts rows per group, so no column selection is needed. The
# previous `groupby(...)['airline', 'airline_sentiment']` form indexed the
# groupby with several keys at once, which pandas deprecated and newer
# releases reject.
dfAirlines = (
    df.groupby(['airline', 'airline_sentiment'])
      .size()
      .reset_index(name='number')
)

# Plain strings are needed to concatenate the two labels below.
dfAirlines['airline'] = dfAirlines['airline'].astype('str')
dfAirlines['airline_sentiment'] = dfAirlines['airline_sentiment'].astype('str')

# One categorical x-axis factor per bar, e.g. '<airline> <sentiment>'.
dfAirlines['x'] = dfAirlines['airline']+' '+dfAirlines['airline_sentiment']

# Data for the bar chart: factor, its two components, the count and the
# bar colour looked up from the sentiment.
barSource = ColumnDataSource(
    data = dict(
        x = dfAirlines['x'],
        arln = dfAirlines['airline'],
        snt = dfAirlines['airline_sentiment'],
        n = dfAirlines['number'],
        col = dfAirlines['airline_sentiment'].map(colors)
    )
)

In [20]:
# Categorical bar chart: one bar per '<airline> <sentiment>' factor, in the
# order the factors appear in `dfAirlines`.
bar_factors = list(dfAirlines['x'])
barSentiment = figure(
    title="Sentiment by Airlines",
    plot_width=1200,
    plot_height=200,
    x_range=FactorRange(factors=bar_factors),
)

# Bar height comes from column 'n' and fill colour from column 'col'.
barSentiment.vbar(
    source=barSource,
    x='x',
    top='n',
    width=0.9,
    fill_color='col',
    line_color="white",
)

# Bars start at zero; x labels are tilted 45 degrees.
barSentiment.y_range.start = 0
barSentiment.x_range.range_padding = 0.1
barSentiment.xaxis.major_label_orientation = pi/4
barSentiment.outline_line_color = None
barSentiment.xgrid.visible = False
barSentiment.ygrid.visible = False

Timelines: sentiment by date and by hour of day

In [21]:
# Tweets per (calendar date, sentiment) in long format.
dfDate = df.groupby([df['tweet_created'].dt.date, df['airline_sentiment']]).size().reset_index()
dfDate.columns = ['date','airline_sentiment','number']

# Wide table with one row per date and one count column per sentiment.
# Building the source from this table guarantees that x and all three y
# columns have the same length and refer to the same dates. Filtering the
# long table once per sentiment only lines up when every date has a row for
# every sentiment. Missing (date, sentiment) pairs are filled with 0.
# The sentiment column is cast to str first so the pivoted column index is
# a plain Index and `reset_index` can insert the 'date' column.
dateCounts = (
    dfDate.assign(airline_sentiment=dfDate['airline_sentiment'].astype('str'))
          .pivot(index='date', columns='airline_sentiment', values='number')
          .reindex(columns=['negative', 'positive', 'neutral'])
          .fillna(0)
          .astype('int64')
          .reset_index()
)

dateSource = ColumnDataSource(
    data = dict(
        x = dateCounts['date'],
        yNeg = dateCounts['negative'],
        yPos = dateCounts['positive'],
        yNtr = dateCounts['neutral']
    )
)

In [22]:
# Daily tweet counts per sentiment, drawn as a marker-plus-line series on a
# datetime x-axis.
dateSentiment = figure(
    title="Sentiment by time",
    plot_width=600,
    plot_height=200,
    x_axis_type="datetime",
)

# (y column in `dateSource`, series colour), in drawing order.
# NOTE(review): positive is drawn in `dkbl` here while the `colors` mapping
# and the counter tile use `ltbl` for positive; confirm this is intended.
date_series = (('yNeg', rd), ('yPos', dkbl), ('yNtr', cz))
for y_column, series_color in date_series:
    dateSentiment.circle(x='x', y=y_column, source=dateSource, color=series_color)
    dateSentiment.line(x='x', y=y_column, source=dateSource, color=series_color)

dateSentiment.outline_line_color = None
dateSentiment.xgrid.visible = False
dateSentiment.ygrid.visible = False

In [23]:
# Tweets per (hour of day, sentiment) in long format.
dfHour = df.groupby([df['tweet_created'].dt.hour, df['airline_sentiment']]).size().reset_index()
dfHour.columns = ['hour','airline_sentiment','number']

# Wide table with one row per hour and one count column per sentiment.
# Building the source from this table guarantees that x and all three y
# columns have the same length and refer to the same hours. Filtering the
# long table once per sentiment only lines up when every hour has a row for
# every sentiment. Missing (hour, sentiment) pairs are filled with 0.
# The sentiment column is cast to str first so the pivoted column index is
# a plain Index and `reset_index` can insert the 'hour' column.
hourCounts = (
    dfHour.assign(airline_sentiment=dfHour['airline_sentiment'].astype('str'))
          .pivot(index='hour', columns='airline_sentiment', values='number')
          .reindex(columns=['negative', 'positive', 'neutral'])
          .fillna(0)
          .astype('int64')
          .reset_index()
)

hourSource = ColumnDataSource(
    data = dict(
        x = hourCounts['hour'],
        yNeg = hourCounts['negative'],
        yPos = hourCounts['positive'],
        yNtr = hourCounts['neutral']
    )
)

In [24]:
# Tweet counts per hour of day and sentiment.
# The x values are integer hours produced by `.dt.hour`, not timestamps, so
# the figure uses the default linear axis. With x_axis_type="datetime" the
# hours were interpreted as milliseconds since the epoch and the tick labels
# were meaningless.
hourSentiment = figure(plot_height=200, plot_width = 600, title="Sentiments during the day")

# Negative series.
hourSentiment.circle('x', 'yNeg', source=hourSource, color=rd)
hourSentiment.line('x', 'yNeg', source=hourSource, color=rd)

# Positive series.
hourSentiment.circle('x', 'yPos', source=hourSource, color=dkbl)
hourSentiment.line('x', 'yPos', source=hourSource, color=dkbl)

# Neutral series.
hourSentiment.circle('x', 'yNtr', source=hourSource, color=cz)
hourSentiment.line('x', 'yNtr', source=hourSource, color=cz)

hourSentiment.xgrid.visible = False
hourSentiment.ygrid.visible = False
hourSentiment.outline_line_color = None

Plot

In [26]:
# Dashboard layout, top to bottom: title banner, the three counter tiles,
# the map, the per-airline bar chart, and the two time charts side by side.
dashboard_rows = [
    [tltPlot],
    [lblPlot1, lblPlot2, lblPlot3],
    [plotMapBase],
    [barSentiment],
    [dateSentiment, hourSentiment],
]
p = gridplot(dashboard_rows)
show(p)
# output_file("tourist_dashboard.html")
# save(p)

W-1001 (NO_DATA_RENDERERS): Plot has no data renderers: Figure(id='3855b863-5ce6-423c-8669-4d742bdacd5e', ...)
W-1001 (NO_DATA_RENDERERS): Plot has no data renderers: Figure(id='90bb4725-0ba3-40dd-9814-24da56dacc3f', ...)
W-1001 (NO_DATA_RENDERERS): Plot has no data renderers: Figure(id='9f30295a-6cee-4528-b281-46f440e8c6b0', ...)
W-1001 (NO_DATA_RENDERERS): Plot has no data renderers: Figure(id='ba23609b-f653-47ce-b068-9d69e24ae79f', ...)
