In [1]:
import pandas as pd
from bokeh.io import show, output_file#, output_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Title
from bokeh.models.widgets import Div
from bokeh.palettes import viridis
from bokeh.layouts import widgetbox, column
import datetime as dt

In [2]:
# Schema of the pipe-delimited tweet dump: (column name, pandas dtype) in file order.
# 'time' is read as a string here; read_csv converts it through parse_dates.
_schema = [
    ('user', 'str'),
    ('text', 'str'),
    ('favorite_count', 'int'),
    ('time', 'str'),
    ('retweet_count', 'int'),
    ('hashtags', 'str'),
]
column_names = [name for name, _dtype in _schema]
column_dtypes = dict(_schema)

In [3]:
# Load the headerless, pipe-delimited tweet export; column names and dtypes come
# from the schema cell, and 'time' is parsed into datetimes.
df = pd.read_csv(
    'data/indeed_tweets.txt',
    sep="|",
    names=column_names,
    dtype=column_dtypes,
    index_col=False,
    parse_dates=['time'],
    infer_datetime_format=True,
)
df.head()

Unnamed: 0,user,text,favorite_count,time,retweet_count,hashtags
0,indeed,5 questions to ask yourself before a job inter...,15,2017-05-29 15:30:55,8,"jobsearch, indeed"
1,indeed,Small businesses are increasingly attracting j...,5,2017-05-25 18:38:33,5,"smb, jobs"
2,indeed,Become a #networking expert in 7 easy steps #J...,9,2017-05-25 14:30:52,7,"networking, jobsearch, indeed"
3,indeed,We take an in-depth look at #HR careers - what...,10,2017-05-25 14:27:02,5,hr
4,indeed,RT @indeed: As the sun sets on #indeedinteract...,0,2017-05-23 09:17:10,15,indeedinteractive


In [4]:
def convert_time(timing):
    """Format a timestamp as a short, human-readable string.

    Parameters
    ----------
    timing : datetime-like
        Any object exposing ``strftime`` (for example ``datetime.datetime``
        or ``pandas.Timestamp``).

    Returns
    -------
    str
        ``MM/DD/YY HH:MM AM/PM`` on a 12-hour clock,
        e.g. ``"05/29/17 03:30 PM"``.
    """
    # %M is the minute; lowercase %m is the month number and would repeat the
    # month in the minutes position.
    return timing.strftime("%m/%d/%y %I:%M %p")

In [5]:
# Earliest tweet timestamp in the loaded data (displayed as the cell output).
minDate = df['time'].min()
minDate

Timestamp('2009-08-04 17:17:59')

In [6]:
# Latest tweet timestamp in the loaded data (displayed as the cell output).
maxDate= df['time'].max()
maxDate

Timestamp('2017-05-30 21:58:54')

In [7]:
# Add a pre-formatted string version of each timestamp; the hover tooltip
# references this column as @display_time.
df['display_time'] = df['time'].apply(convert_time)

In [8]:
# Sanity check: inspect the column dtypes after loading and adding display_time.
df.dtypes

user                      object
text                      object
favorite_count             int64
time              datetime64[ns]
retweet_count              int64
hashtags                  object
display_time              object
dtype: object

In [9]:
# Summary statistics for the numeric columns (favorite_count, retweet_count).
df.describe()

Unnamed: 0,favorite_count,retweet_count
count,14421.0,14421.0
mean,6.350392,4.665904
std,47.976843,28.113903
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,1.0,2.0
max,3503.0,2151.0


In [10]:
# Distinct account names in order of first appearance; the plotting loop
# draws one line per entry.
users = df['user'].unique()
users

array(['indeed', 'IndeedCanada', 'IndeedAU', 'IndeedNZ', 'IndeedJapan',
       'IndeedUK', 'IndeedIE', 'Indeedfrancais', 'IndeedNL',
       'IndeedEspana', 'IndeedSverige', 'IndeedPolska', 'IndeedItalia',
       'IndeedDeutsch', 'IndeedKorea', 'Indeed_India'], dtype=object)

In [11]:
# Send the rendered plot to a standalone HTML page that loads BokehJS from the CDN.
output_file(
    filename='indeed_tweets_visual.html',
    mode="cdn",
    title="Indeed World Tweets",
)

In [12]:
#DEV
#output_notebook()

In [13]:
#hover = HoverTool(tooltips=[("User", "@user"),("Summary", "@summary..."),("Time", "@display_time"),("# of favorites", "@favorite_count")])

In [14]:
# Custom HTML tooltip: account name, tweet text, formatted time and favorite count.
# The adjacent literals below are concatenated into one template string;
# each @name is filled from the same-named column of the glyph's data source.
hover = HoverTool(
    tooltips=(
        '<div style="width: 450px">'
        '<p style="padding: 5px">'
        '<b>@user</b><br>'
        '"@text"<br>'
        '<b><i>@display_time</i></b><br>'
        "<font color='1da1f2'><b>@favorite_count</b> favorite count</font>"
        '</div>'
    )
)

In [15]:
# One palette entry per account. Sizing the palette from `users` (instead of a
# literal 16) keeps colors[i] in range in the plotting loop if the set of
# accounts in the data changes. Note: viridis supports at most 256 colors.
colors = viridis(len(users))

In [16]:
# Main canvas: a wide datetime-axis figure with the custom hover tool plus the
# pan/zoom/reset tools, toolbar placed above the plot and the Bokeh logo hidden.
p = figure(
    title="Tweets from Indeed around the World",
    plot_width=1200,
    plot_height=600,
    x_axis_type='datetime',
    tools=[hover, 'pan', 'box_zoom', 'wheel_zoom', 'reset'],
    toolbar_location="above",
    logo=None,
    webgl=True,
)
#y_range(0,1500)

In [17]:
# Draw one favorites-over-time line per account, each with its own palette
# color and legend entry. The extra columns are carried along so the hover
# tooltip can resolve @user, @text and @display_time.
for i, user in enumerate(users):
    belongs_to_user = df['user'] == user
    data = ColumnDataSource(
        df.loc[belongs_to_user, ['favorite_count', 'time', 'user', 'text', 'display_time']]
    )
    p.line(
        x='time',
        y='favorite_count',
        source=data,
        color=colors[i],
        line_width=2,
        legend=user,
    )
    #p.circle(x,y, color=colors[i], selection_color="red", nonselection_fill_alpha=0.2, nonselection_fill_color=colors[i], nonselection_line_color="red", nonselection_line_alpha=1.0)

In [18]:
# Axis captions.
p.xaxis.axis_label = "Date"
p.yaxis.axis_label = "# of favorites"

# Larger title text.
p.title.text_font_size = "25px"

# Legend in the top-left corner; clicking an entry hides/shows that account's line.
p.legend.location = 'top_left'
p.legend.click_policy = "hide"
#p.add_layout(Title(text="*Click on legend to display/remove Indeed Users", align="left"), "below")

In [19]:
#slider =DateRangeSlider(bounds=(minDate,maxDate))
#show(widgetbox(slider))

In [20]:
# Usage hints and footer shown in a Div widget alongside the plot.
# Inline tags are closed in the reverse order they were opened (<b><i>…</i></b>)
# and <hr> is written as a void element (no closing tag) so the markup is valid HTML.
div = Div(text=
          """<ul>
                  <li>Click on legend to <b><i>display/hide users</i></b>.</li>
                  <li>You may <b><i>Pan, Zoom (Box or Wheel), Hover, and Reset</i></b> the graph.</li>
            </ul>
            <br><br>
            <hr>
            <p style="text-align: right">Visualization created using a data pipeline (twitter API + python + html, css, javascript). Find most of the source code here: <a href='https://github.com/ashmle/indeed_tweets' target='_blank'>Andres Mack Github</a>
            <br>Go back to my personal website <a href='http://andresmack.com'>here</a></p>""",
width=1200, height=200)

In [21]:
# Stack the figure and the instructions/footer Div in a single column layout.
layout = column(p, div)

In [22]:
# Render the layout; with output_file configured earlier this produces
# 'indeed_tweets_visual.html'.
show(layout)