In [3]:
import pandas as pd
import json

In [4]:
# Load the tweet records. The file is read as JSON Lines: every non-blank
# line is decoded as one JSON document and appended to `tweets`.
# The context manager guarantees the file handle is closed after reading
# (the bare open() in a for-loop left it open until garbage collection).
with open('data/keyword-malioboro.json', 'r', encoding='UTF-8') as tweets_file:
    # Blank lines (e.g. a trailing newline at EOF) are skipped instead of
    # crashing json.loads.
    tweets = [json.loads(line) for line in tweets_file if line.strip()]

In [5]:
# Flatten the list of decoded tweet dicts into a single DataFrame
# (json_normalize expands nested dicts into dotted column names).
df = pd.json_normalize(tweets)

In [6]:
# Number of rows in df, i.e. how many records were loaded.
len(df)

88035

In [8]:
# Peek at the raw timestamp column: values are strings (dtype object) that
# end with a " SE Asia Standard Time" suffix.
df['created_at'].head()

0    2021-01-31 06:52:24 SE Asia Standard Time
1    2021-01-31 06:47:59 SE Asia Standard Time
2    2021-01-31 05:59:00 SE Asia Standard Time
3    2021-01-31 05:50:14 SE Asia Standard Time
4    2021-01-31 04:58:16 SE Asia Standard Time
Name: created_at, dtype: object

In [11]:
from dateutil.parser import parse

# Strip the literal " SE Asia Standard Time" suffix so that only the
# "YYYY-MM-DD HH:MM:SS" part of each timestamp string remains.
# The vectorised .str accessor replaces the per-row Python loop and lets
# missing values pass through as NaN instead of raising AttributeError.
# regex=False makes the pattern a plain substring, as in str.replace.
df['created'] = df['created_at'].str.replace(" SE Asia Standard Time", "", regex=False)

In [12]:
# Parse the cleaned timestamp strings once, vectorised, instead of calling
# dateutil's parse() row by row. The explicit format matches the
# "2021-01-31 06:52:24" layout seen in the created_at preview and makes
# the parse fail loudly on any row that deviates from it.
created_ts = pd.to_datetime(df['created'], format="%Y-%m-%d %H:%M:%S")

# Calendar day of each tweet as datetime.date objects (same type as before).
df['date'] = created_ts.dt.date
# Monthly period (e.g. 2020-10) of each tweet, derived from the same parse.
df['monthyear'] = created_ts.dt.to_period('M')

In [8]:
# Tweets per calendar day, ordered chronologically by the date index.
# NOTE(review): this cell's execution count (8) is lower than that of the
# cell creating df['date'] (12); re-run the notebook top to bottom to
# confirm the displayed output is current.
df['date'].value_counts().sort_index()

2020-01-01    174
2020-01-02    183
2020-01-03    137
2020-01-04    156
2020-01-05    148
             ... 
2021-01-27    222
2021-01-28     94
2021-01-29    108
2021-01-30    151
2021-01-31     24
Name: date, Length: 397, dtype: int64

In [13]:
# Count tweets per calendar month, in chronological order.
by_month = (
    pd.to_datetime(df['date'])
    .dt.to_period('M')
    .value_counts()
    .sort_index()
)
# Make sure the index is a PeriodIndex.
by_month.index = pd.PeriodIndex(by_month.index)

# Tidy two-column frame: 'month' (period) and 'counts' (tweets that month).
df_month = (
    by_month
    .rename_axis('month')
    .rename('counts')
    .reset_index()
)
df_month

Unnamed: 0,month,counts
0,2020-01,5762
1,2020-02,5696
2,2020-03,5076
3,2020-04,4457
4,2020-05,6085
5,2020-06,6547
6,2020-07,5696
7,2020-08,5697
8,2020-09,5446
9,2020-10,16426


In [16]:
import plotly.express as px
import plotly.graph_objs as go

# Line chart of monthly tweet volume.
# The period values are cast to str for the x axis (presumably because
# Period objects cannot be handed to plotly directly -- confirm).
# `text` must be the actual values: in graph_objects a bare string is not a
# column name, so text="counts" labelled every point with the literal word
# "counts".
fig = go.Figure(data=go.Scatter(x=df_month['month'].astype(dtype=str),
                        y=df_month['counts'],
                        marker_color='indianred', text=df_month['counts']))

fig.update_layout({"title": 'Tweets about Malioboro from Jan 2020 to Jan 2021',
                   "xaxis": {"title":"Months"},
                   "yaxis": {"title":"Total tweets"},
                   "showlegend": False})
# Export a static PNG next to the notebook, then render inline.
fig.write_image("by-month.png",format="png", width=1000, height=600, scale=3)
fig.show()

In [17]:
# Bar chart of monthly tweet volume.
# The period values are cast to str for the x axis (presumably because
# Period objects cannot be handed to plotly directly -- confirm).
# `text` must be the actual values: in graph_objects a bare string is not a
# column name, so text="counts" printed the literal word "counts" on every
# bar instead of the bar's value.
fig = go.Figure(data=go.Bar(x=df_month['month'].astype(dtype=str),
                        y=df_month['counts'],
                        marker_color='indianred', text=df_month['counts']))

fig.update_layout({"title": 'Tweets about Malioboro from Jan 2020 to Jan 2021',
                   "xaxis": {"title":"Months"},
                   "yaxis": {"title":"Total tweets"},
                   "showlegend": False})
# Export a static PNG next to the notebook, then render inline.
fig.write_image("by-month-bar.png",format="png", width=1000, height=600, scale=3)
fig.show()

In [19]:
# Count tweets per calendar day, in chronological order.
by_date = df['date'].value_counts().sort_index()
# Convert the index of date objects into a proper DatetimeIndex.
by_date.index = pd.DatetimeIndex(by_date.index)

# Tidy two-column frame: 'date' (timestamp) and 'counts' (tweets that day).
df_date = (
    by_date
    .rename_axis('date')
    .rename('counts')
    .reset_index()
)
df_date

Unnamed: 0,date,counts
0,2020-01-01,174
1,2020-01-02,183
2,2020-01-03,137
3,2020-01-04,156
4,2020-01-05,148
...,...,...
392,2021-01-27,222
393,2021-01-28,94
394,2021-01-29,108
395,2021-01-30,151


In [20]:
# Day-by-day line chart of tweet volume.
# `text` must be the actual values: in graph_objects a bare string is not a
# column name, so text="counts" attached the literal word "counts" to every
# point.
fig = go.Figure(data=go.Scatter(x=df_date['date'].astype(dtype=str),
                                y=df_date['counts'],
                                marker_color='black', text=df_date['counts']))

fig.update_layout({"title": 'Tweets about Malioboro from Jan 2020 to Jan 2021 Day by Day',
                   "xaxis": {"title":"Time"},
                   "yaxis": {"title":"Total tweets"},
                   "showlegend": False})

# Render inline, then export a static PNG next to the notebook.
fig.show()
fig.write_image("timeline-by-day.png",format="png", width=1000, height=600, scale=2)

In [28]:
# The three busiest days, highest tweet count first.
top_dates = df_date.sort_values(by='counts', ascending=False).head(3)

# Human-readable label per day, e.g. "5213 (08 October)".
vals = [
    "%d (%s)" % (total, day.strftime("%d %B"))
    for day, total in zip(top_dates["date"], top_dates["counts"])
]
top_dates['tgl'] = vals
top_dates

Unnamed: 0,date,counts,tgl
281,2020-10-08,5213,5213 (08 October)
365,2020-12-31,2995,2995 (31 December)
277,2020-10-04,2122,2122 (04 October)


In [29]:
# Day-by-day line chart of tweet volume, with the busiest days annotated.
# `text` must be the actual values: in graph_objects a bare string is not a
# column name, so text="counts" attached the literal word "counts" to every
# point.
fig = go.Figure(data=go.Scatter(x=df_date['date'].astype(dtype=str),
                                y=df_date['counts'],
                                marker_color='black', text=df_date['counts']))

fig.update_layout({"title": 'Tweets about Malioboro from Jan 2020 to Jan 2021 Day by Day',
                   "xaxis": {"title":"Time"},
                   "yaxis": {"title":"Total tweets"},
                   "showlegend": False})
# Overlay the top dates as red markers labelled with the "count (day month)"
# strings from top_dates['tgl']. add_trace is the single-trace API.
fig.add_trace(go.Scatter(x=top_dates['date'], y=top_dates['counts'],
                         textposition='top left',
                         textfont=dict(color='#233a77'),
                         mode='markers+text',
                         marker=dict(color='red', size=6),
                         text=top_dates["tgl"]))
# Render inline, then export a static PNG next to the notebook.
fig.show()
fig.write_image("timeline-day-notes.png",format="png", width=1000, height=600, scale=2)