In [44]:
# Libraries
import pandas as pd 
import numpy as np 
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from mapsmx import MapsMX
import geopandas as gpd
import pyproj
import json
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [32]:
# Plotly theme applied to every figure in this notebook.
template = "plotly_dark"

In [42]:
# Graph 1 - Tweet score distribution

# Generate random scores and classes to test the plotting function.
# NOTE: no random seed is set, so the data (and the figure) differ on every run.
scores = np.random.rand(1000)
classes = ['toxic','severe_toxic','obscene','threat','insult','identity_hate']

# One row per synthetic tweet: a score in [0, 1) and a class label.
scores_df = pd.DataFrame({'score': scores})
# Draw all labels in one vectorized call instead of one call per row.
scores_df['class'] = np.random.choice(classes, size=len(scores_df))

# Histogram of the scores, one color per class, with a violin marginal on top.
fig = px.histogram(
    scores_df,
    x="score",
    color="class",
    nbins=10,
    marginal="violin",  # "rug" was noted as an alternative marginal
    labels={"score": "Score"},
    color_discrete_sequence=px.colors.qualitative.G10,
    template=template,
)

# Center the title and label the count axis.
fig.update_layout(
    title=dict(text="Tweets Classification Scores", x=0.5),
    yaxis_title="Frequency",
)
fig.show()

In [92]:
# Tweet location distribution: 

# Generate random locations: use the bundled Natural Earth cities as fake tweet
# locations and attach a random score and class to each one.
# NOTE(review): `gpd.datasets` is deprecated in recent GeoPandas releases;
# confirm the installed version still ships this dataset.
geo_df = gpd.read_file(gpd.datasets.get_path('naturalearth_cities'))

# Size the random columns from the frame itself rather than a hard-coded row count.
n_cities = len(geo_df)
geo_df['score'] = np.random.rand(n_cities).round(2)
geo_df['class'] = np.random.choice(classes, size=n_cities)

# Read the Mapbox token with a context manager so the file handle is closed,
# and strip surrounding whitespace (e.g. a trailing newline) from the token.
with open("data/mapbox_token/.mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# One marker per city, colored by class; hovering shows the city name and score.
fig = px.scatter_mapbox(geo_df,
                        lat=geo_df.geometry.y,
                        lon=geo_df.geometry.x,
                        hover_name="name",
                        hover_data={'score':True},
                        template=template,
                        color='class',
                        # size='score' is left disabled
                        color_discrete_sequence=px.colors.qualitative.G10,
                        zoom=1)
fig.update_layout(
    title={"text": "Tweets Toxicity Location", "x": 0.5}
)
fig.show()

In [91]:
# Density heatmap of the same locations (z='score' is left disabled).
fig = px.density_mapbox(
    geo_df,
    lat=geo_df.geometry.y,
    lon=geo_df.geometry.x,
    radius=12,
    zoom=1,
    hover_data={'score': True},
    template=template,
)
fig.update_layout(title=dict(text="Tweets Density Location", x=0.5))
fig.show()

In [147]:
# Tweet Timeline

# Generate random timestamps
from random import randrange
import datetime 

random_dates = []


def random_date(start, l):
    """Yield a non-decreasing sequence of random timestamps after ``start``.

    Each value is the previous one plus a random whole number of minutes
    in the range 0-9. The generator yields ``l + 1`` values in total
    (nothing at all when ``l`` is negative).
    """
    stamp = start
    remaining = l
    while remaining >= 0:
        stamp += datetime.timedelta(minutes=randrange(10))
        yield stamp
        remaining -= 1



# Start of the synthetic timeline.
startDate = datetime.datetime(2013, 9, 20, 13, 00)

# random_date(start, l) yields l + 1 values, so request len(geo_df) - 1 to get
# exactly one timestamp per row. The list is reversed so that timestamps
# decrease down the frame.
# The values stay as datetime objects: formatting them as day-first strings and
# re-parsing without an explicit format is ambiguous for days <= 12.
random_dates = list(reversed(list(random_date(startDate, len(geo_df) - 1))))

geo_df['time'] = pd.to_datetime(random_dates)
# Hour-resolution label ("dd/mm/yy HH") derived from each timestamp.
geo_df['dt_str'] = geo_df['time'].dt.strftime("%d/%m/%y %H")

In [148]:
# Order the rows chronologically.
geo_df = geo_df.sort_values(by='time')

In [149]:
# Animated version of the location map: one animation frame per `dt_str` value.
fig = px.scatter_mapbox(
    geo_df,
    lat=geo_df.geometry.y,
    lon=geo_df.geometry.x,
    color='class',
    color_discrete_sequence=px.colors.qualitative.G10,
    hover_name="name",
    hover_data={'score': True},
    animation_frame="dt_str",
    zoom=1,
    template=template,
)
fig.update_layout(title=dict(text="Tweets Toxicity Location per hour", x=0.5))

# Save a standalone HTML copy, then render inline.
fig.write_html("data/html_files/plotly_mapbox_scatter.html")
fig.show()


In [176]:
# Tweets per hour across all classes; the `dt_str` column holds the hourly count.
a = (
    geo_df
    .groupby([pd.Grouper(key='time', freq='H')])['dt_str']
    .count()
    .reset_index()
)
a

Unnamed: 0,time,dt_str
0,2013-09-20 13:00:00,13
1,2013-09-20 14:00:00,12
2,2013-09-20 15:00:00,10
3,2013-09-20 16:00:00,14
4,2013-09-20 17:00:00,10
5,2013-09-20 18:00:00,14
6,2013-09-20 19:00:00,16
7,2013-09-20 20:00:00,12
8,2013-09-20 21:00:00,16
9,2013-09-20 22:00:00,9


In [179]:
# Tweets per hour and class; the `dt_str` column holds the count of each (hour, class) pair.
b = (
    geo_df
    .groupby([pd.Grouper(key='time', freq='H'), 'class'])['dt_str']
    .count()
    .reset_index()
)
b

Unnamed: 0,time,class,dt_str
0,2013-09-20 13:00:00,identity_hate,5
1,2013-09-20 13:00:00,insult,2
2,2013-09-20 13:00:00,severe_toxic,3
3,2013-09-20 13:00:00,threat,1
4,2013-09-20 13:00:00,toxic,2
...,...,...,...
82,2013-09-21 04:00:00,insult,3
83,2013-09-21 04:00:00,obscene,1
84,2013-09-21 04:00:00,severe_toxic,1
85,2013-09-21 04:00:00,threat,3


In [181]:
# Attach the hourly total to every (hour, class) row. Both frames have a
# `dt_str` column, so the result carries `dt_str_x` (per-class count, from `b`)
# and `dt_str_y` (hourly total, from `a`).
c = pd.merge(b, a, how='left', left_on='time', right_on='time')

In [189]:
# Number of (hour, class) rows available for each class in the merged frame.
c["class"].value_counts()

identity_hate    16
toxic            16
insult           15
threat           15
obscene          13
severe_toxic     12
Name: class, dtype: int64

In [190]:
def _rows_for(label):
    """Return the rows of the merged frame `c` whose class equals `label`."""
    return c[c['class'] == label]


# One sub-frame per toxicity class.
hate = _rows_for('identity_hate')
insult = _rows_for('insult')
severe = _rows_for('severe_toxic')
threat = _rows_for('threat')
obscene = _rows_for('obscene')
toxic = _rows_for('toxic')

In [232]:
# Inspect the second color of the G10 qualitative palette (returned as a hex string).
px.colors.qualitative.G10[1]

'#DC3912'

In [233]:
# Frequency timeline: hourly totals as a line, per-class hourly counts as stacked bars.
fig = go.Figure()

# Hourly totals come straight from `a` (one row per hour). The merged frame `c`
# repeats each hourly total once per class, which would plot duplicate points.
fig.add_trace(go.Scatter(
        x=a.time,
        y=a.dt_str,
        name='Total Tweets',
        mode='markers+lines',
        line=dict(color='white', width=3),
        ))

# (legend label, per-class rows). The position in this list selects the G10
# palette color, in the same order as the original hand-written traces.
class_traces = [
    ('Toxic', toxic),
    ('Severe Toxic', severe),
    ('Hate', hate),
    ('Insult', insult),
    ('Obscene', obscene),
    ('Threat', threat),
]
for color_idx, (label, rows) in enumerate(class_traces):
    fig.add_trace(go.Bar(
        x=rows.time,
        y=rows.dt_str_x,  # per-class hourly count (left side of the merge)
        name=label,
        opacity=.5,
        marker=dict(color=px.colors.qualitative.G10[color_idx]),
        ))

# Last expression of the cell: returning the figure renders it inline.
fig.update_layout(barmode='stack',template=template,title={"text": "Tweets Frequency Timeline", "x": 0.5},)