## Data Visualization: Effectiveness, Expressiveness and an Alternate Encoding 

Visual 6:

!["viz6"](assets/chart_2.png) 

We'll focus on recreating the same visualization using Altair.

In [2]:
import pandas as pd
import altair as alt
import numpy as np
import math

In [6]:
# Load the sampled tweets once (the CSV was previously read twice in a row).
tweets = pd.read_csv('assets/tweets.csv')

# Per-tweet count of characters that are NOT word characters, whitespace, the
# listed punctuation, or regional-indicator symbols (U+1F1E6..U+1F1FF).
tweets['emojis'] = tweets['text'].str.findall(r'[^\w\s.,"@\'?/#!$%\^&\*;:{}=\-_`~()\U0001F1E6-\U0001F1FF]').str.len()

# One count column per emoji of interest. Each emoji appears exactly once:
# a repeated entry would only recompute (and overwrite) the same column.
boxer_emojis = ['☘️','🇮🇪','🍀','💸','🤑','💰','💵','😴','😂','🤣','🥊','👊','👏','💪','🔥','😭']
for emoji in boxer_emojis:
    tweets[emoji] = tweets.text.str.count(emoji)

# Aggregate the per-emoji counts into the two "teams" being compared.
tweets['irish_pride'] = tweets['☘️'] + tweets['🇮🇪'] + tweets['🍀']
tweets['money_team'] = tweets['💸'] + tweets['🤑'] + tweets['💰'] +  tweets['💵']

# Index by timestamp so the frame can be resampled and rolled over time.
tweets['datetime'] = pd.to_datetime(tweets['created_at'])
tweets = tweets.set_index('datetime')

# Per-second totals. resample() returns a new frame, so no explicit copy is needed.
teams = tweets.resample('1s').sum()

# Keep only the seconds in which at least one team emoji was used.
team_emoji_columns = ['💸', '🤑', '💰', '💵', '☘️', '🍀', '🇮🇪']
teams = teams[teams[team_emoji_columns].gt(0).any(axis=1)]


def _rolling_team_average(per_second, column, label, window='4Min'):
    """Return the rolling mean of ``column`` as a long-format frame.

    Parameters
    ----------
    per_second : DataFrame indexed by datetime that contains ``column``.
    column : name of the count column to average.
    label : value written to the ``team`` column of the result.
    window : time-based rolling window passed to ``Series.rolling``.

    Returns
    -------
    DataFrame with the former index as a column, the averaged values in
    ``tweet_count`` and a constant ``team`` column.
    """
    averaged = per_second[column].rolling(window).mean().reset_index()
    averaged['team'] = label
    return averaged.rename(columns={column: 'tweet_count'})


mdf = _rolling_team_average(teams, 'money_team', '💸🤑💰💵')
idf = _rolling_team_average(teams, 'irish_pride', '☘️🍀🇮🇪')

# Stack both teams into one long frame for plotting (one row per second per team).
ndf = pd.concat([mdf,idf])

In [7]:
# Text annotations for the chart: one row per note, given as
# [timestamp string, y position, note text ('\n' marks a line break)].
annotations = [
    ['2017-08-27 00:15:00', 4, 'Fight begins'],
    ['2017-08-27 00:22:00', 5, 'McGregor does OK \nin the early rounds'],
    ['2017-08-27 00:53:00', 4, 'Mayweather takes \nover and wins by \nTKO'],
]
annotation_columns = ['date', 'count', 'note']
a_df = pd.DataFrame(data=annotations, columns=annotation_columns)

In [8]:
# Parse the annotation timestamps from strings into datetimes.
a_df['date'] = a_df['date'].pipe(pd.to_datetime)

In [9]:
# Switch Altair to the 'fivethirtyeight' theme.
alt.themes.enable('fivethirtyeight')

# Layer 1: rolling-average lines from ndf, one line per team, colored by team.
bars = (
    alt.Chart(ndf)
    .mark_line(opacity=0.8, strokeWidth=1.2, fontSize=70, size=2.4)
    .encode(
        y=alt.Y('tweet_count', axis=alt.Axis(tickCount=8, domain=True)),
        x=alt.X('datetime', axis=alt.Axis(tickCount=8, domain=True, format="%I:%M")),
        color=alt.Color('team', scale=alt.Scale(range=['green', '#ffdb4d']), title=''),
    )
    .properties(width=500, height=300)
)

# Layer 2: annotation text placed at the (date, count) positions in a_df.
ad = (
    alt.Chart(a_df)
    .mark_text(opacity=0.9, strokeWidth=1.2, lineBreak='\n', size=14)
    .encode(
        y=alt.Y('count', stack='zero', axis=alt.Axis(tickCount=5)),
        x=alt.X('date', axis=alt.Axis(tickCount=5), title=None),
        text=alt.Text('note'),
    )
    .properties(width=500, height=300)
)

# Layer 3: black segment between two points that share the 00:15 timestamp
# (y from 2 to 3.7), i.e. a vertical marker line.
df = pd.DataFrame({
    'a': ['2017-08-27 00:15:00', '2017-08-27 00:15:00'],
    'b': [2, 3.7],
})
df['a'] = pd.to_datetime(df['a'])
dfr = (
    alt.Chart(df)
    .mark_line(opacity=1, stroke='black', strokeWidth=1.6, fontSize=70)
    .encode(
        y=alt.Y('b', axis=alt.Axis(tickCount=8, domain=True)),
        x=alt.X('a', axis=alt.Axis(tickCount=8, domain=True)),
    )
)

# Layer 4: black segment from (00:30, 3.8) to (00:24, 4.3); this layer also
# carries the explicit y-axis title.
df1 = pd.DataFrame({
    'a': ['2017-08-27 00:30:00', '2017-08-27 00:24:00'],
    'b': [3.8, 4.3],
})
df1['a'] = pd.to_datetime(df1['a'])
dfr1 = (
    alt.Chart(df1)
    .mark_line(opacity=1, stroke='black', strokeWidth=1.6, fontWeight='bold')
    .encode(
        y=alt.Y('b', axis=alt.Axis(tickCount=8, domain=True), title="Four minute rolling average"),
        x=alt.X('a', axis=alt.Axis(tickCount=8, domain=True)),
    )
)

# Combine the layers (annotations first, then the lines) and apply the
# chart-wide configuration. The expression is left as the last statement of
# the cell so the notebook renders it.
(
    (ad + bars + dfr + dfr1)
    .configure_axis(labelFontSize=11, titleFontSize=20)
    .configure_view(strokeWidth=0)
    .properties(
        title={
            "text": "Irish Pride VS The Money Team?",
            "subtitle": [
                "4 minute rolling average of the number of uses of selected emoji in",
                "sampled tweets during the Mayweather-McGregor fight",
            ],
            "subtitleColor": "black",
            "subtitleFontSize": 18,
        }
    )
    .configure_scale(bandPaddingInner=0.2)
    .configure_legend(orient='top', symbolType='stroke', labelFontSize=25)
    .configure_axisLeft(titleFontSize=14)
)

![2.2](assets/2pt2_.png)