In [1]:
from update_json import update_json
import pandas as pd
import numpy as np

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

# Enable plotly's inline notebook rendering so the iplot(...) calls below can
# display figures. connected=True loads plotly.js from the online CDN instead
# of embedding the library in the notebook, so viewing needs network access.
init_notebook_mode(connected=True)

In [2]:
# Load the dataset into `df`. Run exactly ONE of the two statements below:
#   * pd.read_json - faster, but reads the data file as-is without updating it.
#   * update_json  - updates the data file first, then reads it. Remove the
#                    return_df argument to update the file without reading it.
# Either way the loaded frame is transposed before use.
df = pd.read_json('data/data.json').transpose()

# df = update_json('data/data.json', return_df=True).T

In [3]:
# Cast both numeric columns to float in one call. 'avgCommentLenth' (sic) is
# the column name as it exists in the loaded data at this point.
df = df.astype({'avgCommentLenth': float, 'commentCount': float})

In [4]:
# Move the current index into an ordinary leading column and replace it with a
# fresh 0..n-1 index. NOTE: not idempotent - every re-run of this cell adds
# another leading column.
df = df.reset_index(drop=False)

In [5]:
# Positional relabel of all seven columns: the list order must match the
# frame's current column order exactly.
df.columns = [
    'Celebrity',         # leading column produced by reset_index()
    'avgCommentLength',  # corrects the 'avgCommentLenth' spelling used upstream
    'avgKarma',
    'commentCount',
    'Comments',
    'Username',
    'avgVader',
]

In [6]:
# Print the schema summary: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()
# Last expression of the cell, so the first rows are rendered as a table.
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 7 columns):
Celebrity           51 non-null object
avgCommentLength    51 non-null float64
avgKarma            51 non-null object
commentCount        51 non-null float64
Comments            51 non-null object
Username            51 non-null object
avgVader            51 non-null object
dtypes: float64(2), object(5)
memory usage: 2.9+ KB


Unnamed: 0,Celebrity,avgCommentLength,avgKarma,commentCount,Comments,Username,avgVader
0,Macaulay Culkin,54.433962,"{'compound': 3581.977358490566, 'ups': 3581.97...",265.0,"[{'text': 'My penis. ', 'length': 10, 'utc_tim...",MacaulayCulkinAMA,"{'compound': 0.14748830188679202, 'neg': 0.053..."
1,Patrick Stewart,182.783784,"{'compound': 2770.5945945945946, 'ups': 2770.5...",37.0,"[{'text': 'Frakes with a beard, preferable, be...",sirpatstew,"{'compound': 0.48746756756756704, 'neg': 0.029..."
2,Arnold Schwarzenegger,249.148352,"{'compound': 1621.5054945054944, 'ups': 1621.5...",364.0,[{'text': 'What an incredible story. Thank you...,GovSchwarzenegger,"{'compound': 0.419281868131868, 'neg': 0.03788..."
3,Bill Gates,308.345528,"{'compound': 3915.8943089430895, 'ups': 3915.8...",246.0,[{'text': 'I would be glad to pass along your ...,thisisbillgates,"{'compound': 0.48653821138211306, 'neg': 0.050..."
4,Snoop Dogg,30.46,"{'compound': 892.929, 'ups': 892.929, 'douns':...",1000.0,[{'text': 'KBone !! When you coming thru tha d...,Here_Comes_The_King,"{'compound': 0.09898459999999999, 'neg': 0.040..."


In [8]:
# Expand the per-user karma dicts (one dict per row of df['avgKarma']) into
# their own columns, then attach the celebrity name.
#
# Columns are renamed BY KEY, not by position. The column order pandas builds
# from a list of dicts depends on the pandas version (alphabetical in older
# releases, dict insertion order in newer ones), so a positional relabel can
# silently swap the up- and down-vote columns.
# NOTE: 'douns' (sic) is the key spelling used in the data and must match it.
karma = (
    pd.DataFrame(list(df['avgKarma']))
    .rename(columns={
        'compound': 'karma_compound',
        'douns': 'downvotes',
        'ups': 'upvotes',
    })
    # Explicit selection pins the output column order and raises KeyError if
    # an expected key is missing, instead of mislabelling data.
    [['karma_compound', 'downvotes', 'upvotes']]
    .assign(Celebrity=df['Celebrity'])
)
karma.head()

Unnamed: 0,karma_compound,downvotes,upvotes,Celebrity
0,3581.977358,0.0,3581.977358,Macaulay Culkin
1,2770.594595,0.0,2770.594595,Patrick Stewart
2,1621.505495,0.0,1621.505495,Arnold Schwarzenegger
3,3915.894309,0.0,3915.894309,Bill Gates
4,892.929,0.0,892.929,Snoop Dogg


In [9]:
# Expand the per-user Vader score dicts (one dict per row of df['avgVader'])
# into their own columns, then attach the celebrity name.
#
# Column names are derived from the dict keys ('compound' -> 'vader_compound',
# 'neg' -> 'vader_neg', ...) rather than assigned by position, so the labels
# stay correct whatever column order pandas produces from the list of dicts.
# NOTE(review): the other two keys are presumably 'neu' and 'pos', giving
# 'vader_neu' / 'vader_pos' - confirm against the data file.
vader = (
    pd.DataFrame(list(df['avgVader']))
    .add_prefix('vader_')
    .assign(Celebrity=df['Celebrity'])
)
vader.head()

Unnamed: 0,vader_compound,vader_neg,vader_neu,vader_pos,Celebrity
0,0.147488,0.05397,0.796411,0.149623,Macaulay Culkin
1,0.487468,0.029541,0.79627,0.17427,Patrick Stewart
2,0.419282,0.037882,0.772401,0.18972,Arnold Schwarzenegger
3,0.486538,0.050882,0.777768,0.171378,Bill Gates
4,0.098985,0.040439,0.816328,0.141233,Snoop Dogg


In [10]:
# Keep only the per-user identifier and the two comment statistics.
counts = df.loc[:, ['Celebrity', 'commentCount', 'avgCommentLength']]

In [11]:
# Build the combined per-user table: comment statistics plus the Vader and
# karma columns, joined on the celebrity name.
#
# The base frame is re-derived from `df` here instead of merging `counts` into
# itself. That keeps the cell idempotent: merging into the existing `counts`
# a second time would add '_x' / '_y' suffixed duplicates and break later
# lookups such as counts['vader_compound'].
counts = (
    df[['Celebrity', 'commentCount', 'avgCommentLength']]
    .merge(vader, on='Celebrity', how='left')
    .merge(karma, on='Celebrity', how='left')
)
counts.head()

Unnamed: 0,Celebrity,commentCount,avgCommentLength,vader_compound,vader_neg,vader_neu,vader_pos,karma_compound,downvotes,upvotes
0,Macaulay Culkin,265.0,54.433962,0.147488,0.05397,0.796411,0.149623,3581.977358,0.0,3581.977358
1,Patrick Stewart,37.0,182.783784,0.487468,0.029541,0.79627,0.17427,2770.594595,0.0,2770.594595
2,Arnold Schwarzenegger,364.0,249.148352,0.419282,0.037882,0.772401,0.18972,1621.505495,0.0,1621.505495
3,Bill Gates,246.0,308.345528,0.486538,0.050882,0.777768,0.171378,3915.894309,0.0,3915.894309
4,Snoop Dogg,1000.0,30.46,0.098985,0.040439,0.816328,0.141233,892.929,0.0,892.929


In [12]:
# Grouped bar chart: total comment count and average comment length, one pair
# of bars per celebrity.
data = [
    go.Bar(
        x=counts['Celebrity'],
        y=counts['commentCount'],
        name='Total Comment Count',
        textposition='auto',
    ),
    go.Bar(
        x=counts['Celebrity'],
        y=counts['avgCommentLength'],
        name='Avg Comment Length',
        textposition='auto',
    ),
]

layout = go.Layout(
    title="Total Comment Count and Average Comment Length by User",
    hovermode='closest',
    # The x axis holds celebrity names, not comment counts.
    xaxis=dict(
        title='Celebrity',
        ticklen=5,
        gridwidth=2,
    ),
    # Both traces share this axis, so the label has to cover both quantities.
    yaxis=dict(
        title='Comment Count / Avg Comment Length (Characters)',
        ticklen=5,
        gridwidth=2,
    ),
    # Two series are plotted; the legend is the only key telling them apart.
    showlegend=True,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [13]:
# Scatter plot: one marker per user, comment count against average comment
# length, with the celebrity name shown on hover.
data = [
    go.Scatter(
        x=df['commentCount'],
        y=df['avgCommentLength'],
        mode='markers',
        text=df['Celebrity'],
    ),
]

layout = go.Layout(
    # Fixed spelling in the rendered title ("Lenth" -> "Length").
    title="User's Comment Count vs Average Comment Length",
    hovermode='closest',
    xaxis=dict(
        title='Comment Count',
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Avg Comment Length (Characters)',
        ticklen=5,
        gridwidth=2,
    ),
    showlegend=False,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [14]:
# Bar chart of net karma, one bar per celebrity.
data = [
    go.Bar(
        name='Net Karma',
        x=counts['Celebrity'],
        y=counts['karma_compound'],
        textposition='auto',
    ),
]

layout = go.Layout(
    title="Net Karma by User",
    showlegend=False,
    hovermode='closest',
    xaxis={'title': 'Celebrity', 'ticklen': 5, 'gridwidth': 2},
    yaxis={'title': 'Net Karma', 'ticklen': 5, 'gridwidth': 2},
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [15]:
# Bar chart of the average compound Vader score, one bar per celebrity.
data = [
    go.Bar(
        x=counts['Celebrity'],
        y=counts['vader_compound'],
        name='Avg Compound Vader Score',
        textposition='auto',
    ),
]

# Title and axis labels describe what is actually plotted; they previously
# carried the comment-count chart's text.
layout = go.Layout(
    title="Average Compound Vader Score by User",
    hovermode='closest',
    xaxis=dict(
        title='Celebrity',
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Avg Compound Vader Score',
        ticklen=5,
        gridwidth=2,
    ),
    showlegend=False,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [17]:
# Scatter plot: one marker per user, compound Vader score against net karma,
# with the celebrity name shown on hover.
data = [
    go.Scatter(
        mode='markers',
        x=counts['vader_compound'],
        y=counts['karma_compound'],
        text=counts['Celebrity'],
    ),
]

layout = go.Layout(
    title="User's Karma vs Vader Score",
    showlegend=False,
    hovermode='closest',
    xaxis={'title': 'Compound Vader Score', 'ticklen': 5, 'gridwidth': 2},
    yaxis={'title': 'Net Karma', 'ticklen': 5, 'gridwidth': 2},
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)