In [2]:
import pandas as pd
import networkx as nx
import pickle
import ast

df1 = pd.read_csv('../reading_and_cleaning/guest_host_cleaned_podcasts.csv', sep='\t', index_col=0)
split_hosts = pd.read_csv('../reading_and_cleaning/split_hosts.csv', sep='\t', index_col=0)
guest_durations = pd.read_csv('../reading_and_cleaning/guest_durations.csv', sep='\t', index_col=0)
G2 = nx.from_pandas_dataframe(guest_durations, 'guests', 'hosts', edge_attr=['duration'], create_using=nx.Graph())

podcast_info = pd.read_csv('../reading_and_cleaning/meta_podcast_info.csv', sep='\t', index_col=0)
host_list = []
for index1, row1 in podcast_info.iterrows():
    hosts = ast.literal_eval(row1['Hosts'])
    for host in hosts:
        host_list.append(host)

host_list = set(host_list)

def load_obj(name ):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

top_cat = load_obj('top_categories')
top_cat_num = pd.DataFrame.from_dict(top_cat, orient='index')
top_cat_num = top_cat_num[0].astype('category').cat.codes.to_dict()

G2 = nx.from_pandas_dataframe(guest_durations, 'guests', 'hosts', edge_attr=['duration'], create_using=nx.Graph())
nx.set_node_attributes(G2, 'cat', top_cat) 
print(nx.number_of_nodes(G2))

12813


In [3]:
remove = [node for node,degree in G2.degree().items() if degree < 3]
G2.remove_nodes_from(remove)
nx.number_of_nodes(G2)

1869

In [4]:
import plotly.plotly as py
from plotly.graph_objs import *


pos = nx.spring_layout(G2)

dmin=1
ncenter=0
for n in pos:
    x,y=pos[n]
    d=(x-0.5)**2+(y-0.5)**2
    if d<dmin:
        ncenter=n
        dmin=d

p=nx.single_source_shortest_path_length(G2,ncenter)

nx.set_node_attributes(G2, 'pos', pos)

In [5]:
edge_trace = Scatter(
    x=[],
    y=[],
    line=Line(width=0.5,color='#888'),
    #line=Line(width=0.5,colorscale='Electric',color=[]),
    hoverinfo='none',
    mode='lines',
    showlegend=False)


node_trace = Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    name=[],
    showlegend=True,
    marker=Marker(
        showscale=True,
        # colorscale options
        # 'G2reys' | 'G2reens' | 'Bluered' | 'Hot' | 'Picnic' | 'Portland' |
        # Jet' | 'RdBu' | 'Blackbody' | 'Earth' | 'Electric' | 'YIOrRd' | 'YIG2nBu'
        colorscale='Rainbow',    #'YIG2nBu',
        reversescale=True,
        color=[],
        #name=[],
        size=[],
        colorbar=dict(
            thickness=15,
            title='Categories',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=1)))


In [6]:
pr = load_obj('pr_dict')

for edge in G2.edges():
    x0, y0 = G2.node[edge[0]]['pos']
    x1, y1 = G2.node[edge[1]]['pos']
    edge_trace['x'] += [x0, x1, None]
    edge_trace['y'] += [y0, y1, None]
    #edge_trace['line']['width'].append(0.3*(np.log(G2[edge[0]][edge[1]]['duration']+1)-6))
    #edge_trace['line']['color'].append(G2[edge[0]][edge[1]]['duration'])
    #print(0.3*(np.log(G2[edge[0]][edge[1]]['duration'])-6))
    
for node in G2.nodes():
    x, y = G2.node[node]['pos']
    node_trace['x'].append(x)
    node_trace['y'].append(y)

for node, adjacencies in enumerate(G2.adjacency_list()):
    #print(G2.nodes()[node])
    #node_trace['marker']['color'].append(len(adjacencies))
    node_trace['marker']['color'].append(top_cat_num[G2.nodes()[node]])
    node_trace['name'].append(top_cat[G2.nodes()[node]])
    node_trace['marker']['size'].append(10+300*(pr[G2.nodes()[node]]))
    node_info = G2.nodes()[node]   #'# of connections: '+str(len(adjacencies))
    node_trace['text'].append(node_info)


KeyError: ' Trae Crowder'

In [8]:
pr[' Trae Crowder']

KeyError: ' Trae Crowder'

In [None]:
fig = Figure(data=Data([edge_trace, node_trace]),
             layout=Layout(
                title='Podcast Network Graph',
                titlefont=dict(size=16),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="TV & Film<br><br><br><br><br><br>Sports & Recreation<br><br><br><br><br><br>Society & Culture<br><br><br><br><br><br>Science & Medicine<br><br><br><br><br><br><br>News & Politics<br><br><br><br><br><br>Music<br><br><br><br>Health<br><br><br><br>Education<br><br><br><br><br>Comedy<br><br><br><br><br>Business<br><br><br><br><br>Arts",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=1.03, y=0.96) ],
                xaxis=XAxis(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=YAxis(showgrid=False, zeroline=False, showticklabels=False)))

py.iplot(fig, filename='test_network')

In [None]:
top_cat['Jen Kirkman']

In [9]:
podcast_info = pd.read_csv('../reading_and_cleaning/meta_podcast_info.csv', sep='\t', index_col=0)
similarities = pd.read_csv('podcast_similarities.csv', sep='\t', index_col=0)
similarities = similarities[similarities['score']>0]
similarities

Unnamed: 0,podcast1,podcast2,score
1,The Joe Rogan Experience,The Duncan Trussell Family Hour,72523873291
2,The Joe Rogan Experience,Bertcast's podcast,115915968403
3,The Joe Rogan Experience,The Fighter & The Kid,23422356713
4,The Joe Rogan Experience,Ari Shaffir's Skeptic Tank,73295028049
5,The Joe Rogan Experience,Under The Skin with Russell Brand,1153617697
6,The Joe Rogan Experience,Pointless: with Kevin Pereira,35157945337
7,The Joe Rogan Experience,ID10T with Chris Hardwick,13409421894
8,The Joe Rogan Experience,Waking Up with Sam Harris,2746939403
9,The Joe Rogan Experience,Kill Tony,37016236338
10,The Joe Rogan Experience,The Rubin Report,8171214049


In [10]:
G1 = nx.from_pandas_dataframe(similarities, 'podcast1', 'podcast2', edge_attr=['score'], create_using=nx.Graph())

In [11]:
import plotly.plotly as py
from plotly.graph_objs import *
import numpy as np


pos = nx.spring_layout(G1, k=100, iterations=500, weight="score")
# pos = nx.spectral_layout(G1, weight="score")

dmin=1
ncenter=0
for n in pos:
    x,y=pos[n]
    d=(x-0.5)**2+(y-0.5)**2
    if d<dmin:
        ncenter=n
        dmin=d

p=nx.single_source_shortest_path_length(G1,ncenter)

nx.set_node_attributes(G1, 'pos', pos)
pos

{'10% Happier with Dan Harris': array([ 0.46681412,  0.69176283]),
 'Alison Rosen Is Your New Best Friend': array([ 0.50386423,  0.38064679]),
 'All Things Comedy Live': array([ 0.61119242,  0.35687738]),
 'Allegedly with Theo Von & Matthew Cole Weiss': array([ 0.35869821,  0.51179096]),
 'Anna Faris Is Unqualified': array([ 0.37415619,  0.42557752]),
 "Ari Shaffir's Skeptic Tank": array([ 0.55762972,  0.40720103]),
 'Armchair Expert with Dax Shepard': array([ 0.08298803,  0.40898952]),
 'Ask Me Another': array([ 0.19668121,  0.37764152]),
 'Aubrey Marcus Podcast': array([ 0.67632964,  0.54253787]),
 "Bertcast's podcast": array([ 0.54070318,  0.41825319]),
 'Bitch Sesh: A Real Housewives Breakdown': array([ 0.28428801,  0.26211579]),
 'Brody Stevens Festival Of Sports': array([ 0.75323462,  0.26995967]),
 'Bulletproof Radio': array([ 0.76878225,  0.80506205]),
 'Bullseye with Jesse Thorn': array([ 0.36110569,  0.40101043]),
 'Canceled': array([ 0.23337663,  0.0746047 ]),
 'Chris Grosso

In [12]:
edge_trace = Scatter(
    x=[],
    y=[],
    line=Line(width=0.5,color='#888'),
    #line=Line(width=0.5,colorscale='Electric',color=[]),
    hoverinfo='none',
    mode='lines',
    showlegend=False)


node_trace = Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
#     name=[],
    showlegend=True,
    marker=Marker(
        showscale=True,
        # colorscale options
        # 'G2reys' | 'G2reens' | 'Bluered' | 'Hot' | 'Picnic' | 'Portland' |
        # Jet' | 'RdBu' | 'Blackbody' | 'Earth' | 'Electric' | 'YIOrRd' | 'YIG2nBu'
        colorscale='Hot',    #'YIG2nBu',
        reversescale=False,
        color=[],
        #name=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2)))

In [13]:
for edge in G1.edges():
    x0, y0 = G1.node[edge[0]]['pos']
    x1, y1 = G1.node[edge[1]]['pos']
    edge_trace['x'] += [x0, x1, None]
    edge_trace['y'] += [y0, y1, None]
    #edge_trace['line']['width'].append(0.3*(np.log(G2[edge[0]][edge[1]]['duration']+1)-6))
    #edge_trace['line']['color'].append(G2[edge[0]][edge[1]]['duration'])
    #print(0.3*(np.log(G2[edge[0]][edge[1]]['duration'])-6))
    
for node in G1.nodes():
    x, y = G1.node[node]['pos']
    node_trace['x'].append(x)
    node_trace['y'].append(y)

for node, adjacencies in enumerate(G1.adjacency_list()):
    #print(G1.nodes()[node])
    node_trace['marker']['color'].append(len(adjacencies))
#     node_trace['marker']['color'].append(top_cat_num[G2.nodes()[node]])
#     node_trace['name'].append(top_cat[G2.nodes()[node]])
#     node_trace['marker']['size'].append(10+300*(pr[G2.nodes()[node]]))
    node_info = G1.nodes()[node]   #'# of connections: '+str(len(adjacencies))
    node_trace['text'].append(node_info)

In [14]:
fig = Figure(data=Data([edge_trace, node_trace]),
             layout=Layout(
                title='Podcast Network Graph',
                titlefont=dict(size=16),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=1.03, y=0.96) ],
                xaxis=XAxis(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=YAxis(showgrid=False, zeroline=False, showticklabels=False)))

py.iplot(fig, filename='podcast_network')

In [None]:
import pandas as pd
import networkx as nx
import numpy as np
import ast

guest_durations = pd.read_csv('../reading_and_cleaning/guest_durations.csv', sep='\t', index_col=0)
G = nx.from_pandas_dataframe(guest_durations, 'guests', 'hosts', edge_attr=['duration'], create_using=nx.Graph())


podcast_info = pd.read_csv('../reading_and_cleaning/meta_podcast_info.csv', sep='\t', index_col=0)
host_list = []
for index1, row1 in podcast_info.iterrows():
    hosts = ast.literal_eval(row1['Hosts'])
    for host in hosts:
        host_list.append(host)

host_list = set(host_list)
print("length of host list", len(host_list))

people = G.nodes()
num_people = len(people)

guest_list = [p for p in people if p not in host_list]
print("length of guest list", len(guest_list))



# G_guests.remove_nodes_from(host_list)
# G_hosts.remove_nodes_from(guest_list)

# print(len(G_hosts), len(G_guests))
# G_guests.degree()

degrees = G.degree()
degrees_guests = []
degrees_hosts = []
degrees_all = []

for key, value in degrees.items():
    degrees_all.append(value)
    if(key in guest_list):
        degrees_guests.append(value)
    if(key in host_list):
        degrees_hosts.append(value)


degree_values_guests = sorted(set(degrees_guests))
degree_values_hosts = sorted(set(degrees_hosts))

degree_values = sorted(set(degrees_all))
max_degree = max(degree_values)

degree_list = np.linspace(max_degree, 1, max_degree)
# print(degrees_all)
cdf = []
for i in degree_list:
    f = [j for j in degrees_all if j <= i]
    frac = len(f)/num_people
    cdf.append(frac)
    
print(cdf)
    
# histogram_guests = [list(degrees_guests.values()).count(i)/float(nx.number_of_nodes(G_guests)) for i in degree_guests_values]

freq_guests = [list(degrees_guests).count(i) for i in degree_values_guests]
freq_hosts = [list(degrees_hosts).count(i) for i in degree_values_hosts]

In [None]:
m_list = np.linspace(1,20,20)
a_list = np.linspace(0,1,51)
# print(m_list, a_list)

# print(len(cdf), num_people, len(degree_list))

cdf_len = len(cdf)

min_error = 1e100
for m in m_list:
    for a in a_list:
        F_model = [(1-((m+m*a*2/(1-a))/(d+a*m*2/(1-a)))**(2/(1-a))) for d in degree_list]
        model_error = 0
        for d in range(cdf_len):
            model_error += ((cdf[d]-F_model[d])/cdf[d])**2.0
#         print(model_error)
        if(model_error<min_error):
            min_error=model_error
            print(m, a, min_error)
            best_a = a
            best_m = m
            
print(best_m, best_a)
best_a = 0.99
# best_m = 10
F_mod = [(1-((best_m+best_m*best_a*2/(1-best_a))/(d+best_a*best_m*2/(1-best_a)))**(2/(1-best_a))) for d in degree_list]

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

params = {'legend.fontsize': 'xx-large',
          'figure.figsize': (10, 5),
         'axes.labelsize': 'xx-large',
         'axes.titlesize':'xx-large',
         'xtick.labelsize':'xx-large',
         'ytick.labelsize':'xx-large'}
pylab.rcParams.update(params)



ccdf = [(1-f) for f in cdf]
mod_ccdf  = [(1-f) for f in F_mod]
# print(ccdf)
plt.scatter(degree_list, ccdf, label='Empirical Distribution')
plt.scatter(degree_list, mod_ccdf, color='Firebrick', label='Model Fit')

plt.xlabel('Degree')
plt.ylabel('1-F(d)')

plt.xscale('log')
plt.yscale('log')

plt.xlim([0.9,1200])
plt.ylim([1e-5,1])

plt.legend()

plt.gcf()
plt.draw()
plt.savefig('plots/cumulative_degree_distribution.pdf',format='pdf')
plt.savefig('plots/cumulative_degree_distribution.png',format='png')
plt.show()

In [None]:


params = {'legend.fontsize': 'xx-large',
          'figure.figsize': (10, 5),
         'axes.labelsize': 'xx-large',
         'axes.titlesize':'xx-large',
         'xtick.labelsize':'xx-large',
         'ytick.labelsize':'xx-large'}
pylab.rcParams.update(params)

# plt.scatter(degree_values_guests, freq_guests, label='Guests only')
# plt.scatter(degree_values_hosts, freq_hosts, color='Firebrick', label='Hosts')

bins=np.logspace(0,3,50)
plt.hist(degrees_guests, log=False, bins=bins, histtype='step', label='Guests only')
plt.hist(degrees_hosts, log=False, bins=bins, histtype='step', color='Firebrick', label='Hosts')

plt.xlabel('Degree')
plt.ylabel('Frequency')

plt.xscale('log')
plt.yscale('log')

plt.legend()

plt.gcf()
plt.draw()
plt.savefig('plots/degree_distribution.pdf',format='pdf')
plt.savefig('plots/degree_distribution.png',format='png')
plt.show()

In [None]:
df1 = pd.read_csv('../reading_and_cleaning/guest_host_cleaned_podcasts.csv', sep='\t', index_col=0)
podcasts_per_guest = df1.groupby(['guests'])['podcast'].count()
visits = sorted(set(podcasts_per_guest.values))
histogram = [list(podcasts_per_guest.values).count(i) for i in visits]

In [None]:
f = plt.figure()
plt.bar(visits, histogram)
plt.xlabel('Visits per podcast per guest')
plt.ylabel('Number of guests')
print(histogram)
# plt.xscale('log')
plt.yscale('log')

plt.xlim([0,50])

plt.gcf()
plt.draw()
plt.savefig('visits_per_podcast_per_guest_hist.png',format='png')
plt.show()

In [None]:
split_hosts = pd.read_csv('../reading_and_cleaning/split_hosts.csv', sep='\t', index_col=0)

split_hosts['date'] = pd.to_datetime(split_hosts['date'])
split_hosts.sort_values(by='date', inplace=True)
split_hosts

In [None]:
from datetime import datetime as dt
host_first = split_hosts[split_hosts['hosts']=='Jesse Thorn']['date'].iloc[0]
guest_first = split_hosts[split_hosts['guests']=='Jesse Thorn']['date'].iloc[0]

print(host_first, guest_first, host_first<guest_first)

In [23]:
import plotly.plotly as py
from plotly.graph_objs import *

trace1 = {
  "x": ['Comedy', 'Society & Culture', 'News & Politics', 'Health',
       'Science & Medicine', 'Education', 'Sports & Recreation',
       'Business', 'TV & Film', 'Arts', 'Religion & Spirituality', 'Music',
       'Other Games', 'Technology'], 
  "y": ['Comedy', 'Society & Culture', 'News & Politics', 'Health',
       'Science & Medicine', 'Education', 'Sports & Recreation',
       'Business', 'TV & Film', 'Arts', 'Religion & Spirituality', 'Music',
       'Other Games', 'Technology'], 
  "z": [[  2.25400000e+04,   5.97000000e+02,   2.36000000e+02,
          2.09000000e+02,   4.70000000e+01,   2.20000000e+01,
          3.10000000e+02,   7.00000000e+01,   4.37000000e+02,
          1.03000000e+02,   2.50000000e+01,   5.50000000e+01,
          4.70000000e+01,   3.00000000e+00],
       [  5.97000000e+02,   2.57200000e+03,   5.60000000e+01,
          2.40000000e+01,   4.81000000e+02,   2.60000000e+01,
          3.10000000e+01,   2.20000000e+01,   5.20000000e+01,
          3.00000000e+01,   1.90000000e+01,   2.00000000e+01,
          7.00000000e+00,   0.00000000e+00],
       [  2.36000000e+02,   5.60000000e+01,   2.04400000e+03,
          2.70000000e+01,   3.10000000e+01,   1.70000000e+01,
          2.70000000e+01,   1.70000000e+01,   1.80000000e+01,
          1.20000000e+01,   5.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   1.00000000e+00],
       [  2.09000000e+02,   2.40000000e+01,   2.70000000e+01,
          2.03400000e+03,   2.20000000e+01,   9.00000000e+00,
          9.30000000e+01,   7.10000000e+01,   5.00000000e+00,
          2.90000000e+01,   1.40000000e+01,   2.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  4.70000000e+01,   4.81000000e+02,   3.10000000e+01,
          2.20000000e+01,   8.96000000e+02,   2.00000000e+01,
          3.00000000e+00,   8.00000000e+00,   4.00000000e+00,
          1.10000000e+01,   1.40000000e+01,   1.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  2.20000000e+01,   2.60000000e+01,   1.70000000e+01,
          9.00000000e+00,   2.00000000e+01,   1.51600000e+03,
          1.00000000e+00,   4.10000000e+01,   0.00000000e+00,
          2.00000000e+00,   2.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  3.10000000e+02,   3.10000000e+01,   2.70000000e+01,
          9.30000000e+01,   3.00000000e+00,   1.00000000e+00,
          1.66800000e+03,   8.00000000e+00,   1.20000000e+01,
          2.00000000e+00,   0.00000000e+00,   6.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  7.00000000e+01,   2.20000000e+01,   1.70000000e+01,
          7.10000000e+01,   8.00000000e+00,   4.10000000e+01,
          8.00000000e+00,   1.09800000e+03,   5.00000000e+00,
          1.20000000e+01,   4.00000000e+00,   5.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  4.37000000e+02,   5.20000000e+01,   1.80000000e+01,
          5.00000000e+00,   4.00000000e+00,   0.00000000e+00,
          1.20000000e+01,   5.00000000e+00,   1.18200000e+03,
          2.50000000e+01,   1.00000000e+00,   2.00000000e+00,
          3.00000000e+00,   0.00000000e+00],
       [  1.03000000e+02,   3.00000000e+01,   1.20000000e+01,
          2.90000000e+01,   1.10000000e+01,   2.00000000e+00,
          2.00000000e+00,   1.20000000e+01,   2.50000000e+01,
          6.50000000e+02,   6.00000000e+00,   2.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  2.50000000e+01,   1.90000000e+01,   5.00000000e+00,
          1.40000000e+01,   1.40000000e+01,   2.00000000e+00,
          0.00000000e+00,   4.00000000e+00,   1.00000000e+00,
          6.00000000e+00,   5.46000000e+02,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  5.50000000e+01,   2.00000000e+01,   0.00000000e+00,
          2.00000000e+00,   1.00000000e+00,   0.00000000e+00,
          6.00000000e+00,   5.00000000e+00,   2.00000000e+00,
          2.00000000e+00,   0.00000000e+00,   4.44000000e+02,
          6.00000000e+00,   0.00000000e+00],
       [  4.70000000e+01,   7.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   3.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   6.00000000e+00,
          2.08000000e+02,   0.00000000e+00],
       [  3.00000000e+00,   0.00000000e+00,   1.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   5.80000000e+01]],
#   "colorbar": {"title": ""}, 
  'colorscale': [
        [0, 'rgb(240,249,232)'],        #0
        [1./1000, 'rgb(186,228,188)'], #10
        [1./100, 'rgb(123,204,196)'],  #100
        [1./10, 'rgb(67,162,202)'],   #1000
        [1., 'rgb(8,104,172)'],       #10000
#         [1., 'rgb(8,104,172)'],             #100000
  ],
  "showscale": False,
  "dx": 1, 
  "dy": 1, 
  "name": "trace 0", 
  "showlegend": False, 
  "type": "heatmap", 
  "uid": "248f71", 
  "x0": 0, 
  "xsrc": "Dreamshot:4293:698862", 
  "y0": 0, 
  "ysrc": "Dreamshot:4293:83a556", 
  "zauto": False, 
  "zmax": 22540, 
  "zmin": 0, 
  "zsrc": "Dreamshot:4293:-698862,83a556"
}
# data = Data([trace1])
layout1 = {
  "autosize": False, 
  "bargap": 0.2, 
  "bargroupgap": 0, 
  "barmode": "stack", 
  "boxgap": 0.3, 
  "boxgroupgap": 0.3, 
  "boxmode": "overlay", 
  "dragmode": "zoom", 
  "font": {
    "color": "rgb(33, 33, 33)", 
    "family": "Raleway, sans-serif", 
    "size": 12
  }, 
  "height": 800, 
  "hidesources": False, 
  "hovermode": "x", 
  "legend": {
    "bgcolor": "#fff", 
    "bordercolor": "#000", 
    "borderwidth": 1, 
    "font": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "traceorder": "normal"
  }, 
  "margin": {
    "r": 50, 
    "t": 100, 
    "autoexpand": True, 
    "b": 80, 
    "l": 150, 
    "pad": 0
  }, 
  "paper_bgcolor": "#fff", 
  "plot_bgcolor": "#fff", 
  "separators": ".,", 
  "showlegend": False, 
  "title": "Category Mixing", 
  "titlefont": {
    "color": "", 
    "family": "", 
    "size": 0
  }, 
  "width": 850, 
  "xaxis": {
    "anchor": "y", 
    "autorange": True, 
    "autotick": True, 
    "domain": [0, 1], 
    "dtick": 1, 
    "exponentformat": "SI", 
    "gridcolor": "#ddd", 
    "gridwidth": 1, 
    "linecolor": "#000", 
    "linewidth": 1, 
    "mirror": False, 
    "nticks": 0, 
    "overlaying": False, 
    "position": 0, 
    "range": [-0.5, 11.5], 
    "rangemode": "normal", 
    "showexponent": "all", 
    "showgrid": False, 
    "showline": False, 
    "showticklabels": True, 
    "side": "bottom", 
    "tick0": 0, 
    "tickangle": "auto", 
    "tickcolor": "#000", 
    "tickfont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "ticklen": 5, 
    "ticks": "", 
    "tickwidth": 1, 
    "title": "", 
    "titlefont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "type": "category", 
    "zeroline": False, 
    "zerolinecolor": "#000", 
    "zerolinewidth": 1
  }, 
  "yaxis": {
    "anchor": "x", 
    "autorange": True, 
    "autotick": True, 
    "domain": [0, 1], 
    "dtick": 1, 
    "exponentformat": "SI", 
    "gridcolor": "#ddd", 
    "gridwidth": 1, 
    "linecolor": "#000", 
    "linewidth": 1, 
    "mirror": False, 
    "nticks": 0, 
    "overlaying": False, 
    "position": 0, 
    "range": [-0.5, 30.5], 
    "rangemode": "normal", 
    "showexponent": "all", 
    "showgrid": False, 
    "showline": False, 
    "showticklabels": True, 
    "side": "left", 
    "tick0": 0, 
    "tickangle": "auto", 
    "tickcolor": "#000", 
    "tickfont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "ticklen": 5, 
    "ticks": "", 
    "tickwidth": 1, 
    "title": "", 
    "titlefont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "type": "category", 
    "zeroline": False, 
    "zerolinecolor": "#000", 
    "zerolinewidth": 1
  }
}

In [24]:
fig = Figure(data=Data([trace1]),
             layout=layout1)
py.iplot(fig, filename='category_matrix')

In [27]:
import plotly.plotly as py
import plotly.graph_objs as go

data = [go.Bar(
            x=['Comedy', 'Society & Culture', 'News & Politics', 'Health',
       'Science & Medicine', 'Education', 'Business', 'TV & Film',
       'Sports & Recreation', 'Arts', 'Religion & Spirituality', 'Music',
       'Other Games', 'Technology'],
            y=[5314, 1305,  985,  958,  798,  750,  528,  501,  490,  333,  274,
        114,   57,   30]
    )]

layout = go.Layout(
    yaxis=dict(
        type='log',
        autorange=True
    ),
    title="People's Top Categories",
    titlefont=dict(size=16),
    width=850,
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='category_bar_chart')