In [1]:
import pandas as pd
import networkx as nx
import pickle
import ast

# Load the cleaned inputs. Every file is tab-separated and keeps its index in
# the first column.
df1 = pd.read_csv('../reading_and_cleaning/guest_host_cleaned_podcasts.csv', sep='\t', index_col=0)
split_hosts = pd.read_csv('../reading_and_cleaning/split_hosts.csv', sep='\t', index_col=0)
guest_durations = pd.read_csv('../reading_and_cleaning/guest_durations.csv', sep='\t', index_col=0)
# The guest/host graph G2 is built once, further down in this cell, right
# before the category attribute is attached; an identical earlier build here
# was discarded unused and has been dropped.

podcast_info = pd.read_csv('../reading_and_cleaning/meta_podcast_info.csv', sep='\t', index_col=0)

# Each 'Hosts' cell holds the textual repr of a Python list; parse it with
# ast.literal_eval (safe for literals) and collect the distinct host names.
host_list = {
    host
    for hosts_literal in podcast_info['Hosts']
    for host in ast.literal_eval(hosts_literal)
}

def load_obj(name):
    """Unpickle and return the object stored in ``<name>.pkl``.

    ``name`` is the path without the ``.pkl`` extension. Only use this on
    files you trust: unpickling can execute arbitrary code.
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Mapping loaded from top_categories.pkl; it is attached to the graph below as
# the 'cat' node attribute, so its keys are expected to be node names.
top_cat = load_obj('top_categories')

# Turn the category labels into integer codes (pandas 'category' dtype codes)
# and keep them as a plain {key: code} dict for lookups.
top_cat_num = (
    pd.DataFrame.from_dict(top_cat, orient='index')[0]
    .astype('category')
    .cat.codes
    .to_dict()
)

# Undirected graph linking the 'guests' and 'hosts' columns, with the
# 'duration' column stored on each edge.
G2 = nx.from_pandas_dataframe(
    guest_durations,
    'guests',
    'hosts',
    edge_attr=['duration'],
    create_using=nx.Graph(),
)
nx.set_node_attributes(G2, 'cat', top_cat)
print(G2.number_of_nodes())

11232


In [2]:
# Nodes with fewer than this many connections are dropped before plotting.
MIN_DEGREE = 3

# dict(...) makes the lookup work whether degree() returns a plain dict or a
# (node, degree) view object.
# NOTE: G2 is pruned in place. Removing nodes lowers the degree of their
# neighbours, so re-running this cell can remove additional nodes.
remove = [node for node, degree in dict(G2.degree()).items() if degree < MIN_DEGREE]
G2.remove_nodes_from(remove)
nx.number_of_nodes(G2)

1699

In [3]:
import plotly.plotly as py
from plotly.graph_objs import *


# Force-directed 2-D coordinates for every remaining node.
# NOTE(review): no seed is set here, so the layout presumably differs between
# runs -- consider seeding the random generator if a stable picture is needed.
pos = nx.spring_layout(G2)

# Find the node whose position is nearest to the point (0.5, 0.5), comparing
# squared distances. ncenter stays 0 if no node is closer than dmin's start value.
ncenter, dmin = 0, 1
for n, (x, y) in pos.items():
    d = (x - 0.5) ** 2 + (y - 0.5) ** 2
    if d < dmin:
        ncenter, dmin = n, d

# Hop distance from that central node to every node reachable from it
# (not used by the cells visible in this part of the notebook).
p = nx.single_source_shortest_path_length(G2, ncenter)

# Store the coordinates on the graph so later cells can read G2.node[n]['pos'].
nx.set_node_attributes(G2, 'pos', pos)

In [4]:
# Empty template for the edges: thin grey lines, no hover text, no legend entry.
# The x/y lists are filled in a later cell.
edge_trace = Scatter(
    x=[],
    y=[],
    mode='lines',
    hoverinfo='none',
    showlegend=False,
    line=Line(width=0.5, color='#888'))


# Empty template for the nodes: markers whose colour, size, hover text and name
# lists are filled per node in a later cell.
node_trace = Scatter(
    x=[],
    y=[],
    text=[],
    name=[],
    mode='markers',
    hoverinfo='text',
    showlegend=True,
    marker=Marker(
        color=[],
        size=[],
        showscale=True,
        # colorscale options
        # 'Greys' | 'Greens' | 'Bluered' | 'Hot' | 'Picnic' | 'Portland' |
        # 'Jet' | 'RdBu' | 'Blackbody' | 'Earth' | 'Electric' | 'YlOrRd' | 'YlGnBu'
        colorscale='Rainbow',
        reversescale=True,
        colorbar={
            'thickness': 15,
            'title': 'Categories',
            'xanchor': 'left',
            'titleside': 'right',
        },
        line={'width': 1}))


In [5]:
# Per-node score loaded from pr_dict.pkl, used below to scale marker sizes
# (presumably PageRank, judging by the name -- confirm against the cell that
# wrote the pickle).
pr = load_obj('pr_dict')

# Every edge adds its two endpoints followed by None to the shared x/y lists,
# so all edges live in one trace as separate segments.
for edge in G2.edges():
    x0, y0 = G2.node[edge[0]]['pos']
    x1, y1 = G2.node[edge[1]]['pos']
    edge_trace['x'] += [x0, x1, None]
    edge_trace['y'] += [y0, y1, None]

# Single pass over the nodes: G2.nodes() is evaluated once instead of being
# rebuilt for every lookup, and position, colour, name, size and hover text
# are appended together so the lists stay aligned by construction.
for node in G2.nodes():
    x, y = G2.node[node]['pos']
    node_trace['x'].append(x)
    node_trace['y'].append(y)
    # Colour by integer category code; keep the readable category as the name.
    node_trace['marker']['color'].append(top_cat_num[node])
    node_trace['name'].append(top_cat[node])
    # Base size of 10 plus a term proportional to the node's score.
    node_trace['marker']['size'].append(10 + 300 * pr[node])
    # Hover text is the node's own name.
    node_trace['text'].append(node)


In [7]:
# Free-text annotation listing the category names, anchored in paper
# coordinates just right of the plot area (presumably lined up by hand with
# the colour bar -- the <br> counts set the vertical spacing).
category_labels = dict(
    text="TV & Film<br><br><br><br><br><br>Sports & Recreation<br><br><br><br><br><br>Society & Culture<br><br><br><br><br><br>Science & Medicine<br><br><br><br><br><br><br>News & Politics<br><br><br><br><br><br>Music<br><br><br><br>Health<br><br><br><br>Education<br><br><br><br><br>Comedy<br><br><br><br><br>Business<br><br><br><br><br>Arts",
    showarrow=False,
    xref="paper", yref="paper",
    x=1.03, y=0.96)

# Both axes are hidden: no grid, zero line or tick labels.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

network_layout = Layout(
    title='Podcast Network Graph',
    titlefont=dict(size=16),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    annotations=[category_labels],
    xaxis=XAxis(**hidden_axis),
    yaxis=YAxis(**hidden_axis))

# Edges first, then nodes, matching the original trace order.
fig = Figure(data=Data([edge_trace, node_trace]), layout=network_layout)

py.iplot(fig, filename='test_network')

In [9]:
# Spot-check: show the category stored in top_cat for a single name.
top_cat['Jen Kirkman']

'Health'