In [None]:
import json
import pandas as pd 
import matplotlib.pyplot as plt
import os
import math
import numpy as np



import networkx as nx

from bokeh.io import output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine, StaticLayoutProvider,EdgesAndLinkedNodes, NodesAndLinkedEdges, LabelSet
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
from bokeh.palettes import Blues8, Reds8, Purples8, Oranges8, Viridis8, Spectral8,Category20_20
from bokeh.transform import linear_cmap
from bokeh.models import Range1d, Plot
from sklearn.preprocessing import LabelEncoder  

from networkx import community

from bokeh.plotting import figure
from bokeh.resources import CDN
from bokeh.embed import file_html





In [None]:
# Configure Bokeh so that the show() calls in the cells below render inside the notebook.
output_notebook()

In [None]:
def load_data_csv(name,csv=True):
    """Load a JSON navigation log and optionally convert it to a DataFrame.

    The file is expected to hold a JSON object that maps an action key to a
    record (itself a JSON object of field -> value).

    Parameters
    ----------
    name : str or path-like
        Path of the JSON file to read.
    csv : bool, default True
        When True, return a DataFrame with one row per action (in file order,
        default integer index). When False, return the parsed JSON unchanged.

    Returns
    -------
    pandas.DataFrame or dict
        The tabular view of the log, or the raw parsed JSON when ``csv`` is False.
    """
    # The context manager closes the file even if parsing fails; JSON is read as UTF-8
    # so the result does not depend on the platform's default encoding.
    with open(name, encoding='utf-8') as json_file:
        data = json.load(json_file)

    if not csv:
        return data

    # Building the frame from the records themselves aligns values by field name,
    # so a record with a different key order (or a missing key) cannot shift columns.
    # An empty log yields an empty DataFrame.
    return pd.DataFrame(list(data.values()))

In [None]:
# Load the "m4" navigation log as a DataFrame (one row per recorded action).
df = load_data_csv('./data/navigation_data_m4.txt')
# Commented-out one-off exports that wrote row ranges of this log to per-method CSV files.
# df.iloc[0:16].to_csv('Anais_Methode2.csv',sep=';')
# df.iloc[17:].to_csv('Anais_Methode3.csv',sep=';')
# df.iloc[33:].to_csv('Anais_Methode4.csv',sep=';')

In [None]:
def save_bokeh_graph(p,name,path=''):
    """Export a Bokeh plot as a standalone HTML file.

    Parameters
    ----------
    p : Bokeh model
        The plot to render (passed to ``bokeh.embed.file_html``).
    name : str
        File name without extension; ``.html`` is appended.
    path : str, default ''
        Prefix concatenated directly in front of ``name`` (include a trailing
        separator when it is a directory).
    """
    html = file_html(p, CDN, "my plot")

    # The context manager closes the file even if the write fails; the explicit
    # UTF-8 encoding keeps non-ASCII labels writable whatever the platform default is.
    with open(f"{path}{name}.html", "w", encoding="utf-8") as html_file:
        html_file.write(html)

### Methode 1

In [None]:

# Participants whose "methode1" logs are plotted, with the edge colour used for each.
name_list = {'Anais':{'color':'skyblue'},
             'Ella':{'color':'sandybrown'}
             ,'Camille':{'color':'orchid'}
             ,'Lou':{'color':'darkgreen'}}
frames = []

for name in list(name_list.keys()):
    path = f'./data/{name}_methode1.txt'
    #Load json
    df = load_data_csv(path)
    # convert to network
    # add weight + id channel + id follow + user
    # .copy() detaches the filtered rows from df, so the column assignments below write
    # to an independent frame instead of a slice (no SettingWithCopyWarning).
    df_video = df[df['page_title'] == 'video'].copy()
    # Channel of the next video row of the same user. shift(-1) leaves the last row
    # without a successor (missing value) and also works when there is no video row.
    df_video['channel_follow'] = df_video['channel'].shift(-1)

    # Per-user number of video rows of each channel (recomputed over all users below).
    df_video['weight'] = df_video['channel'].apply(lambda x: df_video[df_video['channel'] == x].shape[0])
    df_network_perso = df_video[['channel','channel_follow','weight']].assign(user=name)

    frames += [df_network_perso]
df_network = pd.concat(frames,ignore_index=True)




# Encode channel names as integer node ids. The encoder is fitted before dropna(),
# so it also knows the channels that only occur in rows removed below.
le = LabelEncoder()
le.fit(df_network['channel'])
# Removes every row with a missing value, which includes each user's last video
# (it has no channel_follow).
df_network = df_network.dropna()
df_network['index'] = list(df_network.index)
df_network['channel_id'] = le.transform(df_network['channel'])
df_network['channel_follow_id'] = le.transform(df_network['channel_follow'])
# Number of remaining rows, across all users, in which the channel is the source.
df_network['weight'] = df_network['channel'].apply(lambda x: df_network[df_network['channel'] == x].shape[0])



# One directed edge per transition; parallel edges are kept (MultiDiGraph).
G = nx.from_pandas_edgelist(df_network,"channel_id","channel_follow_id",['weight','user'],create_using=nx.MultiDiGraph)
factor = 1/(max(df_network['weight']))

# Node attributes: "weight" is the displayed size (log-scaled relative to the largest
# count), "weight_o" the raw count. When a channel occurs in several rows the last row wins.
# NOTE(review): nodes that only occur as channel_follow_id get no attributes here — confirm
# how they should be sized and labelled.
attrs = {}
for row_index,row in df_network.iterrows():
    attrs[row['channel_id']] = {"channel": row['channel'], "user":row['user'],"weight":math.log(1 + row['weight']*factor)*40,"weight_o":row['weight']}
# attrs = {0: {"attr1": 20, "attr2": "nothing"}, 1: {"attr2": 3}}
nx.set_node_attributes(G, attrs)

# Communities
# communities = community.greedy_modularity_communities(G)
# # Create empty dictionaries
# modularity_class = {}
# modularity_color = {}
# #Loop through each community in the network
# for community_number, comm in enumerate(communities):
#     #For each member of the community, add their community number and a distinct color
#     for name in comm: 
#         modularity_class[name] = community_number
#         modularity_color[name] = Spectral8[community_number]
# nx.set_node_attributes(G, modularity_class, 'modularity_class')
# nx.set_node_attributes(G, modularity_color, 'color')



# Colour every edge with the colour of the user who made the transition.
edge_attrs = {}
for start_node, end_node, key, ch in G.edges(data=True,keys=True):
    edge_color = name_list[ch['user']]['color'] 
    edge_attrs[(start_node, end_node,key)] = edge_color
nx.set_edge_attributes(G, edge_attrs, "edge_color")

edges = G.edges(keys=True)
colors = [G[u][v][k]['edge_color'] for u,v,k in edges]
weights = [G[u][v][k]['weight'] for u,v,k in edges]

pos = nx.spring_layout(G,k=1/math.sqrt(len(G))*1.3) # Define the layout for node positioning
# nx.draw(G, pos, with_labels=True, node_size=300, node_color='skyblue',edge_color=colors, font_size=10, font_color='black')
# Display the graph
# plt.show()


graph = from_networkx(G, pos, scale=1.8, center=(0,0))
HOVER_TOOLTIPS = [("channel","@channel"),('views',"@weight_o")]
# x_range=(-2, 2), y_range=(-2, 2),
p = figure(
           x_axis_location=None, y_axis_location=None,
           tools="hover,box_zoom,pan,zoom_out,zoom_in,reset,save", tooltips=HOVER_TOOLTIPS)

p.grid.grid_line_color = None
# Add some new columns to the node renderer data source
# graph.node_renderer.data_source.data['index'] = list(range(len(G)))

# #Set node size and color
graph.node_renderer.glyph = Circle(size="weight", fill_color='cadetblue')
# # #Set edge opacity and width and color
graph.edge_renderer.glyph = MultiLine(line_color="edge_color",line_width=2,
                                               line_alpha=1)

# graph.node_renderer.glyph.update(size=20, fill_color="colors")
p.renderers.append(graph)
show(p)


In [None]:
# save_bokeh_graph(p,'plot_nav_BelAnLouCamEl')

### Methode 2

In [None]:
# Participants whose "methode2" logs are plotted, with the edge colour used for each.
name_list = {'Anais':{'color':'skyblue'},
             'Ella':{'color':'sandybrown'}
             #,'Camille':{'color':'orchid'}
             ,'Lou':{'color':'darkgreen'}
             }
frames = []

for name in list(name_list.keys()):
    print(name)
    path = f'./data/{name}_methode2.txt'
    if os.path.exists(path):
        #Load json
        df = load_data_csv(path)
    else:
        # Fall back to the ';'-separated CSV export when the JSON log is absent.
        path = f'./data/{name}_methode2.csv'
        df = pd.read_csv(path,sep=';')
        
    # convert to network
    # add weight + id channel + id follow + user
    # .copy() detaches the filtered rows from df, so the column assignments below write
    # to an independent frame instead of a slice (no SettingWithCopyWarning).
    df_video = df[df['page_title'] == 'video'].copy()
    # Channel of the next video row of the same user. shift(-1) leaves the last row
    # without a successor (missing value) and also works when there is no video row.
    df_video['channel_follow'] = df_video['channel'].shift(-1)
    
    # Per-user number of video rows of each channel (recomputed over all users below).
    df_video['weight'] = df_video['channel'].apply(lambda x: df_video[df_video['channel'] == x].shape[0])
    df_network_perso = df_video[['channel','channel_follow','weight']].assign(user=name)

    frames += [df_network_perso]
df_network = pd.concat(frames,ignore_index=True)




# Encode channel names as integer node ids. The encoder is fitted before the rows are
# filtered, so it also knows the channels that only occur in rows removed below.
le = LabelEncoder()
le.fit(df_network['channel'])
# Removes every row with a missing value, which includes each user's last video
# (it has no channel_follow), then the rows whose source channel is an empty string.
df_network = df_network.dropna()
df_network= df_network.drop(df_network.loc[df_network['channel']==''].index)

df_network['index'] = list(df_network.index)
df_network['channel_id'] = le.transform(df_network['channel'])
df_network['channel_follow_id'] = le.transform(df_network['channel_follow'])
# Number of remaining rows, across all users, in which the channel is the source.
df_network['weight'] = df_network['channel'].apply(lambda x: df_network[df_network['channel'] == x].shape[0])



# One directed edge per transition; parallel edges are kept (MultiDiGraph).
G = nx.from_pandas_edgelist(df_network,"channel_id","channel_follow_id",['weight','user'],create_using=nx.MultiDiGraph)
factor = 1/(max(df_network['weight']))

# Node attributes: "weight" is the displayed size (log-scaled relative to the largest
# count), "weight_o" the raw count. When a channel occurs in several rows the last row wins.
# NOTE(review): nodes that only occur as channel_follow_id get no attributes here — confirm
# how they should be sized and labelled.
attrs = {}
for row_index,row in df_network.iterrows():
    attrs[row['channel_id']] = {"channel": row['channel'], "user":row['user'],"weight":math.log(1 + row['weight']*factor)*40,"weight_o":row['weight']}
# attrs = {0: {"attr1": 20, "attr2": "nothing"}, 1: {"attr2": 3}}
nx.set_node_attributes(G, attrs)

# Communities
# communities = community.greedy_modularity_communities(G)
# # Create empty dictionaries
# modularity_class = {}
# modularity_color = {}
# #Loop through each community in the network
# for community_number, comm in enumerate(communities):
#     #For each member of the community, add their community number and a distinct color
#     for name in comm: 
#         modularity_class[name] = community_number
#         modularity_color[name] = Spectral8[community_number]
# nx.set_node_attributes(G, modularity_class, 'modularity_class')
# nx.set_node_attributes(G, modularity_color, 'color')



# Colour every edge with the colour of the user who made the transition.
edge_attrs = {}
for start_node, end_node, key, ch in G.edges(data=True,keys=True):
    edge_color = name_list[ch['user']]['color'] 
    edge_attrs[(start_node, end_node,key)] = edge_color
nx.set_edge_attributes(G, edge_attrs, "edge_color")

edges = G.edges(keys=True)
colors = [G[u][v][k]['edge_color'] for u,v,k in edges]
weights = [G[u][v][k]['weight'] for u,v,k in edges]

pos = nx.spring_layout(G,k=1/math.sqrt(len(G))*1.3) # Define the layout for node positioning
# nx.draw(G, pos, with_labels=True, node_size=300, node_color='skyblue',edge_color=colors, font_size=10, font_color='black')
# Display the graph
# plt.show()


graph = from_networkx(G, pos, scale=1.8, center=(0,0))
HOVER_TOOLTIPS = [("channel","@channel"),('views',"@weight_o")]
# x_range=(-2, 2), y_range=(-2, 2),
p = figure(
           x_axis_location=None, y_axis_location=None,
           tools="hover,box_zoom,pan,zoom_out,zoom_in,reset,save", tooltips=HOVER_TOOLTIPS)

p.grid.grid_line_color = None
# Add some new columns to the node renderer data source
# graph.node_renderer.data_source.data['index'] = list(range(len(G)))

# #Set node size and color
graph.node_renderer.glyph = Circle(size="weight", fill_color='cadetblue')
# # #Set edge opacity and width and color
graph.edge_renderer.glyph = MultiLine(line_color="edge_color",line_width=2,
                                               line_alpha=1)

# graph.node_renderer.glyph.update(size=20, fill_color="colors")
p.renderers.append(graph)
show(p)


In [None]:
# df = load_data_csv('./data/Anais_methode2.txt')
# df.to_csv('Anais_methode2_2.csv',sep=';')

### Methode 3

In [None]:

# Participants whose "methode3" logs are plotted, with the edge colour used for each.
name_list = {'Anais':{'color':'skyblue'},
             'Ella':{'color':'sandybrown'}
             ,'Camille':{'color':'orchid'}
             ,'Lou':{'color':'darkgreen'}
             ,'Bellon':{'color':'lightgreen'}}
frames = []

for name in list(name_list.keys()):
    print(name)
    path = f'./data/{name}_methode3.txt'
    if os.path.exists(path):
        #Load json
        df = load_data_csv(path)
    else:
        # Fall back to the ';'-separated CSV export when the JSON log is absent.
        path = f'./data/{name}_methode3.csv'
        df = pd.read_csv(path,sep=';')
        
    # convert to network
    # add weight + id channel + id follow + user
    # .copy() detaches the filtered rows from df, so the column assignments below write
    # to an independent frame instead of a slice (no SettingWithCopyWarning).
    df_video = df[df['page_title'] == 'video'].copy()
    # Channel of the next video row of the same user. shift(-1) leaves the last row
    # without a successor (missing value) and also works when there is no video row.
    df_video['channel_follow'] = df_video['channel'].shift(-1)
    
    # Per-user number of video rows of each channel (recomputed over all users below).
    df_video['weight'] = df_video['channel'].apply(lambda x: df_video[df_video['channel'] == x].shape[0])
    df_network_perso = df_video[['channel','channel_follow','weight']].assign(user=name)

    frames += [df_network_perso]
df_network = pd.concat(frames,ignore_index=True)




# Encode channel names as integer node ids. The encoder is fitted before the rows are
# filtered, so it also knows the channels that only occur in rows removed below.
le = LabelEncoder()
le.fit(df_network['channel'])
# Removes every row with a missing value, which includes each user's last video
# (it has no channel_follow), then the rows whose source channel is an empty string.
df_network = df_network.dropna()
df_network= df_network.drop(df_network.loc[df_network['channel']==''].index)

df_network['index'] = list(df_network.index)
df_network['channel_id'] = le.transform(df_network['channel'])
df_network['channel_follow_id'] = le.transform(df_network['channel_follow'])
# Number of remaining rows, across all users, in which the channel is the source.
df_network['weight'] = df_network['channel'].apply(lambda x: df_network[df_network['channel'] == x].shape[0])



# One directed edge per transition; parallel edges are kept (MultiDiGraph).
G = nx.from_pandas_edgelist(df_network,"channel_id","channel_follow_id",['weight','user'],create_using=nx.MultiDiGraph)
factor = 1/(max(df_network['weight']))

# Node attributes: "weight" is the displayed size (log-scaled relative to the largest
# count), "weight_o" the raw count. When a channel occurs in several rows the last row wins.
# NOTE(review): nodes that only occur as channel_follow_id get no attributes here — confirm
# how they should be sized and labelled.
attrs = {}
for row_index,row in df_network.iterrows():
    attrs[row['channel_id']] = {"channel": row['channel'], "user":row['user'],"weight":math.log(1 + row['weight']*factor)*40,"weight_o":row['weight']}
# attrs = {0: {"attr1": 20, "attr2": "nothing"}, 1: {"attr2": 3}}
nx.set_node_attributes(G, attrs)

# Communities
# communities = community.greedy_modularity_communities(G)
# # Create empty dictionaries
# modularity_class = {}
# modularity_color = {}
# #Loop through each community in the network
# for community_number, comm in enumerate(communities):
#     #For each member of the community, add their community number and a distinct color
#     for name in comm: 
#         modularity_class[name] = community_number
#         modularity_color[name] = Spectral8[community_number]
# nx.set_node_attributes(G, modularity_class, 'modularity_class')
# nx.set_node_attributes(G, modularity_color, 'color')



# Colour every edge with the colour of the user who made the transition.
edge_attrs = {}
for start_node, end_node, key, ch in G.edges(data=True,keys=True):
    edge_color = name_list[ch['user']]['color'] 
    edge_attrs[(start_node, end_node,key)] = edge_color
nx.set_edge_attributes(G, edge_attrs, "edge_color")

edges = G.edges(keys=True)
colors = [G[u][v][k]['edge_color'] for u,v,k in edges]
weights = [G[u][v][k]['weight'] for u,v,k in edges]

pos = nx.spring_layout(G,k=1/math.sqrt(len(G))*1.3) # Define the layout for node positioning
# nx.draw(G, pos, with_labels=True, node_size=300, node_color='skyblue',edge_color=colors, font_size=10, font_color='black')
# Display the graph
# plt.show()


graph = from_networkx(G, pos, scale=1.8, center=(0,0))
HOVER_TOOLTIPS = [("channel","@channel"),('views',"@weight_o")]
# x_range=(-2, 2), y_range=(-2, 2),
p = figure(
           x_axis_location=None, y_axis_location=None,
           tools="hover,box_zoom,pan,zoom_out,zoom_in,reset,save", tooltips=HOVER_TOOLTIPS)

p.grid.grid_line_color = None
# Add some new columns to the node renderer data source
# graph.node_renderer.data_source.data['index'] = list(range(len(G)))

# #Set node size and color
graph.node_renderer.glyph = Circle(size="weight", fill_color='cadetblue')
# # #Set edge opacity and width and color
graph.edge_renderer.glyph = MultiLine(line_color="edge_color",line_width=2,
                                               line_alpha=1)

# graph.node_renderer.glyph.update(size=20, fill_color="colors")
p.renderers.append(graph)
show(p)


In [None]:
# save_bokeh_graph(p,'methode3_graph_all')

### Recommendation

In [None]:
def create_table_recommendation(df, n_recommendations=10):
    """Build the edge table linking each watched channel to its recommendations.

    For every row of ``df`` except the last one whose ``page_title`` is ``'video'``,
    the function emits one edge per recommended channel (flag 1) plus one edge to
    the channel of the following row, i.e. the navigation actually made (flag 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Navigation log with the columns ``page_title``, ``channel`` and
        ``recommendation``; a recommendation is a mapping whose keys ``'0'``,
        ``'1'``, ... each give a mapping with a ``'channel'`` entry.
    n_recommendations : int, default 10
        Number of recommendation slots read from each row.

    Returns
    -------
    pandas.DataFrame
        Columns ``source``, ``target``, ``recommendation`` (int 1 or 0) and
        ``weight`` (share of the rows that have the same target). Rows whose
        target is an empty string are removed; the index is not reset.
    """
    sources, targets, flags = [], [], []

    for r in range(df.shape[0]-1):
        row = df.iloc[r]
        if row['page_title'] != 'video':
            continue
        next_row = df.iloc[r+1]
        try:
            recommendation = row['recommendation']
            recommended = [recommendation[str(i)]['channel'] for i in range(n_recommendations)]
            source_channel = row['channel']
            next_channel = next_row['channel']
        except (KeyError, TypeError, IndexError):
            # Missing or malformed recommendation data: show the row and skip it.
            print(row)
            continue
        # Everything was read successfully, so the three lists grow together and
        # always keep the same length.
        sources += [source_channel]*(len(recommended)+1)
        targets += recommended + [next_channel]
        # Plain integers: 1 for a recommendation, 0 for the navigation actually made.
        flags += [1]*len(recommended) + [0]

    relations = pd.DataFrame({'source': sources,
                              'target': targets,
                              'recommendation': flags})
    relations = relations.loc[relations['target'] != ''].copy()
    # Share of the remaining rows pointing at each target, computed in one pass.
    target_counts = relations['target'].value_counts()
    relations['weight'] = relations['target'].map(target_counts).fillna(0) / relations.shape[0]
    return relations

        


    
    

In [None]:
def plot_recommendation(df_relation, seed=None):
    """Draw the channel recommendation/navigation graph with Bokeh.

    Every channel is a node and every row of ``df_relation`` a directed edge.
    Nodes are coloured by modularity community and sized by the ``weight`` of
    the channel as a target; edges are styled by their ``recommendation`` flag.
    The figure is displayed with ``show`` and returned.

    Parameters
    ----------
    df_relation : pandas.DataFrame
        Edge table with the columns ``source``, ``target``, ``recommendation``
        and ``weight`` (the output of ``create_table_recommendation``).
    seed : int or None, default None
        Seed passed to ``networkx.spring_layout``; give a value to obtain the
        same node positions on every run.

    Returns
    -------
    The Bokeh figure holding the graph and the node labels.

    Raises
    ------
    ValueError
        If no row is left once the rows with missing values are dropped.
    """
    # Size given to channels that are never a target (they have no 'weight' share).
    source_only_node_size = 5

    df_relation = df_relation.dropna()
    if df_relation.empty:
        raise ValueError("df_relation has no complete row to plot")

    # Encode every channel name (source or target) as an integer node id.
    # assign() returns a new frame, so the caller's table is left untouched.
    le = LabelEncoder()
    le.fit(list(df_relation['source']) + list(df_relation['target']))
    df_relation = df_relation.assign(
        source_id=le.transform(df_relation['source']),
        target_id=le.transform(df_relation['target']),
    )

    G_rel = nx.from_pandas_edgelist(df_relation,"source_id","target_id","recommendation",create_using=nx.MultiDiGraph)

    # Node attributes shown in the tooltip and used for the node size.
    attrs = {}
    for node_id in G_rel.nodes():
        as_target = df_relation.loc[df_relation['target_id']==node_id]
        if as_target.empty:
            # The channel only occurs as a source: take its name from that side
            # instead of failing on an empty selection.
            channel = df_relation.loc[df_relation['source_id']==node_id].iloc[0]['source']
            share = 0.0
            node_size = source_only_node_size
        else:
            first_hit = as_target.iloc[0]
            channel = first_hit['target']
            share = first_hit['weight']
            node_size = share*400
        attrs[node_id] = {'weight': node_size,
                          'channel': channel,
                          'percent': share*100,
                          # number of rows in which the channel is the target
                          'nb_reco_unique': as_target.shape[0]}
    nx.set_node_attributes(G_rel, attrs)

    # Navigation edges (recommendation == 0) are drawn thicker and fully opaque so that
    # they stand out from the recommendation edges.
    color_navigation = 'tan'
    color_recommendation = 'gainsboro'
    width_navigation, width_recommendation = 2, 1

    edge_colors = {}
    edge_widths = {}
    edge_alphas = {}
    for start_node, end_node, key, ch in G_rel.edges(data=True,keys=True):
        is_navigation = ch['recommendation'] == 0
        edge_key = (start_node, end_node, key)
        edge_colors[edge_key] = color_navigation if is_navigation else color_recommendation
        edge_widths[edge_key] = width_navigation if is_navigation else width_recommendation
        edge_alphas[edge_key] = 1 if is_navigation else 0.7

    nx.set_edge_attributes(G_rel, edge_colors, "edge_color")
    nx.set_edge_attributes(G_rel, edge_widths, "weight")
    nx.set_edge_attributes(G_rel, edge_alphas, "alpha")

    # Communities: one colour per modularity community. The palette has 8 colours,
    # so it is reused cyclically when more communities are found.
    communities = community.greedy_modularity_communities(G_rel)
    modularity_class = {}
    modularity_color = {}
    for community_number, comm in enumerate(communities):
        for member in comm:
            modularity_class[member] = community_number
            modularity_color[member] = Spectral8[community_number % len(Spectral8)]
    nx.set_node_attributes(G_rel, modularity_class, 'modularity_class')
    nx.set_node_attributes(G_rel, modularity_color, 'color')

    # Define the layout for node positioning
    pos = nx.spring_layout(G_rel,k=1/math.sqrt(len(G_rel))*1.3,seed=seed)
    graph = from_networkx(G_rel, pos, scale=1.8, center=(0,0))

    HOVER_TOOLTIPS = [('channel','@channel'),('percent','@percent'),
                    ("Modularity Class", "@modularity_class"),
                    ("Uniq rec", "@nb_reco_unique")]
    p = figure(
            x_axis_location=None, y_axis_location=None,
            tools="hover,box_zoom,pan,zoom_out,zoom_in,reset,save, tap", tooltips=HOVER_TOOLTIPS)

    p.grid.grid_line_color = None
    edge_highlight_color = 'darkslategrey'
    # Set node size and color
    graph.node_renderer.glyph = Circle(fill_color='color',size='weight')
    graph.node_renderer.hover_glyph = Circle(size='weight', fill_color=edge_highlight_color, line_width=2)

    # Set edge opacity and width and color
    graph.edge_renderer.glyph = MultiLine(line_color="edge_color",line_width="weight",
                                                line_alpha="alpha")
    graph.edge_renderer.hover_glyph = MultiLine(line_color=edge_highlight_color, line_width="weight")

    #Highlight nodes and edges
    graph.selection_policy = NodesAndLinkedEdges()
    graph.inspection_policy = NodesAndLinkedEdges()

    # Add labels: only channels that are the target of more than two rows get a name.
    x, y = zip(*graph.layout_provider.graph_layout.values())
    node_labels = [node_data['channel'] if node_data['nb_reco_unique'] > 2 else ''
                   for _, node_data in G_rel.nodes(data=True)]
    source = ColumnDataSource({'x': x, 'y': y, 'name': node_labels})
    labels = LabelSet(x='x', y='y', text='name', source=source, background_fill_color='white', text_font_size='10px', background_fill_alpha=.7)
    p.renderers.append(labels)

    p.renderers.append(graph)

    show(p)
    return p

# NOTE: cyprien has the same percentage as Jamy curieux (3.4) but a higher degree (30 vs 8)

In [None]:
# Load the Anais "methode2_2" navigation log as a DataFrame for the recommendation cells below.
df_recom_all = load_data_csv('./data/Anais_methode2_2.txt')


In [None]:
# Build and plot the recommendation graph from the last three rows of the loaded log.
df_relation_recom = create_table_recommendation(df_recom_all[-3:])
# Second name for the same table; not used again in the visible cells.
analaaa=df_relation_recom
p = plot_recommendation(df_relation_recom)

In [None]:
# Recommendation graph for the "m4" navigation log.
df_recom_all = load_data_csv('./data/navigation_data_m4.txt')
# Only rows from position 33 onwards are used
# (presumably the start of the method 4 session — TODO confirm).
df_relation_recom = create_table_recommendation(df_recom_all[33:])
# Second name for the same table; not used again in the visible cells.
analaaa=df_relation_recom
p = plot_recommendation(df_relation_recom)
# save_bokeh_graph(p,'HD_m4_anais')

In [None]:
# Recommendation graph for Ella's "methode4" log.
df_recom_all = load_data_csv('./data/Ella_methode4.txt')
# Only rows from position 33 onwards are used
# (NOTE(review): same offset as the m4 cell above — confirm it applies to this file).
df_relation_recom = create_table_recommendation(df_recom_all[33:])
# Second name for the same table; not used again in the visible cells.
analaaa=df_relation_recom
p = plot_recommendation(df_relation_recom)
# NOTE(review): the commented-out file name below mentions Lou while the data loaded is Ella's.
# save_bokeh_graph(p,'M4_Lou')

In [None]:
# Load the combined "m1_m3" navigation log; the next cell overwrites df_recom_all.
df_recom_all = load_data_csv('./data/navigation_data_m1_m3.txt')

In [None]:
# Recommendation graph for Anais' "methode3" log, using every row of the file.
df_recom_all = load_data_csv('./data/Anais_methode3.txt')
# Also export the log as a ';'-separated CSV in the current working directory.
df_recom_all.to_csv('Anais_methode3.csv',sep=';')
df_relation_recom = create_table_recommendation(df_recom_all)
# Second name for the same table; not used again in the visible cells.
analaaa=df_relation_recom
p = plot_recommendation(df_relation_recom)
# save_bokeh_graph(p,'HD_m4_anais')

In [None]:
# Write the most recently built figure `p` to m3_recommendation_anais.html in the working directory.
save_bokeh_graph(p,'m3_recommendation_anais')