In [56]:
import ast
import json

import ipycytoscape
import matplotlib.pyplot as plt
import networkit as nk
import numpy as np
import pandas as pd
import tabulate

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [57]:
# Load the full movie graph and print networkit's structural summary.
G = nk.readGraph("graphs/graph_full_final.gml", nk.Format.GML)
nk.overview(G)

# Partition the graph into communities with networkit's default detector.
communities = nk.community.detectCommunities(G)

# Modularity lies in [-0.5, 1]; it usually depends both on how well the
# algorithm performed and on how distinct the community structure is.
modularity = nk.community.Modularity().getQuality(communities, G)
print('Communities Modularity', modularity)

# Headline figures of the partition, printed one per line.
partition_summary = (
    ('Number of communities: ', communities.numberOfSubsets()),
    ('Communities sizes: ', communities.subsetSizes()),
    ('Number of nodes belonging to communities: ', communities.numberOfElements()),
)
for label, value in partition_summary:
    print(label, value)

Network Properties:
nodes, edges			37476, 528026
directed?			False
weighted?			False
isolated nodes			0
self-loops			0
density				0.000752
clustering coefficient		0.039616
min/max/avg degree		1, 32, 28.179421
degree assortativity		0.310737
number of connected components	3
size of largest component	37472 (99.99 %)
Communities detected in 0.09515 [s]
solution properties:
-------------------  ------------
# communities           15
min community size       2
max community size   15307
avg. community size   2498.4
imbalance                6.12525
edge cut             98068
edge cut (portion)       0.185726
modularity               0.571586
-------------------  ------------
Communities Modularity 0.5715860918071738
Number of communities:  15
Communities sizes:  [15307, 2390, 4651, 2649, 6058, 1037, 1059, 2179, 1779, 334, 25, 2, 2, 2, 2]
Number of nodes belonging to communities:  37476



networkit.Timer is deprecated, will be removed in future updates.



In [97]:
# One row per detected community: its id and the number of movies it holds.
community_sizes = communities.subsetSizes()
df_communities = pd.DataFrame({
    'community_nb': range(len(community_sizes)),
    'size': community_sizes,
})

chart_title = 'Distribution of movies in 9 main communities'

# Bar chart of community sizes, also exported as a standalone HTML page.
fig = px.bar(
    df_communities,
    x='community_nb',
    y='size',
    color='size',
    height=400,
    text_auto='.2s',
    title=chart_title,
)
fig.show()
fig.write_html("html_files/communities_count_barplot.html")

# Same data as a pie chart, with absolute value and share inside each slice.
fig = px.pie(
    df_communities,
    values='size',
    names='community_nb',
    title=chart_title,
    color='size',
)
fig.update_traces(textposition='inside', textinfo='value+percent')
fig.show()
fig.write_html("html_files/communities_count_pie.html")

In [59]:
# Read the cleaned movie table, discard its first CSV column and add an
# empty (NaN) community column that a later cell fills in.
df_clean = (
    pd.read_csv('df_clean.csv', encoding='utf-8')
    .iloc[:, 1:]
    .assign(community_nb=np.nan)
)
df_clean.head(4)

Unnamed: 0,wikiID,freeID,name,release_date,runtime,languages,countries,genres,plot,characters,actors,director,color,community_nb
0,975900,/m/03vyhn,Ghosts of Mars,2001.0,long,{'English Language'},{'United States of America'},"{'Supernatural', 'Thriller', 'Science Fiction'...","Set in the second half of the 22nd century, th...","{'Benchley', 'Tres', 'Uno', 'Big Daddy Mars', ...","{'Liam Waite', 'Peter Jason', 'Rick Edelstein'...",John Carpenter,color,
1,9363483,/m/0285_cd,White Of The Eye,1987.0,long,{'English Language'},{'United Kingdom'},"{'Erotic thriller', 'Thriller', 'Psychological...",A series of murders of rich young women throug...,set(),"{'David Keith', 'Cathy Moriarty'}",Cassian Elwes,color,
2,261236,/m/01mrr1,A Woman in Flames,1983.0,long,{'German Language'},{'Germany'},{'Drama'},"Eva, an upper class housewife, becomes frustra...",set(),"{'Hanns Zischler', 'Mathieu Carrière', 'Gudrun...",Robert van Ackeren,color,
3,18998739,/m/04jcqvw,The Sorcerer's Apprentice,2002.0,long,{'English Language'},{'South Africa'},"{'Fantasy', 'World cinema', 'Family Film', 'Ad...","Every hundred years, the evil Morgana returns...",{'Morgana'},"{'Greg Melvill-Smith', 'Sean Taylor', 'Kelly L...",,,


In [60]:
# Community inspected by the plotting cells further down (they read this global).
index_community = 9

# Tag every movie row with the id of the community its graph node belongs to.
# NOTE(review): this assumes graph node ids coincide with df_clean row labels
# and that subset ids run from 0 to numberOfSubsets()-1 — TODO confirm.
for community_id in range(communities.numberOfSubsets()):
    member_rows = list(communities.getMembers(community_id))
    df_clean.loc[member_rows, 'community_nb'] = int(community_id)
df_clean.head(10)

[10240, 20481, 10242, 20495, 20496, 2065, 26643, 2070, 30748, 26660, 14377, 6188, 24620, 4145, 30769, 18487, 32827, 16453, 32840, 16475, 22625, 24682, 26741, 32889, 36996, 8327, 32906, 16527, 30881, 2220, 14514, 8372, 18612, 28855, 37055, 6342, 8393, 8395, 30931, 8404, 12505, 26846, 18658, 10470, 20710, 22762, 8438, 24822, 26871, 6394, 37118, 26886, 20743, 8456, 18712, 10525, 4382, 26911, 292, 4390, 317, 8509, 4415, 322, 28997, 35148, 31070, 33119, 18784, 26976, 33125, 22894, 18806, 4473, 24953, 29054, 12673, 8579, 22915, 29062, 37268, 20889, 12700, 16801, 20897, 16810, 35246, 436, 16821, 14781, 25034, 35279, 14804, 2519, 20957, 29149, 18914, 6648, 37376, 29187, 20999, 29199, 12825, 21022, 16931, 550, 35371, 2605, 19007, 10819, 581, 35405, 10832, 16983, 33377, 17004, 29295, 10865, 19058, 17017, 12933, 17032, 23180, 27281, 31381, 15002, 19103, 29353, 21173, 29365, 33461, 17086, 8898, 17097, 15053, 2772, 13014, 4827, 4834, 29413, 4838, 10993, 15116, 19220, 23321, 2849, 15149, 817, 819, 2

Unnamed: 0,wikiID,freeID,name,release_date,runtime,languages,countries,genres,plot,characters,actors,director,color,community_nb
0,975900,/m/03vyhn,Ghosts of Mars,2001.0,long,{'English Language'},{'United States of America'},"{'Supernatural', 'Thriller', 'Science Fiction'...","Set in the second half of the 22nd century, th...","{'Benchley', 'Tres', 'Uno', 'Big Daddy Mars', ...","{'Liam Waite', 'Peter Jason', 'Rick Edelstein'...",John Carpenter,color,0.0
1,9363483,/m/0285_cd,White Of The Eye,1987.0,long,{'English Language'},{'United Kingdom'},"{'Erotic thriller', 'Thriller', 'Psychological...",A series of murders of rich young women throug...,set(),"{'David Keith', 'Cathy Moriarty'}",Cassian Elwes,color,1.0
2,261236,/m/01mrr1,A Woman in Flames,1983.0,long,{'German Language'},{'Germany'},{'Drama'},"Eva, an upper class housewife, becomes frustra...",set(),"{'Hanns Zischler', 'Mathieu Carrière', 'Gudrun...",Robert van Ackeren,color,2.0
3,18998739,/m/04jcqvw,The Sorcerer's Apprentice,2002.0,long,{'English Language'},{'South Africa'},"{'Fantasy', 'World cinema', 'Family Film', 'Ad...","Every hundred years, the evil Morgana returns...",{'Morgana'},"{'Greg Melvill-Smith', 'Sean Taylor', 'Kelly L...",,,3.0
4,6631279,/m/0gffwj,Little city,1997.0,long,{'English Language'},{'United States of America'},"{'Romance Film', 'Comedy-drama', 'Ensemble Fil...","Adam, a San Francisco-based artist who works a...","{'Rebecca', 'Nina'}","{'Jon Bon Jovi', 'Josh Charles', 'Penelope Ann...",Roberto Benabib,color,0.0
5,171005,/m/016ywb,Henry V,1989.0,long,{'English Language'},{'United Kingdom'},"{'Epic', 'War film', 'Period piece', 'Film ada...",{{Plot|dateAct 1Act 2Act 3Act 4Act 5 Finally n...,"{'Mistress Nell Quickly', 'Duke Thomas Beaufor...","{'Robert Stephens', 'Alec McCowen', 'Richard B...",Kenneth Branagh,color,0.0
6,18296435,/m/04cqrs4,Aaah Belinda,1986.0,,{'Turkish Language'},{'Turkey'},{'Comedy'},"Serap, a young actress with a strong, lively p...",set(),{'Müjde Ar'},Atıf Yılmaz,,2.0
7,11250635,/m/02r52hc,The Mechanical Monsters,,,{'English Language'},{'United States of America'},"{'Science Fiction', 'Animation', 'Fantasy', 'A...",The story starts as one of the robots flies i...,set(),"{'Joan Alexander', 'Bud Collyer'}",Dave Fleischer,,1.0
8,77856,/m/0kcn7,Mary Poppins,1964.0,long,{'English Language'},{'United States of America'},"{'Musical', ""Children's/Family"", 'Fantasy', ""C...",The film opens with Mary Poppins perched in a...,"{'Admiral Boom', 'Mr. Dawes Senior', 'George B...","{'Dick Van Dyke', 'Matthew Garber', 'David Tom...",Robert Stevenson,color,0.0
9,21926710,/m/05p45cv,White on Rice,2009.0,long,set(),{'United States of America'},"{'Romance Film', 'Indie', 'Romantic comedy', '...",Jimmy ([[Hiroshi Watanabe loves dinosaurs and...,set(),"{'Justin Kwong', 'Jennifer Klekas', 'Hiroshi W...",Dave Boyle,color,0.0


In [61]:
def _parse_set_repr(raw):
    """Turn the text form of a Python set (as stored in the CSV) into a list.

    The previous implementation deleted every apostrophe and brace and split on
    ", ". That corrupted values containing an apostrophe: the set repr quotes
    them with double quotes, so ``"Children's/Family"`` became
    ``"Childrens/Family"`` with the double quotes kept as part of the value.
    Parsing the text as a Python literal keeps each element intact.

    Parameters
    ----------
    raw : object
        Cell value; expected to be a string such as ``"{'a', 'b'}"`` or
        ``"set()"``.

    Returns
    -------
    list of str or None
        Elements in the order they appear in the text, or ``None`` for an
        empty set (``"set()"``) or a non-string value such as NaN.
    """
    if not isinstance(raw, str):
        return None
    text = raw.strip()
    if text == 'set()':
        return None
    if text.startswith('{') and text.endswith('}'):
        try:
            # Swap the braces for brackets so the literal evaluates to a list,
            # which preserves the written order (a set would not).
            return list(ast.literal_eval('[' + text[1:-1] + ']'))
        except (ValueError, SyntaxError):
            pass
    # Fallback for text that is not a valid literal: legacy naive split.
    return text.replace("'", "").replace("{", "").replace("}", "").split(", ")


# Columns that hold a stringified set and must become real lists.
SET_COLUMNS = ['genres', 'languages', 'countries', 'characters', 'actors']

df_clean2 = df_clean.copy()
# Move the community id to the front of the table.
df_clean2.insert(0, 'community_nb', df_clean2.pop('community_nb'))
for column in SET_COLUMNS:
    df_clean2[column] = df_clean[column].apply(_parse_set_repr)
# Decade of release, e.g. 1987.0 -> 1980.0 (NaN release dates stay NaN).
df_clean2['decade'] = df_clean2['release_date'] - df_clean2['release_date'] % 10
df_clean2.head()

Unnamed: 0,community_nb,wikiID,freeID,name,release_date,runtime,languages,countries,genres,plot,characters,actors,director,color,decade
0,0.0,975900,/m/03vyhn,Ghosts of Mars,2001.0,long,[English Language],[United States of America],"[Supernatural, Thriller, Science Fiction, Spac...","Set in the second half of the 22nd century, th...","[Benchley, Tres, Uno, Big Daddy Mars, Dos, Lie...","[Liam Waite, Peter Jason, Rick Edelstein, Ice ...",John Carpenter,color,2000.0
1,1.0,9363483,/m/0285_cd,White Of The Eye,1987.0,long,[English Language],[United Kingdom],"[Erotic thriller, Thriller, Psychological thri...",A series of murders of rich young women throug...,,"[David Keith, Cathy Moriarty]",Cassian Elwes,color,1980.0
2,2.0,261236,/m/01mrr1,A Woman in Flames,1983.0,long,[German Language],[Germany],[Drama],"Eva, an upper class housewife, becomes frustra...",,"[Hanns Zischler, Mathieu Carrière, Gudrun Land...",Robert van Ackeren,color,1980.0
3,3.0,18998739,/m/04jcqvw,The Sorcerer's Apprentice,2002.0,long,[English Language],[South Africa],"[Fantasy, World cinema, Family Film, Adventure]","Every hundred years, the evil Morgana returns...",[Morgana],"[Greg Melvill-Smith, Sean Taylor, Kelly LeBroc...",,,2000.0
4,0.0,6631279,/m/0gffwj,Little city,1997.0,long,[English Language],[United States of America],"[Romance Film, Comedy-drama, Ensemble Film, Dr...","Adam, a San Francisco-based artist who works a...","[Rebecca, Nina]","[Jon Bon Jovi, Josh Charles, Penelope Ann Mill...",Roberto Benabib,color,1990.0


In [62]:
# Load the probabilities of belonging to each topic (large embedding model,
# without topic reduction, with names). The file has no header row.
df_prob = pd.read_csv('./topics/topics_probabilities_new_embedding.csv', header=None)
# Topic metadata; its 'Name' column is used in the next cell to label df_prob's columns.
df_topic = pd.read_csv('./topics/topic_info.csv') 

In [63]:
# Label the probability columns with the topic names. The first entry of
# topic_info is skipped — presumably the outlier/"no topic" row; TODO confirm.
topic_names = df_topic['Name'].values.tolist()[1:]
df_prob.columns = topic_names

# Make the cell safe to re-run: if the topic columns were already joined by a
# previous execution, drop them first. Otherwise the join below raises because
# of overlapping column names. A partial overlap is left alone so that a
# genuine name clash still surfaces as an error.
if set(topic_names).issubset(df_clean2.columns):
    df_clean2 = df_clean2.drop(columns=topic_names)

# Append one probability column per topic, aligned on the row index.
df_clean2 = df_clean2.join(df_prob)

In [64]:
def _count_table(values, label):
    """Return a frequency table of ``values`` sorted by descending count.

    Parameters
    ----------
    values : pandas.Series
        Values to count; missing values are ignored by ``value_counts``.
    label : str
        Name of the column that receives the distinct values.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'count'`` and ``label``, on a fresh 0..n-1 index.
    """
    counts = values.value_counts()
    return pd.DataFrame({
        'count': counts.to_numpy(),
        label: counts.index.to_numpy(),
    })


# (key in dict_communities, source column, label column, holds lists?)
COMMUNITY_FEATURES = [
    ('genre', 'genres', 'genre', True),
    ('countries', 'countries', 'country', True),
    ('languages', 'languages', 'languages', True),
    ('actors', 'actors', 'actor', True),
    ('characters', 'characters', 'character', True),
    ('director', 'director', 'director', False),
    ('runtime', 'runtime', 'runtime', False),
    ('color', 'color', 'color', False),
    ('decade', 'decade', 'decade', False),
]

# Topic probability columns are everything to the right of 'decade', the last
# metadata column. Locating it by name replaces the hard-coded position 15.
first_topic_position = df_clean2.columns.get_loc('decade') + 1

dict_communities = {}

for i in range(communities.numberOfSubsets()):
    # Filter the community once instead of once per feature.
    members = df_clean2.loc[df_clean2['community_nb'] == i]

    # Most probable topic of each movie, then how often each topic wins.
    dominant_topic = members.iloc[:, first_topic_position:].idxmax(axis=1)
    tables = {"first topic": _count_table(dominant_topic, 'first topic')}

    for key, column, label, holds_lists in COMMUNITY_FEATURES:
        # List-valued columns are flattened so every element is counted;
        # scalar columns (including 'decade') are counted directly.
        values = members[column].explode() if holds_lists else members[column]
        tables[key] = _count_table(values, label)

    dict_communities[i] = tables


In [98]:
# Community whose feature distributions are plotted below.
index_community = 5

# (key in dict_communities, x-axis column, number of leading rows to show)
bar_specs = [
    ('first topic', 'first topic', 10),
    ('genre', 'genre', 10),
    ('countries', 'country', 10),
    ('languages', 'languages', 10),
    ('actors', 'actor', 10),
    ('characters', 'character', 10),
    ('director', 'director', 10),
    ('runtime', 'runtime', 10),
    ('color', 'color', 2),
    ('decade', 'decade', 10),
]

# One bar chart per feature, most frequent values first.
for key, x_column, n_rows in bar_specs:
    top_rows = dict_communities[index_community][key][:n_rows]
    fig = px.bar(top_rows, x=x_column, y='count', title="Title")
    fig.show()

In [94]:
# Overview of one community: ten feature distributions on a 2 x 5 grid.
# NOTE(review): `index_community` is read from the notebook's global state;
# it must have been set by an earlier cell.

# (key in dict_communities, x-axis column, number of leading rows to show),
# listed in row-major order of the grid.
subplot_specs = [
    ('first topic', 'first topic', 10),
    ('genre', 'genre', 10),
    ('countries', 'country', 10),
    ('languages', 'languages', 10),
    ('actors', 'actor', 10),
    ('characters', 'character', 10),
    ('director', 'director', 10),
    ('runtime', 'runtime', 10),
    ('color', 'color', 2),
    ('decade', 'decade', 10),
]
n_cols = 5

# The figure is titled "... with Titles", so each panel now actually gets one.
fig = make_subplots(
    rows=2,
    cols=n_cols,
    subplot_titles=[key for key, _, _ in subplot_specs],
)

for position, (key, x_column, n_rows) in enumerate(subplot_specs):
    top_rows = dict_communities[index_community][key][:n_rows]
    # Let plotly express build the bar trace, then copy it into the grid.
    express_trace = px.bar(top_rows, x=x_column, y='count').data[0]
    grid_row, grid_col = divmod(position, n_cols)
    fig.add_trace(go.Bar(express_trace), row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=700, width=1100, title_text="Multiple Subplots with Titles")

fig.show()

In [67]:
# Share of each genre in the selected community, labelled inside the slices.
# TODO: group genres below 1 % of the total into a single 'Other genres' slice.
fig = px.pie(
    dict_communities[index_community]['genre'],
    values='count',
    names='genre',
    title='Title',
).update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [82]:
def community_dropdown_figure(table_key, label_column, n_communities=9, top_n=10):
    """Build one bar figure with a button per community to switch between them.

    Replaces three hand-copied stanzas that differed only in the feature
    plotted. Each community contributes one bar trace of its ``top_n`` most
    frequent values; only community 0 is visible initially, and each button
    shows exactly one trace.

    Parameters
    ----------
    table_key : str
        Key of the frequency table inside ``dict_communities[community]``.
    label_column : str
        Column of that table used on the x axis; also used as the figure
        title once a button is clicked.
    n_communities : int, default 9
        Number of communities (ids 0..n_communities-1) to include.
    top_n : int, default 10
        Number of leading rows of each table to plot.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding one trace per community and the button menu.
    """
    traces = []
    buttons = []
    for community in range(n_communities):
        top_rows = dict_communities[community][table_key][:top_n]
        # Let plotly express build the styled bar trace, then copy it.
        express_trace = px.bar(top_rows, x=label_column, y='count').data[0]
        traces.append(go.Bar(express_trace, visible=(community == 0)))

        # Visibility mask that is True only for this community's trace.
        only_this_one = [other == community for other in range(n_communities)]
        buttons.append({
            "args": [{"visible": only_this_one},
                     {"title": label_column, "annotations": []}],
            "label": f"Community {community}",
            "method": "update",
        })

    # The menu is rendered as a column of buttons to the right of the plot.
    updatemenus = [{
        "buttons": buttons,
        "direction": "down",
        "showactive": True,
        "type": "buttons",
        "x": 1.05,
        "xanchor": "left",
        "y": 1.1,
        "yanchor": "top",
    }]
    return go.Figure(data=traces, layout=go.Layout(updatemenus=updatemenus))


# (key in dict_communities, x-axis column) for each switchable figure.
for table_key, label_column in [
    ('first topic', 'first topic'),
    ('genre', 'genre'),
    ('characters', 'character'),
]:
    fig = community_dropdown_figure(table_key, label_column)
    fig.show()


In [75]:
from PIL import Image

In [91]:
def plot_community(index_community):
    """Build, save and return the summary dashboard of one community.

    The figure is a 3x5 subplot grid. The first row is a single full-width
    cell that displays the pre-rendered image
    ``images/clusters/cluster<index_community>.png``; the two rows below hold
    ten bar charts built from the tables stored in the module-level
    ``dict_communities[index_community]``.

    Parameters
    ----------
    index_community
        Key of the community in ``dict_communities``. Its ``str()`` form is
        also used to build the image path and the output HTML path.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (also written to disk, see below).

    Side effects
    ------------
    Writes ``html_files/cluster<index_community>_plotly.html``; the
    ``html_files`` directory is created when it does not exist yet.

    Raises
    ------
    FileNotFoundError
        If the cluster image cannot be opened.
    """
    # Local import: only needed to make sure the output directory exists.
    import os

    image_path = "images/clusters/cluster" + str(index_community) + ".png"
    plot_name = "cluster " + str(index_community) + " graph"

    # Titles are assigned to the subplots in row-major order, so they must
    # stay aligned with the (row, col) positions listed in `panels` below.
    fig = make_subplots(
        rows=3, cols=5,
        specs=[[{"colspan": 5}, None, None, None, None],
               [{}, {}, {}, {}, {}],
               [{}, {}, {}, {}, {}]],
        subplot_titles=(plot_name, "Decade", "Characters", "Directors", "Runtime", "Color",
                        "Actors", "Languages", "Countries", "Genres", "Most represented topics"),
        row_heights=[0.5, 0.2, 0.2],
        vertical_spacing=0.15,
        column_widths=[10, 10, 10, 10, 10])

    # Extent, in axis units, of the area reserved for the cluster image.
    img_width = 200
    img_height = 200
    scale_factor = 1

    # Invisible scatter trace spanning the image area: it gives the top
    # subplot an axis range so the auto-range logic has something to fit.
    fig.add_trace(
        go.Scatter(
            x=[0, img_width * scale_factor],
            y=[0, img_height * scale_factor],
            mode="markers",
            marker_opacity=0
        )
    )

    # Draw the cluster image on the axes of the top subplot. The file handle
    # is released as soon as the image has been handed over to the figure.
    with Image.open(image_path) as cluster_image:
        fig.add_layout_image(
            dict(
                x=60,
                sizex=img_width * scale_factor,
                y=img_height * scale_factor,
                sizey=img_height * scale_factor,
                xref="x",
                yref="y",
                opacity=1.0,
                source=cluster_image)
        )

    # One entry per bar chart, in the order the traces are added:
    # (key in dict_communities[index_community], x column, rows kept, row, col).
    # Every table is expected to expose a 'count' column next to the x column.
    panels = (
        ("actors", "actor", 10, 3, 1),
        ("characters", "character", 10, 2, 2),
        ("director", "director", 10, 2, 3),
        ("runtime", "runtime", 10, 2, 4),
        ("color", "color", 2, 2, 5),
        ("decade", "decade", 10, 2, 1),
        ("languages", "languages", 10, 3, 2),
        ("countries", "country", 10, 3, 3),
        ("genre", "genre", 10, 3, 4),
        ("first topic", "first topic", 10, 3, 5),
    )

    community_tables = dict_communities[index_community]
    for table_key, x_column, max_rows, grid_row, grid_col in panels:
        # plotly express builds a fully styled bar trace; only that trace is
        # reused, the throw-away express figure and its layout are dropped.
        express_fig = px.bar(community_tables[table_key][:max_rows], x=x_column, y="count")
        fig.add_trace(go.Bar(express_fig.data[0]), row=grid_row, col=grid_col)

    # Tick labels are shown on every x axis, tilted by default ...
    fig.update_xaxes(showticklabels=True, tickangle=-45)
    # ... except for decade, runtime and color, whose labels stay horizontal.
    for flat_col in (1, 4, 5):
        fig.update_xaxes(tickangle=0, row=2, col=flat_col)

    # The image subplot shows no axes at all.
    fig.update_xaxes(visible=False, showticklabels=False, row=1, col=1)
    fig.update_yaxes(visible=False, showticklabels=False, row=1, col=1)

    # Compact, fixed-size figure without legend and with a small base font.
    fig.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        autosize=False,
        width=700,
        height=700,
        showlegend=False,
        font=dict(size=9),
    )
    # Subplot titles are annotations; keep them slightly larger than the ticks.
    fig.update_annotations(font_size=11)

    save_path = "html_files/cluster" + str(index_community) + "_plotly.html"
    # Create the output directory up front so the export cannot fail on it.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    fig.write_html(save_path)

    return fig

In [92]:
# Run this cell (note left for Camille).
# Builds the dashboard of communities 0 to 8. The figures returned by
# plot_community are discarded here, so the visible result of this loop is the
# set of HTML files that plot_community writes for each index.
# NOTE(review): 9 presumably matches the nine communities offered by the
# dropdown figure above -- confirm before changing it.
for i in range(9):
    plot_community(i)