In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import networkit as nk
import ipycytoscape 
import tabulate

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load the graph from its GML export and print NetworKit's overview of it.
graph_path = "graphs/graph_full_final.gml"
G = nk.readGraph(graph_path, nk.Format.GML)
nk.overview(G)

# Run community detection with NetworKit's default settings.
communities = nk.community.detectCommunities(G)

# Modularity lies in [-0.5, 1]; it usually reflects both how well the algorithm
# performed and how distinct the community structure of the network is.
modularity = nk.community.Modularity().getQuality(communities, G)
print('Communities Modularity', modularity)

# Basic facts about the resulting partition.
n_communities = communities.numberOfSubsets()
community_sizes = communities.subsetSizes()
n_assigned_nodes = communities.numberOfElements()
print('Number of communities: ',n_communities)
print('Communities sizes: ',community_sizes)
print('Number of nodes belonging to communities: ',n_assigned_nodes)

In [None]:
# Build a (community id, size) table.
# subsetSizeMap() keys every size by its subset id; the positions of the list
# returned by subsetSizes() are not documented to correspond to subset ids, so
# using the list position as the community number could mislabel communities.
size_by_community = communities.subsetSizeMap()
df_communities = pd.DataFrame(
    sorted(size_by_community.items()),
    columns=['community_nb', 'size'],
)

# Bar chart: one bar per community, coloured and annotated with its size.
fig = px.bar(
    df_communities,
    x='community_nb',
    y='size',
    color='size',
    height=400,
    text_auto='.2s',
    title='Number of nodes per community',
)
fig.show()

# Pie chart: share of nodes held by each community, absolute size shown inside.
fig = px.pie(
    df_communities,
    values='size',
    names='community_nb',
    title='Share of nodes per community',
    color='size',
)
fig.update_traces(textposition='inside', textinfo='value')
fig.show()

In [None]:
# Read the cleaned table and drop its first column by position
# (presumably an index column written by a previous to_csv - TODO confirm).
# 'community_nb' starts as NaN and is filled in once communities are known.
df_clean = (
    pd.read_csv('df_clean.csv', encoding='utf-8')
    .iloc[:, 1:]
    .assign(community_nb=np.nan)
)
df_clean.head(4)

In [None]:
# Quick look at the members (node ids) of one community.
index_community=9
print(list(communities.getMembers(index_community)))

# Write each node's community id into df_clean.
# Iterate over the ids the partition actually uses instead of assuming they
# are exactly 0..numberOfSubsets()-1.
# NOTE(review): node ids are used as row labels of df_clean, i.e. this assumes
# node k of the graph is row k of the table - TODO confirm.
for community_id in sorted(communities.getSubsetIds()):
    member_rows = list(communities.getMembers(community_id))
    df_clean.loc[member_rows, 'community_nb'] = community_id
df_clean.head(10)

In [None]:
def _parse_set_repr(raw):
    """Convert the text form of a set (e.g. "{'a', 'b'}") into a list of strings.

    Parameters
    ----------
    raw : str
        Cell content as read from the CSV.

    Returns
    -------
    list of str or None
        The items in the order they appear in the text; None when the text is
        the empty set ('set()') or when the cell is not a string (missing value).

    Notes
    -----
    Parsing is purely textual: every apostrophe is removed and items are split
    on ", ", so an item that itself contains an apostrophe or ", " is altered.
    """
    if not isinstance(raw, str):
        # Missing cells (e.g. NaN) have no .replace(); treat them as "no items".
        return None
    items = raw.replace("'", "").replace("{", "").replace("}", "").split(", ")
    return None if items == ['set()'] else items


# Columns whose cells hold the text form of a set.
SET_COLUMNS = ['genres', 'languages', 'countries', 'characters', 'actors']

# Work on a copy and move 'community_nb' to the first position.
df_clean2 = df_clean.copy()
df_clean2.insert(0, 'community_nb', df_clean2.pop('community_nb'))

for column in SET_COLUMNS:
    df_clean2[column] = df_clean2[column].apply(_parse_set_repr)

# Round the release date down to the start of its decade (e.g. 1987 -> 1980).
df_clean2['decade'] = df_clean2['release_date'] - df_clean2['release_date'] % 10
df_clean2.head()

In [None]:
# Topic-membership probabilities (large embedding model, no topic reduction).
# The file has no header row; column names are attached separately.
topic_probabilities_csv = './topics/topics_probabilities_new_embedding.csv'
df_prob = pd.read_csv(topic_probabilities_csv, header=None)

# Topic metadata; its 'Name' column provides the topic names.
topic_info_csv = './topics/topic_info.csv'
df_topic = pd.read_csv(topic_info_csv)

In [None]:
# Name the probability columns after the topics, skipping the first entry of
# topic_info (presumably a topic without a probability column - TODO confirm).
topic_names = df_topic['Name'].tolist()[1:]
df_prob.columns = topic_names

# Attach the probabilities to the table; rows are matched on the index.
df_clean2 = df_clean2.join(df_prob)

In [None]:
# Position of the first topic-probability column in df_clean2; everything from
# this column onwards is treated as topic probabilities.
# NOTE(review): positional and therefore fragile - verify if columns change.
TOPIC_COLUMNS_START = 15

# (key in the result dict, source column in df_clean2, label column in the
#  summary frame, whether the cells hold lists that must be exploded first)
COMMUNITY_FEATURES = [
    ("genre", 'genres', 'genre', True),
    ("countries", 'countries', 'country', True),
    ("languages", 'languages', 'languages', True),
    ("actors", 'actors', 'actor', True),
    ("characters", 'characters', 'character', True),
    ("director", 'director', 'director', False),
    ("runtime", 'runtime', 'runtime', False),
    ("color", 'color', 'color', False),
    ("decade", 'decade', 'decade', False),
]


def _value_counts_frame(values, label):
    """Count the occurrences of each value, most frequent first.

    Parameters
    ----------
    values : pandas.Series
        Values to count; missing values are ignored by value_counts().
    label : str
        Name of the column that will hold the distinct values.

    Returns
    -------
    pandas.DataFrame
        Columns ['count', label] with a fresh 0..n-1 index.
    """
    counts = values.value_counts()
    return pd.DataFrame({'count': counts.to_numpy(), label: counts.index})


# For every community: one frequency table per feature.
dict_communities = {}

for community_id in range(communities.numberOfSubsets()):
    # Select the community's rows once and reuse them for every feature.
    members = df_clean2.loc[df_clean2['community_nb'] == community_id]

    # Dominant topic of each row = topic column with the highest probability.
    dominant_topic = members.iloc[:, TOPIC_COLUMNS_START:].idxmax(axis=1)
    summaries = {"first topic": _value_counts_frame(dominant_topic, 'first topic')}

    for key, column, label, holds_lists in COMMUNITY_FEATURES:
        # Explode only the column being counted, not the whole wide frame.
        values = members[column].explode() if holds_lists else members[column]
        summaries[key] = _value_counts_frame(values, label)

    dict_communities[community_id] = summaries


In [None]:
# Community whose feature distributions are plotted.
index_community = 1

# (key in dict_communities[...], label column of that frame, rows to display)
FEATURE_PLOTS = [
    ('first topic', 'first topic', 10),
    ('genre', 'genre', 10),
    ('countries', 'country', 10),
    ('languages', 'languages', 10),
    ('actors', 'actor', 10),
    ('characters', 'character', 10),
    ('director', 'director', 10),
    ('runtime', 'runtime', 10),
    ('color', 'color', 2),
    ('decade', 'decade', 10),
]

# One bar chart per feature, limited to the first rows of each frequency table
# (the tables are ordered from most to least frequent).
for key, label, top_n in FEATURE_PLOTS:
    top_values = dict_communities[index_community][key][:top_n]
    fig = px.bar(
        top_values,
        x=label,
        y='count',
        title=f"Community {index_community}: most frequent values for '{key}'",
    )
    fig.show()

In [None]:
# Grid of bar charts (one per feature) for the community chosen in
# `index_community`.

# (key in dict_communities[...], label column of that frame, rows to display),
# listed in the order the panels are filled: left to right, then top to bottom.
SUBPLOT_FEATURES = [
    ('first topic', 'first topic', 10),
    ('genre', 'genre', 10),
    ('countries', 'country', 10),
    ('languages', 'languages', 10),
    ('actors', 'actor', 10),
    ('characters', 'character', 10),
    ('director', 'director', 10),
    ('runtime', 'runtime', 10),
    ('color', 'color', 2),
    ('decade', 'decade', 10),
]
N_COLS = 5
n_rows = -(-len(SUBPLOT_FEATURES) // N_COLS)  # ceiling division

fig = make_subplots(
    rows=n_rows,
    cols=N_COLS,
    # Label every panel so the grid can be read without the code.
    subplot_titles=[key for key, _, _ in SUBPLOT_FEATURES],
)

for position, (key, label, top_n) in enumerate(SUBPLOT_FEATURES):
    top_values = dict_communities[index_community][key][:top_n]
    # Let plotly.express build the bar trace, then copy it into the grid.
    px_trace = px.bar(top_values, x=label, y='count').data[0]
    fig.add_trace(
        go.Bar(px_trace),
        row=position // N_COLS + 1,
        col=position % N_COLS + 1,
    )

fig.update_layout(
    height=700,
    width=1100,
    title_text=f"Community {index_community}: most frequent values per feature",
)

fig.show()

In [None]:
# Genre shares of the selected community as a pie chart.
# TODO: an earlier draft (never finished) intended to relabel genres that make
# up less than 1% of the total count as 'Other genres' before plotting.
genre_counts = dict_communities[index_community]['genre']

fig = px.pie(genre_counts, values='count', names='genre', title='Title')
# Put the percentage and the genre name inside each slice.
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [None]:
import plotly.express as px

# Interactive bar chart of one feature with a button per community.
feature = 'genre'    # key of the frequency table inside dict_communities[...]
MAX_COMMUNITIES = 9  # at most this many communities (lowest ids) get a button
TOP_N = 10           # rows shown per community

# Use the communities that actually exist instead of hard-coding ids 0..8,
# which raises a KeyError when fewer communities were detected.
community_ids = sorted(dict_communities)[:MAX_COMMUNITIES]

bars = []
buttons = []
for position, community_id in enumerate(community_ids):
    counts = dict_communities[community_id][feature]
    # The label column is the one that is not 'count'; its name differs from
    # the dict key for some features (e.g. key 'countries', column 'country').
    label_column = next(col for col in counts.columns if col != 'count')

    # Let plotly.express build the bar trace; only the first community's trace
    # is visible until a button is pressed.
    px_trace = px.bar(counts[:TOP_N], x=label_column, y='count').data[0]
    bars.append(go.Bar(px_trace, visible=(position == 0)))

    # Pressing the button shows this community's trace and hides all others.
    buttons.append(
        {
            "args": [{"visible": [other == position for other in range(len(community_ids))]},
                     {"title": feature, "annotations": []}],
            "label": f"Community {community_id}",
            "method": "update"
        }
    )

# Menu for switching between communities, rendered as a column of buttons to
# the right of the plot.
updatemenus = [
    {
        "buttons": buttons,
        "direction": "down",
        "showactive": True,
        "type": "buttons",
        "x": 1.05,
        "xanchor": "left",
        "y": 1.1,
        "yanchor": "top"
    }
]

# Add the menu to the layout
layout = go.Layout(updatemenus=updatemenus)

# Combine the traces into a single figure
fig = go.Figure(data=bars, layout=layout)

# Display the figure
fig.show()