In [1]:
import pandas as pd
import networkx as nx
import numpy as np

In [2]:
# Load the Netflix catalogue, then preview the rows whose `country` field is exactly 'Indonesia'.
csv_path = '../datasets/netflix_titles.csv'
df = pd.read_csv(csv_path)
df.loc[df['country'].eq('Indonesia')]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
31,s32,Movie,#FriendButMarried,Rako Prijanto,"Adipati Dolken, Vanesha Prescilla, Rendi Jhon,...",Indonesia,"May 21, 2020",2018,TV-G,102 min,"Dramas, International Movies, Romantic Movies","Pining for his high school crush for years, a ..."
32,s33,Movie,#FriendButMarried 2,Rako Prijanto,"Adipati Dolken, Mawar de Jongh, Sari Nila, Von...",Indonesia,"June 28, 2020",2020,TV-G,104 min,"Dramas, International Movies, Romantic Movies",As Ayu and Ditto finally transition from best ...
57,s58,Movie,100% Halal,Jastis Arimba,"Anisa Rahma, Ariyo Wahab, Anandito Dwis, Fitri...",Indonesia,"January 7, 2021",2020,TV-14,101 min,"Dramas, International Movies","After high school, a young woman marries the m..."
99,s100,Movie,3 Heroines,Iman Brotoseno,"Reza Rahadian, Bunga Citra Lestari, Tara Basro...",Indonesia,"January 5, 2019",2016,TV-PG,124 min,"Dramas, International Movies, Sports Movies",Three Indonesian women break records by becomi...
123,s124,Movie,5CM,Rizal Mantovani,"Herjunot Ali, Raline Shah, Fedi Nuril, Pevita ...",Indonesia,"September 28, 2018",2012,TV-14,126 min,"Dramas, International Movies",Five friends embark on a mission to climb the ...
...,...,...,...,...,...,...,...,...,...,...,...,...
7147,s7148,Movie,Too Handsome to Handle,Sabrina Rochelle Kalangie,"Ari Irham, Nikita Willy, Calvin Jeremy, Rachel...",Indonesia,"December 31, 2020",2019,TV-14,106 min,"Comedies, Dramas, International Movies",Wary of the effects of his good looks on other...
7407,s7408,Movie,Verses of Love,Hanung Bramantyo,"Fedi Nuril, Rianti Cartwright, Carissa Putri, ...",Indonesia,"March 27, 2019",2008,TV-PG,126 min,"Dramas, International Movies, Romantic Movies","Polygamy, piety and personal principles collid..."
7408,s7409,Movie,Verses of Love 2,Guntur Soeharjanto,"Fedi Nuril, Tatjana Saphira, Dewi Sandra, Chel...",Indonesia,"October 18, 2019",2017,TV-14,127 min,"Dramas, Faith & Spirituality, International Mo...","Now a lecturer in Edinburgh, Fahri tries to be..."
7572,s7573,Movie,What's Up With Love?,"Rudy Soedjarwo, Riri Riza","Dian Sastrowardoyo, Nicholas Saputra, Ladya Ch...",Indonesia,"December 8, 2018",2002,R,110 min,"Dramas, International Movies, Romantic Movies",A popular high school girl strains her relatio...


In [3]:
# (rows, columns) of the loaded catalogue.
df.shape

(7787, 12)

In [4]:
# Per-column dtypes and non-null counts; columns with fewer non-null entries than rows hold missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7787 entries, 0 to 7786
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       7787 non-null   object
 1   type          7787 non-null   object
 2   title         7787 non-null   object
 3   director      5398 non-null   object
 4   cast          7069 non-null   object
 5   country       7280 non-null   object
 6   date_added    7777 non-null   object
 7   release_year  7787 non-null   int64 
 8   rating        7780 non-null   object
 9   duration      7787 non-null   object
 10  listed_in     7787 non-null   object
 11  description   7787 non-null   object
dtypes: int64(1), object(11)
memory usage: 730.2+ KB


In [5]:
# Replace missing director/cast/country values with a single-space placeholder so the
# string operations in later cells never encounter NaN.
# The result is assigned back instead of calling `df[col].fillna(..., inplace=True)`:
# that chained in-place form is deprecated in pandas 2.x and has no effect on `df`
# once Copy-on-Write is enabled.
text_columns = ['director', 'cast', 'country']
df[text_columns] = df[text_columns].fillna(' ')

In [6]:
# Country under analysis; later cells match it as a substring of the `country` column.
country_name = 'Indonesia'

In [7]:
# Distinct cast names credited on any title whose country field mentions `country_name`.
# The cast field is a comma-separated string, so it is split and each piece is trimmed.
actors_list = {
    name.strip()
    for cast, country in zip(df.cast, df.country)
    if country_name in country
    for name in cast.strip().split(',')
}
# A blank cast entry (or a stray comma) yields an empty name; drop it if present.
actors_list.discard('')

In [8]:
# Distinct director names credited on any title whose country field mentions `country_name`.
# The director field is a comma-separated string, so it is split and each piece is trimmed.
directors_list = {
    name.strip()
    for directors, country in zip(df.director, df.country)
    if country_name in country
    for name in directors.strip().split(',')
}
# A blank director entry (or a stray comma) yields an empty name; drop it if present.
directors_list.discard('')

In [9]:
# Count, for every actor, the titles from `country_name` whose cast field mentions them.
#
# Rows are collected in a list and turned into a DataFrame once: `DataFrame.append` was
# removed in pandas 2.0 and copied the whole frame on every iteration.
# `regex=False` makes each name match literally, so characters such as '(' or '.' inside
# a name are not interpreted as regular-expression syntax.
# NOTE: matching is still by substring, so a short name that is a prefix of a longer one
# is counted for both.
in_country = df['country'].str.contains(country_name, regex=False)
cast_in_country = df.loc[in_country, 'cast']  # the country filter is the same for every actor
actor_rows = [
    {'Name': actor, 'Count': int(cast_in_country.str.contains(actor, regex=False).sum())}
    for actor in actors_list
]
# Passing `columns` keeps the Name/Count layout even when no actors were found.
actor_count = (
    pd.DataFrame(actor_rows, columns=['Name', 'Count'])
    .sort_values(by='Count', ascending=False)
)

In [10]:
# Preview the first rows of the count-sorted actor table.
actor_count.head()

Unnamed: 0,Name,Count
154,Maudy Koesnaedi,9
361,Reza Rahadian,8
293,Pevita Pearce,6
202,Chelsea Islan,6
180,Cut Mini Theo,5


In [11]:
# Count, for every director, the titles from `country_name` whose director field mentions them.
#
# Rows are collected in a list and turned into a DataFrame once: `DataFrame.append` was
# removed in pandas 2.0 and copied the whole frame on every iteration.
# `regex=False` makes each name match literally, so characters such as '(' or '.' inside
# a name are not interpreted as regular-expression syntax.
# NOTE: matching is still by substring, so a short name that is a prefix of a longer one
# is counted for both.
in_country = df['country'].str.contains(country_name, regex=False)
directors_in_country = df.loc[in_country, 'director']  # same filter for every director
director_rows = [
    {'Name': director, 'Count': int(directors_in_country.str.contains(director, regex=False).sum())}
    for director in directors_list
]
# Passing `columns` keeps the Name/Count layout even when no directors were found.
director_count = (
    pd.DataFrame(director_rows, columns=['Name', 'Count'])
    .sort_values(by='Count', ascending=False)
)

In [12]:
# Preview the first rows of the count-sorted director table.
director_count.head()

Unnamed: 0,Name,Count
32,Rocky Soraya,6
48,Hanung Bramantyo,6
47,Riri Riza,6
27,Angga Dwimas Sasongko,4
13,Timo Tjahjanto,3


In [13]:
# Keep only names that contain a space, then take the first 50 rows of each
# (already count-sorted) table.
actor_has_space = actor_count['Name'].str.contains(' ')
director_has_space = director_count['Name'].str.contains(' ')
top_50_actors = actor_count.loc[actor_has_space].head(50)
top_50_directors = director_count.loc[director_has_space].head(50)

In [14]:
# Highest-count rows of the actor selection.
top_50_actors.sort_values(by='Count',ascending=False).head()

Unnamed: 0,Name,Count
154,Maudy Koesnaedi,9
361,Reza Rahadian,8
293,Pevita Pearce,6
202,Chelsea Islan,6
382,Rio Dewanto,5


In [15]:
# Highest-count rows of the director selection.
top_50_directors.sort_values(by='Count',ascending=False).head()

Unnamed: 0,Name,Count
32,Rocky Soraya,6
47,Riri Riza,6
48,Hanung Bramantyo,6
27,Angga Dwimas Sasongko,4
13,Timo Tjahjanto,3


In [16]:
# Build a directed actor -> director graph with one edge for every (actor, director) pair
# that appears together on at least one title from `country_name`.
# `actorss[i]` and `directorss[i]` hold the endpoints of the i-th edge, in insertion order.
actorss = []
directorss = []
G = nx.DiGraph()
G.add_nodes_from(top_50_actors['Name'])
G.add_nodes_from(top_50_directors['Name'])

# Restrict to the country's titles once, and compute each name's membership mask once,
# instead of re-filtering the whole frame for every (actor, director) combination.
# `regex=False` matches names literally, so regex metacharacters in a name cannot
# mis-match or raise.
titles = df[df['country'].str.contains(country_name, regex=False)]
actor_masks = {
    actor: titles['cast'].str.contains(actor, regex=False)
    for actor in top_50_actors['Name']
}
director_masks = {
    director: titles['director'].str.contains(director, regex=False)
    for director in top_50_directors['Name']
}

# Actors in the outer loop, directors in the inner one: same edge order as before.
for actor, in_cast in actor_masks.items():
    for director, is_director in director_masks.items():
        if (in_cast & is_director).any():
            G.add_edge(actor, director)
            actorss.append(actor)
            directorss.append(director)

In [17]:
# Pair each edge's actor with its director, then flatten the pairs row by row into a
# single column: actor, director, actor, director, ...
arrc = np.array(list(zip(actorss, directorss)))
arrc2 = arrc.reshape(-1)
df_arrc = pd.DataFrame(arrc2, columns=["Nodename"])

In [18]:
# Give both rows of every pair the same 1-based pair number: 1, 1, 2, 2, ...
arr_index = [pair_no for pair_no in range(1, len(arrc) + 1) for _ in range(2)]
df_arrc['c_index'] = arr_index

In [24]:
# Display the node-name / pair-number table built above.
df_arrc

Unnamed: 0,Nodename,c_index
0,Maudy Koesnaedi,1
1,Hanung Bramantyo,1
2,Maudy Koesnaedi,2
3,Fajar Bustomi,2
4,Maudy Koesnaedi,3
...,...,...
305,Charles Gozali,153
306,Asri Welas,154
307,Sammaria Sari Simanjuntak,154
308,Asri Welas,155


In [19]:
# df_arrc.to_csv('bridgefile.csv', index_label='src-target')

In [20]:
# Compute a force-directed layout and store each node's coordinates on the graph.
# A fixed seed makes the layout reproducible across kernel restarts.
LAYOUT_SEED = 42
pos = nx.spring_layout(G, k=0.04, iterations=10, scale=100, seed=LAYOUT_SEED)
# `pos` is already a {node: coordinates} mapping, so it is passed whole.
# Keyword arguments are used because the positional order of `name` and `values`
# differs between networkx 1.x and 2.x; the previous positional call
# `(G, 'pos', pos.values())` did not attach per-node coordinates under 'pos'.
nx.set_node_attributes(G, values=pos, name='pos')

In [21]:
# #Blue nodes for actors and red for directors
# color_map = []
# for node in G:
#     if node in top_50_actors['Name'].values:
#         color_map.append('blue')
#     else:
#         color_map.append('red')

# plt.figure(1,figsize=(30,30))
# nx.draw(G,node_color=color_map, with_labels=True,font_color='green',font_size=15, pos=pos)
# print('Max connections: '+ str(max(dict(G.degree()).items(), key = lambda x : x[1])))
# plt.show()

In [22]:
# One row per node with its layout coordinates: `pos` maps node -> (x, y), so the frame
# is built with nodes as columns and flipped so that nodes become the index.
positions = pd.DataFrame(pos).T.set_axis(['X', 'Y'], axis=1)
positions

Unnamed: 0,X,Y
Maudy Koesnaedi,-4.993138,5.434124
Reza Rahadian,-55.450455,-22.053553
Pevita Pearce,8.062966,-30.369925
Chelsea Islan,-19.545308,-13.379968
Cut Mini Theo,27.052721,12.879249
...,...,...
Joshua Oppenheimer,-42.183331,-55.311138
Liam O'Donnell,51.586146,-31.233113
Kuntz Agus,-78.649097,-38.607145
Yandy Laurens,-23.786389,-57.896930


In [23]:
# positions.to_csv('nodepositions.csv', encoding='utf-16', index_label='ID')