# 1. Introduction

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")

# 2. Data Loading & Cleaning

In [None]:
# Load the raw movie table; the path is relative to the notebook's working directory.
df = pd.read_csv('indonesian_movies.csv')
# Preview the first rows to check the columns were parsed as expected.
df.head()

Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime
0,100001,#FriendButMarried 2,2020,Ayudia (Mawar De Jongh) is not satisfied enoug...,Biography,13+,6.5,120,Indonesian,Rako Prijanto,"['Adipati Dolken', 'Mawar Eva de Jongh', 'Vonn...",100 min
1,100002,4 Mantan,2020,"Sara, Airin, Rachel, and Amara were accidental...",Thriller,17+,6.4,8,Indonesian,Hanny Saputra,"['Ranty Maria', 'Jeff Smith', 'Melanie Berentz...",80 min
2,100003,Aku Tahu Kapan Kamu Mati,2020,"After apparent death, Siena is able to see sig...",Horror,13+,5.4,17,Indonesian,Hadrah Daeng Ratu,"['Natasha Wilona', 'Ria Ricis', 'Al Ghazali', ...",92 min
3,100004,Anak Garuda,2020,"Good Morning Indonesia, a school for poor orph...",Adventure,13+,9.1,27,Indonesian,Faozan Rizal,"['Tissa Biani Azzahra', 'Violla Georgie', 'Aji...",129 min
4,100005,Dignitate,2020,Alfi (Al Ghazali) meets Alana (Caitlin Halderm...,Drama,17+,7.6,33,Indonesian,Fajar Nugros,"['Al Ghazali', 'Caitlin Halderman', 'Giorgino ...",109 min


In [None]:
# DataFrame.info() writes its report itself and returns None, so call it
# directly; wrapping it in print() emitted a stray "None" after the report.
df.info()
print('\n')

# Number of missing values per column.
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1272 entries, 0 to 1271
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   movie_id      1272 non-null   int64  
 1   title         1272 non-null   object 
 2   year          1272 non-null   int64  
 3   description   840 non-null    object 
 4   genre         1236 non-null   object 
 5   rating        376 non-null    object 
 6   users_rating  1272 non-null   float64
 7   votes         1272 non-null   object 
 8   languages     1272 non-null   object 
 9   directors     1265 non-null   object 
 10  actors        1272 non-null   object 
 11  runtime       869 non-null    object 
dtypes: float64(1), int64(2), object(9)
memory usage: 119.4+ KB
None 


movie_id          0
title             0
year              0
description     432
genre            36
rating          896
users_rating      0
votes             0
languages         0
directors         7
actors            0

In [None]:
# votes: text containing commas -> integer (commas are removed literally).
df['votes'] = df['votes'].str.replace(',', '', regex=False).astype('int')

# actors: the cell holds a bracketed, quoted, comma-separated string; drop the
# brackets and quotes and split it into a Python list of names. Entries that
# appear as a bare nan in the string come through as the string 'nan'.
df['actors'] = df['actors'].apply(lambda cast: cast[1:-1].replace("'", "").split(', '))

# runtime: keep only the digits (e.g. "100 min" -> "100") and cast to float so
# missing values stay NaN. regex=True must be explicit: since pandas 2.0 the
# default is a literal match, which would leave " min" in place and make the
# float cast fail.
df['runtime'] = df['runtime'].str.replace(r'\D', '', regex=True).astype('float')

# movie_id is an identifier, not a quantity: store it as a category.
df['movie_id'] = df['movie_id'].astype('category')

In [None]:
# Bucket each film's release year into a labelled period.
# The bins are right-closed, exactly like the original if/elif chain:
# (1950, 1960] -> '1950-1960', ..., (2010, inf) -> '2010-2020', and anything
# up to and including 1950 -> '<1950' (so a 1950 film is labelled '<1950').
decade_edges = [-np.inf, 1950, 1960, 1970, 1980, 1990, 2000, 2010, np.inf]
decade_labels = ['<1950', '1950-1960', '1960-1970', '1970-1980',
                 '1980-1990', '1990-2000', '2000-2010', '2010-2020']

# pd.cut works on the whole column, so nothing depends on the frame having
# exactly 1272 rows or a 0..n-1 index. Cast back to plain strings so later
# groupby calls only see the decades that actually occur.
df['decade'] = pd.cut(df['year'], bins=decade_edges, labels=decade_labels, right=True).astype(str)

- Converting the 'votes' and 'runtime' columns to numeric types to enable EDA
- Converting the 'actors' column into lists of names by splitting the string values, for later analysis
- Adding a 'decade' variable to group the movies into broader time periods

In [None]:
# Fill the missing values in 'directors' with names looked up by hand.
# Keys are row labels of df, values are the director to use for that row.
director_fixes = {
    137: 'Amar Mukhi',
    222: 'Tema Patrosza',
    426: 'Indra Gunawan',
    1022: 'Steady Rimba',
    1046: 'S.A. Karim',
    1255: 'AN Alcaff',
    1270: 'Joshua Wong',
}

# fillna with a dict fills each listed row only when it is actually null.
# This avoids chained assignment (df.directors[i] = ...) and, unlike
# str(...).replace('nan', ...), can never rewrite a real name that happens
# to contain the letters "nan".
df['directors'] = df['directors'].fillna(director_fixes)

In [None]:
# Impute missing runtimes with the mean runtime of the films from the same decade.
decade_mean_runtime = df.groupby(['decade'])['runtime'].transform('mean')
df['runtime'] = df['runtime'].fillna(decade_mean_runtime)

# Number of runtimes still missing after the fill.
df['runtime'].isna().sum()

0

- Some columns of the data contain null values: description, genre, rating, directors, and runtime.
- The 'directors' column has the fewest null values, so we filled them in by hand with information gathered from the internet.
- Because the 'runtime' column is numerical, we can fill it with a mean. For each null value we use the mean runtime of the movies from the same decade, because we assume that average film runtimes have changed over the decades.
- The remaining columns with null values are left as they are for now, because they hold non-numerical values.

From a quick describe of the data, we learn that:
 - The data covers Indonesian movies made between 1926 and 2020.
 - The median of the 'year' variable is 2011, which means the data is concentrated in the more recent years.<br>

We can further run the Exploratory Data Analysis and look for:
 - The most common movie genres over the years
 - The best movies based on users' ratings
 - The most active actors, i.e. those who played in the most movies

# 3. Basic Description

In [None]:
# Films per genre, with each genre's share of the counted films as bar label.
# The total is taken once from the counts column instead of re-running the
# groupby inside apply() for every single group.
df_g = df.groupby(['genre']).size().reset_index(name='counts')
df_g['percentage'] = 100 * df_g['counts'] / df_g['counts'].sum()
fig = px.bar(df_g,
             x='genre',
             y='counts',
             color='genre',
             text=df_g['percentage'].map('{0:1.2f}%'.format),
             title='Indonesian Movies by Genre')
fig.show()


# Same breakdown per decade.
df_g = df.groupby(['decade']).size().reset_index(name='counts')
df_g['percentage'] = 100 * df_g['counts'] / df_g['counts'].sum()
fig = px.bar(df_g,
             x='decade',
             y='counts',
             color='decade',
             text=df_g['percentage'].map('{0:1.2f}%'.format),
             title='Indonesian Movies by Decade')
# Force chronological order on the x axis; the labels would otherwise follow
# the alphabetical order produced by groupby ('<1950' would come last).
fig.update_xaxes(categoryorder='array', categoryarray= ['<1950','1950-1960','1960-1970','1970-1980','1980-1990','1990-2000','2000-2010','2010-2020'])
fig.show()

# 4. Exploratory Data Analysis
From the EDA we found that:
- The first Indonesian movie in the data is a Fantasy movie called 'Loetoeng Kasaroeng' by L. Heuveldorp, made back in 1926.
- Indonesian movies are dominated by the 'Drama' genre.
- Most Indonesian movies were produced in the 2010s.

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

Unnamed: 0,year,users_rating,votes,runtime
count,1272.0,1272.0,1272.0,1272.0
mean,2007.023585,6.144418,459.427673,96.895518
std,12.96856,1.389315,6288.88317,13.47968
min,1926.0,1.2,5.0,45.0
25%,2006.0,5.3,12.0,90.0
50%,2011.0,6.4,27.0,97.113861
75%,2016.0,7.1,76.0,100.0
max,2020.0,9.4,187222.0,262.0


In [None]:
# Oldest films first: sort by release year and show the first five rows.
df.sort_values(by='year').head()

Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime,decade
1271,101272,Loetoeng Kasaroeng,1926,,Fantasy,,7.2,11,Indonesian,L. Heuveldorp,"[Martoana, Oemar, nan, nan, nan, nan, nan, nan...",60.0,<1950
1270,101271,Resia Boroboedoer,1928,,Adventure,,7.0,8,Indonesian,Joshua Wong,"[Olive Young, nan, nan, nan, nan, nan, nan, na...",105.0,<1950
1269,101270,Darah dan Doa,1950,It tells the story of an Indonesian revolution...,Drama,,6.6,27,Indonesian,Usmar Ismail,"[Ella Bergen, Faridah, R.D. Ismail, Del Juzar,...",150.0,<1950
1268,101269,Enam Djam di Djogja,1951,Depicting the celebrated recapture of the town...,Drama,,6.3,9,Indonesian,Usmar Ismail,"[R.D. Ismail, Del Juzar, Aedy Moward, Agus Mul...",116.0,1950-1960
1267,101268,The Tiger from Tjampa,1953,"Set in the 1930s, and narrated like a ballad f...",Drama,,6.4,30,Indonesian,D. Djajakusuma,"[Wahid Chan, Bambang Hermanto, R.D. Ismail, Ma...",97.0,1950-1960


## Top 5 Indonesian movies
We rank the top 5 Indonesian movies with the formula IMDb uses for its Top Rated 250 chart: http://www.imdb.com/chart/top <br>

It is a true Bayesian estimate:

$$WR = \frac{v}{v+m}\,R + \frac{m}{v+m}\,C$$

where:

- WR = weighted rating
- R = average user rating for the movie = (users_rating)
- v = number of votes for the movie = (votes)
- m = minimum votes required to be listed in the Top 250
- C = the mean user rating across the whole data set <br>

The minimum votes required (m) is set to the 0.75 quantile of 'votes' over all movies in the data (76). The mean user rating across the whole data (C) is the sum of ['users_rating'] * ['votes'] over all movies, divided by the total number of votes in the data.

In [None]:
print ('EDA of votes participated in the data:')
display(df.votes.describe())

# Bayesian weighted rating: WR = v/(v+m) * R + m/(v+m) * C
R = df.users_rating                     # per-film user rating
v = df.votes                            # per-film number of votes
m = df.votes.quantile(0.75)             # vote threshold: 75th percentile of votes
UR_x_votes = df.users_rating*df.votes
the_mean_user_rating = UR_x_votes.sum()/df.votes.sum()
C = the_mean_user_rating                # vote-weighted mean rating over all films

# Keep the films at or above the threshold and score them in one chain.
# .assign() writes 'w_rating' to the new filtered frame, so no column is ever
# set on a slice of df (the original assignment was a SettingWithCopy case).
df_75 = (
    df[df.votes >= m]
    .assign(w_rating=lambda films: (films.votes / (films.votes + m)) * films.users_rating
                                   + (m / (films.votes + m)) * C)
    .sort_values(by='w_rating', ascending=False)
    .reset_index(drop=True)
)
df_75.head()

EDA of votes participated in the data:


count      1272.000000
mean        459.427673
std        6288.883170
min           5.000000
25%          12.000000
50%          27.000000
75%          76.000000
max      187222.000000
Name: votes, dtype: float64

Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime,decade,w_rating
0,100137,A Man Called Ahok,2018,Depicts the life of the titular former governo...,Biography,13+,8.7,4235,Indonesian,Putrama Tuta,"[Daniel Mananta, Kin Wah Chew, Eric Febrian, D...",102.0,2010-2020,8.671094
1,100173,Jelita Sejuba: Mencintai Kesatria Negara,2018,"Without dating, Jaka immediately proposes for ...",Drama,13+,9.1,240,Indonesian,Ray Nayoan,"[Putri Marino, Wafda Saifan Lubis, Alvaro Mald...",97.550943,2010-2020,8.609446
2,100228,Yowis Ben,2018,Bayu falls in love with a girl and decided to ...,Comedy,13+,8.4,3080,Indonesian,Fajar Nugros,"[Bayu Skak, Cut Meyriska, Brandon Salim, Joshu...",97.550943,2010-2020,8.367739
3,100450,The Raid 2,2014,"Only a short time after the first raid, Rama g...",Action,R,8.0,112091,Indonesian,Gareth Evans,"[Iko Uwais, Arifin Putra, Tio Pakusadewo, Oka ...",150.0,2010-2020,7.999363
4,100023,27 Steps of May,2019,"Following a horrible experience, May has isola...",Drama,17+,8.2,280,Indonesian,Ravi L. Bharwani,"[Raihaanun Soeriaatmadja, Lukman Sardi, Ario B...",112.0,2010-2020,7.956699


### Top 5 Indonesian Movies are:<br>
- A Man Called Ahok (2018) by Putrama Tuta  [8.67]
- Jelita Sejuba: Mencintai Kesatria Negara (2018) by Ray Nayoan  [8.61]
- Yowis Ben (2018) by Fajar Nugros  [8.37]
- The Raid 2 (2014) by Gareth Evans  [8.00]
- 27 Steps of May (2019) by Ravi L. Bharwani  [7.96]

## Indonesian Actors and Directors who played and made the most movies
We look at the actors and directors with the most movies in two ways:
- Actors and directors who participated in the most movies
- Actors and directors who participated in the most successful movies, judged by the weighted rating calculated above.

In [None]:
def to_1D(series):
    """Flatten an iterable of lists into one pandas Series.

    Every element of `series` is itself iterated, and all the inner items are
    concatenated in order into a single new Series with a fresh default index.
    """
    flattened = []
    for _list in series:
        flattened.extend(_list)
    return pd.Series(flattened)

In [None]:
def _top_counts(values, label, n=10):
    """Return the `n` most frequent entries of `values` as a frame with columns [label, 'sum']."""
    top = values.value_counts().head(n).reset_index()
    top.columns = [label, 'sum']
    return top


def _actor_names(frame):
    """Flatten the 'actors' lists of `frame` into one Series, dropping the 'nan' placeholders."""
    names = to_1D(frame.actors)
    return names[names != 'nan']


# Run the same ranking twice: first on every film (df), then on the
# vote-filtered films (df_75). After the loop the variables hold the df_75
# results, which is what the following cells work with.
for source in (df, df_75):
    actors_1d = _actor_names(source)
    most5_act = _top_counts(actors_1d, 'actor')
    most5_dir = _top_counts(source['directors'], 'directors')

    display(most5_act)
    display(most5_dir)

Unnamed: 0,actor,sum
0,Lukman Sardi,57
1,Reza Rahadian,47
2,Verdi Solaiman,45
3,Jajang C. Noer,45
4,Tio Pakusadewo,41
5,Dwi Sasono,36
6,Ray Sahetapy,36
7,Barry Prima,35
8,Henky Solaiman,35
9,Slamet Rahardjo,35


Unnamed: 0,directors,sum
0,Nayato Fio Nuala,61
1,Rizal Mantovani,34
2,Hanung Bramantyo,31
3,Arizal,28
4,Sisworo Gautama Putra,23
5,Monty Tiwa,23
6,Findo Purwono,21
7,Jose Poernomo,21
8,Rudy Soedjarwo,21
9,Rako Prijanto,20


Unnamed: 0,actor,sum
0,Reza Rahadian,31
1,Lukman Sardi,30
2,Ario Bayu,20
3,Tio Pakusadewo,19
4,Donny Alamsyah,19
5,Verdi Solaiman,18
6,Jajang C. Noer,17
7,Yayu A.W. Unru,16
8,Abimana Aryasatya,16
9,Hannah Al Rashid,14


Unnamed: 0,directors,sum
0,Hanung Bramantyo,18
1,Rizal Mantovani,16
2,Riri Riza,10
3,Angga Dwimas Sasongko,8
4,Monty Tiwa,7
5,Upi Avianto,7
6,Awi Suryadi,7
7,Joko Anwar,7
8,Rudy Soedjarwo,7
9,Anggy Umbara,7


In [None]:
# Mean weighted rating of the df_75 films directed by Hanung Bramantyo.
mask = df_75['directors'].eq('Hanung Bramantyo')
df_75.loc[mask, 'w_rating'].mean()

6.849183036863551

- The actor who played in the most movies in Indonesian filmography is Lukman Sardi, with 57 movies in total.
- The director who made the most movies in Indonesian filmography is Nayato Fio Nuala, with 61 movies in total.

- The highest-rated actor among the 5 most active actors in Indonesian filmography is Reza Rahadian, with an average rating of 6.99.
- The highest-rated director among the 5 most active directors in Indonesian filmography is Hanung Bramantyo, with an average movie rating of 6.85.

In [None]:
# For every actor in most5_act, average the weighted rating of the df_75
# films whose cast list contains that actor.
means = [
    df_75.loc[df_75.actors.map(lambda cast: actor in cast), 'w_rating'].mean()
    for actor in most5_act.actor
]
most5_act['w_rating_average'] = means
most5_act

Unnamed: 0,actor,sum,w_rating_average
0,Reza Rahadian,31,6.990846
1,Lukman Sardi,30,6.859277
2,Ario Bayu,20,6.798045
3,Tio Pakusadewo,19,6.79397
4,Donny Alamsyah,19,6.979555
5,Verdi Solaiman,18,6.52843
6,Jajang C. Noer,17,7.033288
7,Yayu A.W. Unru,16,7.227131
8,Abimana Aryasatya,16,6.488232
9,Hannah Al Rashid,14,6.160578


In [None]:
# Apply the same weighted-rating formula at actor level: the number of films
# plays the role of the votes and the actor's mean w_rating that of the rating.
v = most5_act['sum']
R = most5_act['w_rating_average']
m = 14  # NOTE(review): equals the smallest film count in the table shown above - confirm that is the intent
UR_x_votes = R * v
the_mean_user_rating = UR_x_votes.sum() / v.sum()
C = the_mean_user_rating

film_weight = v / (v + m)
prior_weight = m / (v + m)
most5_act['w_rating_t'] = film_weight * R + prior_weight * C

In [None]:
# Every film in the full table whose cast list includes Yayu A.W. Unru.
mask = df['actors'].map(lambda cast: 'Yayu A.W. Unru' in cast)
df.loc[mask]

Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime,decade
64,100065,Koki-Koki Cilik 2,2019,Cooking Camp is opened again. The little chefs...,Drama,SU,6.8,9,Indonesian,Viva Westi,"[Ringgo Agus Rahman, M. Adhiyat, Faras Fatik, ...",91.0,2010-2020
74,100075,Love for Sale 2,2019,"Tired of matchmaking by his mother, Ican bring...",Drama,17+,7.0,372,Indonesian,Andibachtiar Yusuf,"[Della Dartyan, Adipati Dolken, Ratna Riantiar...",92.0,2010-2020
80,100081,MatiAnak,2019,"Since a new kid arrives at an orphanage, weird...",Horror,17+,6.9,57,Indonesian,Derby Romero,"[Cinta Laura Kiehl, Jovarel Callum, Irsyadilla...",85.0,2010-2020
96,100097,Ratu Ilmu Hitam,2019,Families were terrorized at the orphanage. Som...,Horror,17+,6.8,531,Indonesian,Kimo Stamboel,"[Ario Bayu, Hannah Al Rashid, Adhisty Zara, Mu...",99.0,2010-2020
136,100137,A Man Called Ahok,2018,Depicts the life of the titular former governo...,Biography,13+,8.7,4235,Indonesian,Putrama Tuta,"[Daniel Mananta, Kin Wah Chew, Eric Febrian, D...",102.0,2010-2020
172,100173,Jelita Sejuba: Mencintai Kesatria Negara,2018,"Without dating, Jaka immediately proposes for ...",Drama,13+,9.1,240,Indonesian,Ray Nayoan,"[Putri Marino, Wafda Saifan Lubis, Alvaro Mald...",97.550943,2010-2020
173,100174,Kafir: Bersekutu dengan Setan,2018,A family is being haunted by some strange happ...,Drama,17+,7.0,367,Indonesian,Azhar Kinoi Lubis,"[Putri Ayudya, Sujiwo Tejo, Indah Permatasari,...",97.0,2010-2020
188,100189,Menunggu Pagi,2018,For Bayu (Arya Saloka) who owns a vinyl store ...,Drama,17+,7.4,21,Indonesian,Teddy Soeriaatmadja,"[Arya Saloka, Arya Vasco, Aurélie Moeremans, J...",82.0,2010-2020
211,100212,Something in Between,2018,"Gema, a high school teenager, falls in love wi...",Drama,13+,8.2,92,Indonesian,Asep Kusdinar,"[Jefri Nichol, Amanda Rawles, Naufal Samudra W...",100.0,2010-2020
256,100257,Marlina the Murderer in Four Acts,2017,Marlina lives quietly in Sumba until one day a...,Drama,21+,7.0,2350,Indonesian,Mouly Surya,"[Marsha Timothy, Egy Fedly, Tumpal Tampubolon,...",93.0,2010-2020


In [None]:
# Show the actor table twice: ranked by number of films, then by the
# count-adjusted rating.
for sort_key in ('sum', 'w_rating_t'):
    display(most5_act.sort_values(by=sort_key, ascending=False))

Unnamed: 0,actor,sum,w_rating_average,w_rating_t
0,Reza Rahadian,31,6.990846,6.93602
1,Lukman Sardi,30,6.859277,6.845068
2,Ario Bayu,20,6.798045,6.80487
3,Tio Pakusadewo,19,6.79397,6.802731
4,Donny Alamsyah,19,6.979555,6.909582
5,Verdi Solaiman,18,6.52843,6.653638
6,Jajang C. Noer,17,7.033288,6.934535
7,Yayu A.W. Unru,16,7.227131,7.034626
8,Abimana Aryasatya,16,6.488232,6.640546
9,Hannah Al Rashid,14,6.160578,6.487599


Unnamed: 0,actor,sum,w_rating_average,w_rating_t
7,Yayu A.W. Unru,16,7.227131,7.034626
0,Reza Rahadian,31,6.990846,6.93602
6,Jajang C. Noer,17,7.033288,6.934535
4,Donny Alamsyah,19,6.979555,6.909582
1,Lukman Sardi,30,6.859277,6.845068
2,Ario Bayu,20,6.798045,6.80487
3,Tio Pakusadewo,19,6.79397,6.802731
5,Verdi Solaiman,18,6.52843,6.653638
8,Abimana Aryasatya,16,6.488232,6.640546
9,Hannah Al Rashid,14,6.160578,6.487599


In [None]:
# One sub-frame per actor in most5_act: the df_75 films whose cast contains
# that actor, tagged with the actor's name. A film appears once for every
# listed actor in its cast.
# .assign() returns a new frame, so no column is set on a slice of df_75, and
# the pieces are concatenated once: DataFrame.append was removed in pandas 2.0.
actor_frames = [
    df_75[df_75['actors'].apply(lambda cast: name in cast)].assign(actor_name=name)
    for name in most5_act['actor']
]
# pd.concat rejects an empty list; fall back to an empty frame in that case.
df_n = pd.concat(actor_frames, ignore_index=True) if actor_frames else pd.DataFrame()
df_n


Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime,decade,w_rating,actor_name
0,100056,Imperfect,2019,"Being born fat and has dark skins, it feels li...",Comedy,13+,7.8,624,Indonesian,Ernest Prakasa,"[Jessica Mila, Reza Rahadian, Yasmin Napper, K...",113.000000,2010-2020,7.719693,Reza Rahadian
1,100252,Kartini,2017,This movie follows the story of the Indonesian...,Biography,SU,7.7,336,Indonesian,Hanung Bramantyo,"[Dian Sastrowardoyo, Reza Rahadian, Adinia Wir...",122.000000,2010-2020,7.582002,Reza Rahadian
2,100535,Habibie & Ainun,2012,This movie is based on the memoir written by t...,Biography,13+,7.6,1783,Indonesian,Faozan Rizal,"[Reza Rahadian, Bunga Citra Lestari, Tio Pakus...",120.000000,2010-2020,7.577937,Reza Rahadian
3,100508,The Sinking of Van Der Wijck,2013,Adapted from a classic novel with the same tit...,Drama,13+,7.6,1023,Dutch,Sunil Soraya,"[Herjunot Ali, Pevita Pearce, Reza Rahadian, R...",164.000000,2010-2020,7.562680,Reza Rahadian
4,100336,Rudy Habibie,2016,This movie follows the story of the 3rd Presid...,Biography,13+,7.5,644,Indonesian,Hanung Bramantyo,"[Reza Rahadian, Chelsea Islan, Ernest Prakasa,...",97.550943,2010-2020,7.453590,Reza Rahadian
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,100299,Warkop DKI Reborn: Jangkrik Boss Part 2,2017,"Dono, Kasino, and Indro's adventure continues....",Adventure,13+,5.5,404,Indonesian,Anggy Umbara,"[Abimana Aryasatya, Vino G. Bastian, Tora Sudi...",97.550943,2010-2020,5.747052,Hannah Al Rashid
196,100043,DreadOut,2019,Six friends want to become popular by recordin...,Adventure,17+,5.3,390,Indonesian,Kimo Stamboel,"[Caitlin Halderman, Jefri Nichol, Marsha Aruan...",97.000000,2010-2020,5.587092,Hannah Al Rashid
197,100305,Bulan Terbelah di Langit Amerika 2,2016,When Hanum and Rangga decided to return to Vie...,Adventure,13+,4.4,153,Indonesian,Rizal Mantovani,"[Acha Septriasa, Abimana Aryasatya, Nino Ferna...",100.000000,2010-2020,5.282904,Hannah Al Rashid
198,100171,Jailangkung 2,2018,A girl watched an old videotape owned by his f...,Horror,13+,4.0,109,Indonesian,Rizal Mantovani,"[Amanda Rawles, Jefri Nichol, Hannah Al Rashid...",83.000000,2010-2020,5.257216,Hannah Al Rashid


In [None]:
# Films per (actor, year) and each pair's share of all rows in df_n.
# The total is computed once from the counts column instead of re-running
# the groupby inside apply() for every group.
df_g = df_n.groupby(['actor_name','year']).size().reset_index(name='counts')
df_g['percentage'] = 100 * df_g['counts'] / df_g['counts'].sum()
df_g

Unnamed: 0,actor_name,year,counts,percentage
0,Abimana Aryasatya,2009,1,0.5
1,Abimana Aryasatya,2011,1,0.5
2,Abimana Aryasatya,2012,1,0.5
3,Abimana Aryasatya,2013,3,1.5
4,Abimana Aryasatya,2015,3,1.5
...,...,...,...,...
97,Yayu A.W. Unru,2015,1,0.5
98,Yayu A.W. Unru,2016,1,0.5
99,Yayu A.W. Unru,2017,4,2.0
100,Yayu A.W. Unru,2018,4,2.0


In [None]:
# Stacked bars: films per year, one colour per actor, labelled with the count.
# The title takes the number of actors from the data (the original said 5,
# which did not match the actors plotted, and misspelled "based on").
n_actors = df_g['actor_name'].nunique()
fig = px.bar(df_g,
            x = 'year',
            y = 'counts',
            color='actor_name',
            text=df_g['counts'].apply(lambda x: '{0:9.0f}'.format(x)),
            title= f'Best {n_actors} Indonesian Actors based on weighted rating of movies')
fig.show()

In [None]:
# Per actor and year: number of films and the mean weighted rating of those films.
df_g = (
    df_n.groupby(['actor_name', 'year'])
        .agg(counts=('w_rating', 'size'), w_rating=('w_rating', 'mean'))
        .reset_index()
)

# Same stacked bars as the count chart, labelled with the mean rating instead.
rating_labels = df_g['w_rating'].map('{0:9.2f}'.format)
fig = px.bar(df_g, x='year', y='counts', color='actor_name', text=rating_labels)
fig.show()

In [None]:
# df_75 films of the first five directors in most5_dir, grouped director by
# director in that order. The pieces are concatenated once, because
# DataFrame.append was removed in pandas 2.0. The original also re-assigned
# the 'directors' column on each slice, which was a no-op and is dropped.
director_frames = [
    df_75[df_75['directors'] == name]
    for name in most5_dir['directors'].head()
]
# pd.concat rejects an empty list; fall back to an empty frame in that case.
df_n = pd.concat(director_frames, ignore_index=True) if director_frames else pd.DataFrame()
df_n

Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime,decade,w_rating
0,100252,Kartini,2017,This movie follows the story of the Indonesian...,Biography,SU,7.7,336,Indonesian,Hanung Bramantyo,"[Dian Sastrowardoyo, Reza Rahadian, Adinia Wir...",122.0,2010-2020,7.582002
1,100336,Rudy Habibie,2016,This movie follows the story of the 3rd Presid...,Biography,13+,7.5,644,Indonesian,Hanung Bramantyo,"[Reza Rahadian, Chelsea Islan, Ernest Prakasa,...",97.550943,2010-2020,7.45359
2,100964,Catatan Akhir Sekolah,2005,A journey of three students for making a short...,Comedy,,7.4,447,Indonesian,Hanung Bramantyo,"[Joanna Alexandra, Vino G. Bastian, Marcel Cha...",109.0,2000-2010,7.35064
3,100947,Jomblo,2006,Four college best friends with their funny lif...,Comedy,,7.2,317,Indonesian,Hanung Bramantyo,"[Dennis Adhiswara, Rianti Cartwright, Rizky Ha...",97.113861,2000-2010,7.17299
4,100893,Verses of Love,2008,A man who try to get through a complicated rel...,Drama,17+,7.1,981,Indonesian,Hanung Bramantyo,"[Fedi Nuril, Rianti Cartwright, Carissa Putri,...",130.0,2000-2010,7.097148
5,100286,Surga Yang Tak Dirindukan 2,2017,Sequel to the 2015 film 'Surga Yang Tak Dirind...,Drama,13+,7.1,359,Indonesian,Hanung Bramantyo,"[Laudya Cynthia Bella, Fedi Nuril, Raline Shah...",121.0,2010-2020,7.093069
6,100654,Tanda Tanya,2011,A story of interconnecting lives in modern day...,Drama,,7.1,188,Indonesian,Hanung Bramantyo,"[Reza Rahadian, Revalina S. Temat, Agus Kuncor...",100.0,2010-2020,7.088579
7,100048,Habibie & Ainun 3,2019,This movie follows the story of Hasri Ainun Be...,Biography,13+,7.0,133,Indonesian,Hanung Bramantyo,"[Maudy Ayunda, Jefri Nichol, Reza Rahadian, Lu...",121.0,2010-2020,7.021938
8,100500,Soekarno: Indonesia Merdeka,2013,"This movie follows the life of Soekarno, the f...",Biography,,7.0,428,Indonesian,Hanung Bramantyo,"[Ario Bayu, Muhammad Abbe, Moch. Achir, Norman...",137.0,2010-2020,7.009097
9,100559,Perahu Kertas,2012,"Tidal relationship between two human beings, K...",Drama,,7.0,519,Indonesian,Hanung Bramantyo,"[Maudy Ayunda, Adipati Dolken, Reza Rahadian, ...",111.0,2010-2020,7.007706


In [None]:
# Films per (director, year) and each pair's share of all rows in df_n.
# The total is computed once from the counts column instead of re-running
# the groupby inside apply() for every group.
df_g = df_n.groupby(['directors','year']).size().reset_index(name='counts')
df_g['percentage'] = 100 * df_g['counts'] / df_g['counts'].sum()
df_g.head()

Unnamed: 0,directors,year,counts,percentage
0,Angga Dwimas Sasongko,2010,1,1.694915
1,Angga Dwimas Sasongko,2014,1,1.694915
2,Angga Dwimas Sasongko,2015,1,1.694915
3,Angga Dwimas Sasongko,2016,1,1.694915
4,Angga Dwimas Sasongko,2017,2,3.389831


In [None]:
# Stacked bars: films per year, one colour per director, labelled with the count.
count_labels = df_g['counts'].map('{0:9.0f}'.format)
fig = px.bar(
    df_g,
    x='year',
    y='counts',
    color='directors',
    text=count_labels,
    title='Best 5 Indonesian Directors based on weighted rating of movies',
)
fig.show()

In [None]:
# Per director and year: number of films and the mean weighted rating of those films.
df_g = (
    df_n.groupby(['directors', 'year'])
        .agg(counts=('w_rating', 'size'), w_rating=('w_rating', 'mean'))
        .reset_index()
)

# Same stacked bars as the count chart, labelled with the mean rating instead.
rating_labels = df_g['w_rating'].map('{0:9.2f}'.format)
fig = px.bar(df_g, x='year', y='counts', color='directors', text=rating_labels)
fig.show()

In [None]:
# Films by Angga Dwimas Sasongko released in 2020.
# Both conditions are combined into one boolean mask. The original computed
# `mask` without using it and then chained two selections, applying a
# full-length mask to an already filtered frame.
mask = (df.directors == 'Angga Dwimas Sasongko') & (df['year'] == 2020)
df[mask]

Unnamed: 0,movie_id,title,year,description,genre,rating,users_rating,votes,languages,directors,actors,runtime,decade
12,100013,Nanti Kita Cerita Tentang Hari Ini,2020,"Three siblings live in happy-looking families,...",Drama,13+,7.5,678,Indonesian,Angga Dwimas Sasongko,"[Rio Dewanto, Sheila Dara Aisha, Rachel Amanda...",121.0,2010-2020


- The movie with the highest w_rating in a given year shows which film people were most excited about that year. For example, in 2000 it was Riri Riza's Petualangan Sherina; in 2005 it was Riri Riza's Gie and Hanung Bramantyo's Catatan Akhir Sekolah.
- 2016 is the year people were most into movies, given both the large number of movies and their high average w_rating that year.