In [73]:
import pandas as pd
import pickle

data = pd.read_csv("../data/data_cleaned.csv")

In [95]:
data.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,is_movie,duration_minutes
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,Unknown,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.",True,90
1,s2,TV Show,Blood & Water,Unknown,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng",South Africa,2021-09-24,2021,TV-MA,1 Season,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.",False,0
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera",Unknown,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Action & Adventure","To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.",False,0
3,s4,TV Show,Jailbirds New Orleans,Unknown,Unknown,Unknown,2021-09-24,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.",False,0
4,s5,TV Show,Kota Factory,Unknown,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar",India,2021-09-24,2021,TV-MA,1 Season,"International TV Shows, Romantic TV Shows, TV Comedies","In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.",False,0


# Analyse Statistique et Manipulation

#### Les 10 pays ayant le plus de films et séries sur Netflix

In [75]:
country_with_most_mvtv = data.groupby('country').agg(count=('show_id', 'count'))
country_with_most_mvtv.sort_values('count', ascending=False, inplace=True)
country_with_most_mvtv = country_with_most_mvtv.loc[country_with_most_mvtv.index != 'Unknown', ].head(10)
country_with_most_mvtv

Unnamed: 0_level_0,count
country,Unnamed: 1_level_1
United States,2818
India,972
United Kingdom,419
Japan,245
South Korea,199
Canada,181
Spain,145
France,124
Mexico,110
Egypt,106


#### La répartition des films et séries par année de sortie

In [94]:
repartition_mvtv_by_year = data.pivot_table(index='release_year', columns='type', values='show_id', aggfunc='count').reset_index()
repartition_mvtv_by_year.loc[repartition_mvtv_by_year['Movie'].isnull(), 'Movie'] = 0
repartition_mvtv_by_year.loc[repartition_mvtv_by_year['TV Show'].isnull(), 'TV Show'] = 0
repartition_mvtv_by_year.head()

type,release_year,Movie,TV Show
0,1925,0.0,1.0
1,1942,2.0,0.0
2,1943,3.0,0.0
3,1944,3.0,0.0
4,1945,3.0,1.0


#### Le réalisateur avec le plus grand nombre de films sur Netflix

In [77]:
real_with_most_mvtv = data.groupby('director').agg(countMv=('show_id', 'count'))
real_with_most_mvtv.sort_values('countMv', ascending=False, inplace=True)
real_with_most_mvtv.loc[real_with_most_mvtv.index != 'Unknown', ].head(1)

Unnamed: 0_level_0,countMv
director,Unnamed: 1_level_1
Rajiv Chilaka,19


#### Les 5 acteurs les plus présents dans les films et séries Netflix

In [78]:
actors_most_present = data.groupby('cast').agg(count=('show_id', 'count'))
actors_most_present.sort_values('count', ascending=False, inplace=True)
actors_most_present.loc[actors_most_present.index != 'Unknown', ].head(5)

Unnamed: 0_level_0,count
cast,Unnamed: 1_level_1
David Attenborough,19
"Vatsal Dubey, Julie Tejwani, Rupa Bhimani, Jigna Bhardwaj, Rajesh Kava, Mousam, Swapnil",14
Samuel West,10
Jeff Dunham,7
Craig Sechler,6


#### Le genre de film/série le plus fréquent sur Netflix

In [79]:
genre_most_present = data.groupby('listed_in').agg(count=('show_id', 'count'))
genre_most_present.sort_values('count', ascending=False, inplace=True)
genre_most_present.head(1)

Unnamed: 0_level_0,count
listed_in,Unnamed: 1_level_1
"Dramas, International Movies",362


#### La durée moyenne des films en minutes

In [80]:
movies_duration = data.loc[data['is_movie'], 'duration_minutes']
movies_duration = movies_duration.mean()
print('La durée moyenne des films est de', round(movies_duration, 2), 'minutes')

La durée moyenne des films est de 99.57 minutes


#### La proportion des contenus classés "TV-MA" par rapport aux autres ?

In [81]:
proportion_rating = (data.loc[data['rating'] == 'TV-MA', 'show_id'].count()) * 100 / data['show_id'].count()
print('La proportion des contenus classés "TV-MA" par rapport aux autre est de', round(proportion_rating, 2), '%')

La proportion des contenus classés "TV-MA" par rapport aux autre est de 36.46 %


#### La proportion de films par rapport aux séries TV sur Netflix ?

In [82]:
proportion_mv = (data.loc[data['is_movie'], 'show_id'].count()) * 100 / data['show_id'].count()
proportion_tv = (data.loc[data['is_movie']==False, 'show_id'].count()) * 100 / data['show_id'].count()

print('Il y a', round(proportion_mv, 2), '% de films contre', round(proportion_tv, 2), '% de séries sur Netflix')

Il y a 69.62 % de films contre 30.38 % de séries sur Netflix


#### Les CINQ pays qui produisent le plus de séries TV sur Netflix ?

In [83]:
isTvShow = data.loc[data['is_movie']==False, ]
isTvShow = isTvShow.groupby('country').agg(total=('show_id', 'count')).sort_values('total', ascending=False)
isTvShow.loc[isTvShow.index != 'Unknown', ].head(5)

Unnamed: 0_level_0,total
country,Unnamed: 1_level_1
United States,760
United Kingdom,213
Japan,169
South Korea,158
India,79


In [84]:
with open('variables.pkl', 'wb') as f:
    pickle.dump({'country_with_most_mvtv':country_with_most_mvtv}, f)