In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Family' Movie data
family = pd.read_csv('data/family_mov.csv')

In [3]:
family.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,The Life of Moses,1909,"Biography,Drama,Family",5.5,65
1,"His Majesty, the Scarecrow of Oz",1914,"Adventure,Comedy,Family",5.3,553
2,The Patchwork Girl of Oz,1914,"Adventure,Comedy,Family",5.4,603
3,Alice in Wonderland,1915,"Adventure,Family,Fantasy",6.1,856
4,The Babes in the Woods,1917,"Drama,Family,Fantasy",5.7,43


In [4]:
len(family)

15579

In [5]:
# Sorting the DataFrame from earliest to latest
family = family.sort_values(by='Year', ignore_index=True) 
family.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,The Life of Moses,1909,"Biography,Drama,Family",5.5,65
1,"His Majesty, the Scarecrow of Oz",1914,"Adventure,Comedy,Family",5.3,553
2,The Patchwork Girl of Oz,1914,"Adventure,Comedy,Family",5.4,603
3,Alice in Wonderland,1915,"Adventure,Family,Fantasy",6.1,856
4,Snow White,1916,"Adventure,Family,Fantasy",3.8,61


In [6]:
# Grouping by decade and counting entries
family['Decade'] = (family['Year'] // 10) * 10
decade_counts = family.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/family_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1900      1
1     1910     13
2     1920     44
3     1930    193
4     1940    295
5     1950    627
6     1960    824
7     1970   1280
8     1980   1632
9     1990   1543
10    2000   2271
11    2010   4920
12    2020   1936


In [7]:
# Plotly bars
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['yellow'], # Family films usually comical so yellow
             title='Family Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Family' titles
top_family = family.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_family.to_string(index=False))

                           Title  Rating
                        The Poet    10.0
            Kids on Kids on Kids    10.0
Auf den Spuren des Hans im Glück    10.0
               It's a Love Thang    10.0
                     Dhh Lekacha     9.9
               Hansel and Gretel     9.8
                          Partav     9.8
               The Road to Truth     9.8
                    Amche Samsar     9.8
              An American Posada     9.8


In [9]:
# Find the average rating amongst all 'Family' titles
average_rating = family['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 6.2178445343090045


In [10]:
# Find 10 highest voted 'Family' titles
top_vote = family.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                                        Title     Votes
                                       WALL·E   1248803
Harry Potter and the Deathly Hallows - Part 2    987688
                Sen to Chihiro no kamikakushi    897618
        Harry Potter and the Sorcerer's Stone    896345
      Harry Potter and the Chamber of Secrets    722822
     Harry Potter and the Prisoner of Azkaban    722684
          Harry Potter and the Goblet of Fire    711186
                                   Home Alone    691178
    Harry Potter and the Order of the Phoenix    660589
Harry Potter and the Deathly Hallows - Part 1    625351


In [11]:
# Plotly pie
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Family Films')
fig_pie.update_layout(title_x=0.4)  # Adjust the title placement
fig_pie.show()

In [12]:
# Find the average vote count amongst all 'Family' titles
average_votes = family['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 6.2178445343090045
