In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Action' Movie data
action = pd.read_csv('data/action_mov.csv')

In [3]:
action.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,The Story of the Kelly Gang,1906,"Action,Adventure,Biography",6.0,976
1,What Happened to Mary,1912,"Action,Drama,Thriller",6.2,36
2,Who Will Marry Mary?,1913,"Action,Adventure",5.2,29
3,Cameo Kirby,1914,"Action,Drama,Romance",6.5,18
4,The Exploits of Elaine,1914,Action,6.2,107


In [4]:
len(action)

35070

In [5]:
# Sorting the DataFrame from earliest to latest
action = action.sort_values(by='Year', ignore_index=True) 
action.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,The Story of the Kelly Gang,1906,"Action,Adventure,Biography",6.0,976
1,Chûshingura,1910,"Action,Drama",5.6,29
2,Attack on the Gold Escort,1911,"Action,Drama",4.2,26
3,What Happened to Mary,1912,"Action,Drama,Thriller",6.2,36
4,Cooee and the Echo,1912,"Action,Adventure",5.4,25


In [6]:
# Grouping by decade and counting entries
action['Decade'] = (action['Year'] // 10) * 10
decade_counts = action.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/action_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1900      1
1     1910    118
2     1920    416
3     1930    659
4     1940    507
5     1950    720
6     1960   1657
7     1970   3122
8     1980   4058
9     1990   4672
10    2000   5252
11    2010   9216
12    2020   4672


In [7]:
# Plotting with Plotly
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['dodgerblue'], # Action is usually  blue
             title='Action Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Action' titles
top_action = action.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_action.to_string(index=False))

                                        Title  Rating
                          The last USSR blues    10.0
                                       Vo tme    10.0
                 The Treasure of Pancho Villa     9.9
                                Tujhko Pukare     9.8
                                      One Way     9.8
                           Azotes de Barrio 2     9.8
                                      The RVM     9.8
                         Susuko ba ako, inay?     9.7
                             The Knight Squad     9.7
OF THE SEA: a film about California Fishermen     9.7


In [9]:
# Find the average rating amongst all 'Action' titles
average_rating = action['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 5.720379241516967


In [10]:
# Find 10 highest voted 'Action' titles
top_vote = action.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                                         Title     Votes
                               The Dark Knight   2989960
                                     Inception   2657138
                                    The Matrix   2135884
                         The Dark Knight Rises   1896633
                                     Gladiator   1738157
                                 Batman Begins   1632908
                                  The Avengers   1497659
                                     Star Wars   1496080
Star Wars: Episode V - The Empire Strikes Back   1428476
                                        Avatar   1422305


In [11]:
# use plotly to make pie chart
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Action Films')
fig_pie.update_layout(title_x=0.4)  # Adjust the title placement
fig_pie.show()

In [12]:
# Find the average vote count amongst all 'Action' titles
average_votes = action['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 5.720379241516967
