In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Adventure' Movie data
adventure = pd.read_csv('data/adventure_mov.csv')

In [3]:
adventure.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,The Story of the Kelly Gang,1906,"Action,Adventure,Biography",6.0,976
1,The Fairylogue and Radio-Plays,1908,"Adventure,Fantasy",5.2,78
2,Don Juan de Serrallonga,1910,"Adventure,Drama",3.5,22
3,L'inferno,1911,"Adventure,Drama,Fantasy",7.0,3739
4,The Adventures of Kathlyn,1913,Adventure,5.5,48


In [4]:
len(adventure)

21473

In [5]:
# Sorting the DataFrame from earliest to latest
adventure = adventure.sort_values(by='Year', ignore_index=True) 
adventure.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,The Story of the Kelly Gang,1906,"Action,Adventure,Biography",6.0,976
1,The Fairylogue and Radio-Plays,1908,"Adventure,Fantasy",5.2,78
2,Sonho de Valsa,1909,"Adventure,Drama",2.4,25
3,Don Juan de Serrallonga,1910,"Adventure,Drama",3.5,22
4,L'inferno,1911,"Adventure,Drama,Fantasy",7.0,3739


In [6]:
# Grouping by decade and counting entries
adventure['Decade'] = (adventure['Year'] // 10) * 10
decade_counts = adventure.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/adventure_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1900      3
1     1910    327
2     1920    761
3     1930    800
4     1940    764
5     1950   1241
6     1960   1914
7     1970   2074
8     1980   1857
9     1990   1584
10    2000   2474
11    2010   5393
12    2020   2281


In [7]:
# Plotly bar
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['lawngreen'], # Adventure is usually  green
             title='Adventure Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Adventure' titles
top_adventure = adventure.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_adventure.to_string(index=False))

                           Title  Rating
Auf den Spuren des Hans im Glück    10.0
               Independent Roads     9.9
    The Treasure of Pancho Villa     9.9
               Hansel and Gretel     9.8
             Flying Over Everest     9.8
                Buried in Tucson     9.8
                           Parto     9.8
                 The Inventurers     9.8
             McTaggart's Fortune     9.8
              Borderline Forever     9.8


In [9]:
# Find the average rating amongst all 'Adventure' titles
average_rating = adventure['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 5.867484748288549


In [10]:
# Find the average rating amongst all 'Adventure' titles
average_rating = adventure['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 5.867484748288549


In [11]:
# Find 10 highest voted 'Adventure' titles
top_vote = adventure.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                                            Title     Votes
                                        Inception   2657138
                                     Interstellar   2296921
The Lord of the Rings: The Fellowship of the Ring   2088682
    The Lord of the Rings: The Return of the King   2059827
            The Lord of the Rings: The Two Towers   1856216
                                        Gladiator   1738157
                             Inglourious Basterds   1661387
                                        Star Wars   1496080
   Star Wars: Episode V - The Empire Strikes Back   1428476
                                           Avatar   1422305


In [12]:
# Plotly pie
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Adventure Films')
fig_pie.update_layout(title_x=0.397)  # Adjust the title placement
fig_pie.show()

In [13]:
# Find the average vote count amongst all 'Adventure' titles
average_votes = adventure['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 5.867484748288549
