In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Thriller' Movie data
thriller = pd.read_csv('data/thriller_mov.csv')

In [3]:
thriller.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,What Happened to Mary,1912,"Action,Drama,Thriller",6.2,36
1,Zigomar contre Nick Carter,1912,"Crime,Thriller",6.0,54
2,Der Andere,1913,"Drama,Thriller",5.4,126
3,"The $5,000,000 Counterfeiting Plot",1914,"Crime,Thriller",6.8,29
4,After Five,1915,"Comedy,Crime,Thriller",4.8,26


In [4]:
len(thriller)

30760

In [5]:
# Sorting the DataFrame from earliest to latest
thriller = thriller.sort_values(by='Year', ignore_index=True) 
thriller.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,One Hundred Years Ago,1911,"Drama,Thriller",2.3,19
1,Zigomar contre Nick Carter,1912,"Crime,Thriller",6.0,54
2,What Happened to Mary,1912,"Action,Drama,Thriller",6.2,36
3,Strike,1912,"Drama,Thriller",5.0,12
4,"Zigomar, peau d'anguille - Épisode 1: La résur...",1913,"Action,Thriller",5.8,23


In [6]:
# Grouping by decade and counting entries
thriller['Decade'] = (thriller['Year'] // 10) * 10
decade_counts = thriller.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/thriller_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1910     34
1     1920     63
2     1930    142
3     1940    226
4     1950    457
5     1960    880
6     1970   1500
7     1980   1908
8     1990   3379
9     2000   4826
10    2010  10477
11    2020   6868


In [21]:
# Plotly bar
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['aqua'], # Thriller is basically action so another blue
             title='Thriller Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Thriller' titles
top_thriller = thriller.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_thriller.to_string(index=False))

                    Title  Rating
                   Vo tme    10.0
            Alone at Home     9.8
    The Trees of the East     9.8
                    Rugna     9.8
   The Sound of Southside     9.8
   Nasoor - Let's Restart     9.8
                    Ajaan     9.8
            Dheera Samrat     9.8
        The Platinum Loop     9.8
Virinchi Independent Film     9.7


In [9]:
# Find the average rating amongst all 'Thriller' titles
average_rating = thriller['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 5.598881664499351


In [10]:
# Find the average rating amongst all 'Thriller' titles
average_rating = thriller['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 5.598881664499351


In [11]:
# Find 10 highest voted 'Thriller' titles
top_vote = thriller.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                   Title     Votes
   The Dark Knight Rises   1896633
The Silence of the Lambs   1617769
                   Joker   1589935
          Shutter Island   1531324
            The Departed   1474380
                 Memento   1367851
       Kill Bill: Vol. 1   1239769
          Reservoir Dogs   1124287
               Gone Girl   1110965
  No Country for Old Men   1109883


In [12]:
# use plotly to make pie chart
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Thriller Films')
fig_pie.update_layout(title_x=0.45)  # Adjust the title placement
fig_pie.show()

In [13]:
# Find the average vote count amongst all 'Thriller' titles
average_votes = thriller['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 5.598881664499351
