In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Horror' Movie data
horror = pd.read_csv('data/horror_mov.csv')

In [3]:
horror.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,Der Student von Prag,1913,"Drama,Fantasy,Horror",6.4,2533
1,The Avenging Conscience: or 'Thou Shalt Not Kill',1914,"Crime,Drama,Horror",6.4,1504
2,The Ghost Breaker,1914,"Adventure,Horror",4.8,49
3,Der Golem,1914,Horror,6.7,1280
4,Der Hund von Baskerville,1914,"Crime,Horror,Mystery",5.6,167


In [4]:
len(horror)

25647

In [5]:
# Sorting the DataFrame from earliest to latest
horror = horror.sort_values(by='Year', ignore_index=True) 
horror.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,Hidaka iriai zakura,1909,"Drama,Horror",5.9,15
1,Botan dôrô,1910,"Drama,Horror",4.5,13
2,Trilby,1912,Horror,3.9,33
3,Satana,1912,"Drama,Horror",5.3,37
4,I misteri della psiche,1912,"Drama,Fantasy,Horror",6.3,17


In [6]:
# Grouping by decade and counting entries
horror['Decade'] = (horror['Year'] // 10) * 10
decade_counts = horror.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/horror_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1900      1
1     1910     68
2     1920    101
3     1930    140
4     1940    190
5     1950    327
6     1960    721
7     1970   1483
8     1980   1805
9     1990   1673
10    2000   3882
11    2010   9593
12    2020   5663


In [7]:
# Plotly bar
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['limegreen'], # Horror is usually  dark green oddly
             title='Horror Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Horror' titles
top_horror = horror.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_horror.to_string(index=False))

                                     Title  Rating
            T.T.T. [Terror in Teruel Town]     9.6
              The Forest Through the Trees     9.6
                      Sandook - Ek Rahasya     9.5
                              Mashaarojinn     9.5
                   Guard: Revenge for Love     9.5
                             God Loves You     9.4
                    Dead Slate: Beginnings     9.4
Michael and Ghostface: Best Buds the Movie     9.4
                               Clownface 3     9.4
                       Happy Birthday Luci     9.4


In [9]:
# Find the average rating amongst all 'Horror' titles
average_rating = horror['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 4.996252973057278


In [10]:
# Find 10 highest voted 'Horror' titles
top_vote = horror.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

          Title     Votes
    The Shining   1156771
          Alien   1006850
    I Am Legend    843154
         Aliens    802962
American Psycho    768157
        Get Out    749922
    World War Z    747182
         Psycho    746547
           Jaws    688133
     Zombieland    641092


In [11]:
# Plotly pie
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Horror Films')
fig_pie.update_layout(title_x=0.45)  # Adjust the title placement
fig_pie.show()

In [12]:
# Find the average vote count amongst all 'Horror' titles
average_votes = horror['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 4.996252973057278
