In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Drama'' Movie data
drama = pd.read_csv('data/drama_mov.csv')

In [3]:
drama.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,L'enfant prodigue,1907,Drama,5.6,31
1,Robbery Under Arms,1907,Drama,4.3,28
2,Amleto,1908,Drama,3.2,33
3,Don Quijote,1908,Drama,4.3,23
4,Hamlet,1910,Drama,4.5,41


In [4]:
len(drama)

165242

In [5]:
# Sorting the DataFrame from earliest to latest
drama = drama.sort_values(by='Year', ignore_index=True) # Cleaner look
drama.head()


Unnamed: 0,Title,Year,Genres,Rating,Votes
0,La vie et la passion de Jésus Christ,1903,"Biography,Drama",6.5,755
1,S. Lubin's Passion Play,1903,Drama,4.6,12
2,Dingjunshan,1905,Drama,6.3,54
3,Vie et Passion de N.S Jésus-Christ,1907,"Biography,Drama,History",6.6,88
4,El grito de Dolores o La independencia de México,1907,Drama,5.0,39


In [6]:
# Grouping by decade and counting entries
drama['Decade'] = (drama['Year'] // 10) * 10
decade_counts = drama.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/drama_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1900     19
1     1910   1438
2     1920   2355
3     1930   4752
4     1940   4893
5     1950   7522
6     1960  10316
7     1970  12956
8     1980  14861
9     1990  16273
10    2000  26095
11    2010  43018
12    2020  20744


In [7]:
# Plotting with Plotly
fig = px.bar(decade_counts, x='Decade', y='Count', color='Decade', 
             color_discrete_sequence=px.colors.sequential.Plasma, 
             title='Drama Releases by Decade')
# Outline bars for clarity
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [15]:
# Find 10 highest rated 'Drama' titles
top_rating = drama.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_rating.to_string(index=False))

                                 Title  Rating
                              The Poet    10.0
The Secret Diaries of the Film Censors    10.0
                         Ka Mon Bajwat    10.0
                                  Ixel    10.0
                             Displaced    10.0
                   Rainy in Glenageary    10.0
                            D on Dance    10.0
                               Kaputol    10.0
                                Rijali    10.0
                        Gallo de pelea    10.0


In [9]:
# Find the average rating amongst all 'Drama' titles
average_rating = drama['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 6.258359254910979


In [None]:
# Find 10 highest voted 'Drama' titles
top_vote = drama.nlargest(10, 'Votes')[['Title', 'Votes']]
# Add space between columns for easier reading by adding a blank column with (2) spaces
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                                            Title     Votes
                         The Shawshank Redemption   3018863
                                  The Dark Knight   2995152
                                       Fight Club   2441331
                                     Forrest Gump   2360083
                                     Pulp Fiction   2316350
                                     Interstellar   2309781
                                    The Godfather   2106844
The Lord of the Rings: The Fellowship of the Ring   2091781
    The Lord of the Rings: The Return of the King   2062898
                            The Dark Knight Rises   1899721


In [25]:
# use plotly to make pie chart
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Drama Films')
fig_pie.update_layout(title_x=0.4)  # Adjust the title placement
fig_pie.show()

In [12]:
# Find the average vote count amongst all 'Drama' titles
average_votes = drama['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 6.258359254910979
