In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Romance' Movie data
romance = pd.read_csv('data/romance_mov.csv')

In [3]:
romance.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,Miss Jerry,1894,Romance,5.4,223
1,La dame aux camélias,1912,"Drama,Romance",5.3,45
2,Amor fatal,1911,"Drama,Romance",7.5,24
3,Anny - en gatepiges roman,1912,"Drama,Romance",4.6,17
4,Den glade løjtnant,1912,Romance,3.8,11


In [4]:
len(romance)

39813

In [5]:
# Sorting the DataFrame from earliest to latest
romance = romance.sort_values(by='Year', ignore_index=True) 
romance.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,Miss Jerry,1894,Romance,5.4,223
1,A Viúva Alegre,1909,Romance,5.3,21
2,Sumurûn,1910,Romance,5.0,31
3,Amor fatal,1911,"Drama,Romance",7.5,24
4,Arrah-Na-Pogue,1911,"Drama,Romance",3.2,28


In [6]:
# Grouping by decade and counting entries
romance['Decade'] = (romance['Year'] // 10) * 10
decade_counts = romance.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/romance_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1890      1
1     1900      1
2     1910    254
3     1920    845
4     1930   2225
5     1940   1602
6     1950   2039
7     1960   2551
8     1970   2419
9     1980   2789
10    1990   3948
11    2000   6602
12    2010   9617
13    2020   4920


In [19]:
# Plotly bar
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['orange'], # Musicals is usually  orange
             title='Romance Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Romance' titles
top_romance = romance.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_romance.to_string(index=False))

                                            Title  Rating
                             Soori Love's Sandhya    10.0
                             Not looking for love    10.0
                                      The College     9.9
                                              Ah!     9.9
                            Pyar Kiya Toh Nibhana     9.8
                              A Ghetto Love Story     9.8
                                   Peluang Ketiga     9.8
Sundays After Church Episode 1 Queen or Concubine     9.8
                                 Pop Lock 'n Roll     9.7
                                      Get Over It     9.7


In [9]:
# Find the average rating amongst all 'Romance' titles
average_rating = romance['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 6.091055685328913


In [10]:
# Find 10 highest voted 'Romance' titles
top_vote = romance.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                                Title     Votes
                         Forrest Gump   2360083
                              Titanic   1330578
Eternal Sunshine of the Spotless Mind   1127990
                    Good Will Hunting   1126981
                  Slumdog Millionaire    892882
  Le fabuleux destin d'Amélie Poulain    814660
                      La vita è bella    772355
              Silver Linings Playbook    757282
  The Curious Case of Benjamin Button    717856
                                  Her    697583


In [None]:
# Plotly pie
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Romance Films')
fig_pie.update_layout(title_x=0.46)  # Adjust the title placement
fig_pie.show()

In [12]:
# Find the average vote count amongst all 'Romance' titles
average_votes = romance['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 6.091055685328913
