In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Importing 'Crime' Movie data
crime = pd.read_csv('data/crime_mov.csv')

In [3]:
crime.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,Ansigttyven I,1910,Crime,3.9,21
1,Zigomar contre Nick Carter,1912,"Crime,Thriller",6.0,54
2,What 80 Million Women Want,1913,"Crime,Drama,Romance",4.1,56
3,Fantômas I: À l'ombre de la guillotine,1913,"Crime,Drama",6.9,2612
4,In the Bishop's Carriage,1913,"Crime,Drama",5.6,27


In [4]:
len(crime)

32051

In [5]:
# Sorting the DataFrame from earliest to latest
crime = crime.sort_values(by='Year', ignore_index=True) 
crime.head()

Unnamed: 0,Title,Year,Genres,Rating,Votes
0,Ansigttyven I,1910,Crime,3.9,21
1,Harry the Footballer,1911,"Adventure,Crime,Drama",4.3,34
2,Zigomar contre Nick Carter,1912,"Crime,Thriller",6.0,54
3,Le mystère des roches de Kador,1912,"Crime,Drama",6.6,452
4,L'enfant de Paris,1913,"Crime,Drama",7.2,483


In [6]:
# Grouping by decade and counting entries
crime['Decade'] = (crime['Year'] // 10) * 10
decade_counts = crime.groupby('Decade').size().reset_index(name='Count')

# Printing results
print(decade_counts)

# Saving results to a text file
with open('data/crime_decades.txt', 'w') as f:
    f.write(decade_counts.to_string())

    Decade  Count
0     1910    138
1     1920    294
2     1930   1334
3     1940   1163
4     1950   1737
5     1960   2290
6     1970   2858
7     1980   2845
8     1990   3715
9     2000   4623
10    2010   7262
11    2020   3792


In [7]:
# Plotting with Plotly
fig = px.bar(decade_counts, x='Decade', y='Count', # color='Decade', 
             color_discrete_sequence=['purple'], # Crime is usually purple
             title='Crime Releases by Decade')
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
# Label the bar for 2010 pre-covid vertically
fig.add_annotation(x=2010, y=decade_counts.loc[decade_counts['Decade'] == 2010, 'Count'].values[0],
                   text="Pre-COVID",
                   showarrow=True,
                   arrowhead=2,
                   ax=0,
                   ay=-40,
                   font=dict(size=12))
fig.show()


In [8]:
# Find 10 highest rated 'Crime' titles
top_crime = crime.nlargest(10, 'Rating')[['Title', 'Rating']]
# Print list
print(top_crime.to_string(index=False))

                                    Title  Rating
La vida por mi barrio 13 (Mafia mexicana)    10.0
                      Der Mann von drüben     9.8
                       Party im Zwielicht     9.8
                       Juventud en drogas     9.8
                            Tujhko Pukare     9.8
                                Asatveera     9.8
                            Dheera Samrat     9.8
                                   Redrum     9.7
                                      4N6     9.7
           Die Dame in der schwarzen Robe     9.6


In [9]:
# Find the average rating amongst all 'Crime' titles
average_rating = crime['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 6.008676796355808


In [10]:
# Find the average rating amongst all 'Crime' titles
average_rating = crime['Rating'].mean()
print("Average Rating:", average_rating)

Average Rating: 6.008676796355808


In [11]:
# Find 10 highest voted 'Crime' titles
top_vote = crime.nlargest(10, 'Votes')[['Title', 'Votes']]
top_vote.insert(1, ' ', ' ')
# Print list
print(top_vote.to_string(index=False))

                   Title     Votes
         The Dark Knight   2989960
            Pulp Fiction   2311032
           The Godfather   2103011
                   Se7en   1888593
 The Wolf of Wall Street   1663985
The Silence of the Lambs   1617769
                   Joker   1589935
            The Departed   1474380
          The Green Mile   1470910
   The Godfather Part II   1416708


In [12]:
# use plotly to make pie chart
fig_pie = px.pie(top_vote, names='Title', values='Votes', title='Top 10 Voted Crime Films')
fig_pie.update_layout(title_x=0.45)  # Adjust the title placement
fig_pie.show()

In [13]:
# Find the average vote count amongst all 'Crime' titles
average_votes = crime['Votes'].mean()
print("Average Vote Count:", average_rating)

Average Vote Count: 6.008676796355808
