In [1]:
import numpy as np
import json
import os
import pandas as pd

from pathlib import Path

In [2]:
# Verbs treated as evidence of death when the character is the *patient*
# of the verb (looked up for 'patient verb' entries of a character bag).
death_verbs_patient = {
    "annihilate", "assassinate", "behead", "crucify",
    "decapitate", "destroy", "drown", "eradicate",
    "euthanize", "execute", "exterminate", "hang",
    "kill", "lynch", "massacre", "murder",
    "obliterate", "poison", "sacrifice", "slay",
    "terminate",
}

# Verbs treated as evidence of death when the character is the *agent*
# of the verb (looked up for 'agent verb' entries of a character bag).
# NOTE(review): the multi-word entries can only match if the bags store such
# phrases as a single string - TODO confirm against the bag-building step.
death_verbs_agent = {
    "bleed out", "commit", "die",
    "expire", "pass away", "perish",
    "sacrifice", "succumb", "surrender life",
}

def count_character_deaths(characters_bags, patient_verbs=None, agent_verbs=None):
    """Count the characters whose verb bag contains a death-related verb.

    A character is flagged as dead as soon as its bag contains either a
    ``'patient verb'`` found in ``patient_verbs`` or an ``'agent verb'``
    found in ``agent_verbs``. Entries with any other verb type are ignored.

    Parameters
    ----------
    characters_bags : iterable of dict
        One dict per character, providing the keys ``'name'`` and ``'bag'``.
        The bag is an iterable of ``(verb_type, verb)`` pairs.
    patient_verbs : collection of str, optional
        Lexicon checked for ``'patient verb'`` entries. Defaults to the
        module-level ``death_verbs_patient``.
    agent_verbs : collection of str, optional
        Lexicon checked for ``'agent verb'`` entries. Defaults to the
        module-level ``death_verbs_agent``.

    Returns
    -------
    tuple
        ``(death_count, character_death_flags)``: the number of flagged
        entries, and a dict mapping each character name to 1 (dead) or 0.

    Notes
    -----
    ``death_count`` is incremented once per entry of ``characters_bags``,
    whereas ``character_death_flags`` is keyed by name: if two entries share
    a name, the later entry's flag overwrites the earlier one.
    """
    # Resolve the defaults at call time so callers may supply their own lexicons.
    if patient_verbs is None:
        patient_verbs = death_verbs_patient
    if agent_verbs is None:
        agent_verbs = death_verbs_agent

    death_count = 0
    character_death_flags = {}

    for character in characters_bags:
        char_name = character['name']
        char_bag = character['bag']

        # any() stops at the first matching verb, like an explicit break would.
        died = any(
            (verb_type == 'patient verb' and verb in patient_verbs)
            or (verb_type == 'agent verb' and verb in agent_verbs)
            for verb_type, verb in char_bag
        )

        character_death_flags[char_name] = 1 if died else 0
        if died:
            death_count += 1

    return death_count, character_death_flags

In [3]:
data_path = Path('data/processed')

# movie_id -> summary of the death statistics of that movie.
records = {}

# Sorted so that the insertion order of `records` (and hence the order of ties
# in any later stable sort) does not depend on the arbitrary os.listdir order.
for movie in sorted(os.listdir(data_path)):
    movie_path = data_path / movie

    # Skip anything that is not a JSON file (e.g. checkpoint folders),
    # which would otherwise make open()/json.load() fail.
    if not (movie_path.is_file() and movie.endswith('.json')):
        continue

    # The context manager closes every handle right away instead of leaking
    # one open file per movie.
    with open(movie_path, encoding='utf-8') as bag_file:
        character_bags = json.load(bag_file)

    # The ID is the part after the last underscore, up to the first dot,
    # e.g. 'character_bags_3238135.json' -> '3238135'.
    movie_id = movie.split('_')[-1].split('.')[0]

    num_deaths, character_deaths = count_character_deaths(character_bags)

    records[movie_id] = {'num_deaths': num_deaths,
                         'num_characters': len(character_bags),
                         'character_deaths': character_deaths}

In [4]:
# Display only the movies with the most deaths: rendering the full sorted
# dictionary would write one nested entry per movie into the cell output.
TOP_N_MOVIES = 10
dict(sorted(records.items(), key=lambda item: item[1]['num_deaths'], reverse=True)[:TOP_N_MOVIES])

{'3238135': {'num_deaths': 9,
  'num_characters': 12,
  'character_deaths': {'Simon Cruise': 0,
   'Greg': 1,
   'Alicia': 0,
   'Karma': 1,
   'Cynthia': 1,
   'Captain Victor Kirk': 1,
   'Salish': 1,
   'Jordan Casper': 1,
   'Rudy Curien': 0,
   'Liberty': 1,
   'Hugh': 1,
   'Castillo': 1}},
 '714334': {'num_deaths': 8,
  'num_characters': 11,
  'character_deaths': {'Freddy Krueger': 1,
   'Jason Voorhees': 1,
   'Lori Campbell': 1,
   'Trey': 1,
   'Kia Waterson': 0,
   'Gibb Smith': 1,
   'Will Rollins': 0,
   'Mark Davis': 1,
   'Bill Freeburg': 1,
   'Deputy Scott Stubbs': 0,
   'Charlie Linderman': 1}},
 '600351': {'num_deaths': 8,
  'num_characters': 14,
  'character_deaths': {'Cotton Weary': 1,
   'Sidney Prescott': 0,
   'Christine Hamilton': 1,
   'Det. Mark Kincaid': 0,
   'Gale Weathers': 1,
   "Dwight 'Dewey' Riley": 0,
   'Sarah Darling': 0,
   'Jennifer Jolie': 1,
   'Steven Stone': 0,
   'Tom Prinze': 1,
   'Angelina Tyler': 1,
   'Tyson Fox': 0,
   'Roman Bridger':

In [5]:
# Per-movie tallies, gathered once instead of rebuilding the same list for
# every printed line.
characters_per_movie = [movie_stats['num_characters'] for movie_stats in records.values()]
deaths_per_movie = [movie_stats['num_deaths'] for movie_stats in records.values()]

# Share of flagged characters per movie. Movies without any character are
# left out so that the division cannot raise ZeroDivisionError.
death_ratio_per_movie = [
    movie_stats['num_deaths'] / movie_stats['num_characters']
    for movie_stats in records.values()
    if movie_stats['num_characters']
]

# Double-quoted f-strings with no quotes inside the replacement fields:
# reusing the same quote character there is only valid from Python 3.12.
print(f"Number of movies = {len(records)}")
print(f"Number of characters = {np.sum(characters_per_movie)}")
print(f"Number of deaths = {np.sum(deaths_per_movie)}\n")

print(f"Average number of characters per movie = {np.mean(characters_per_movie).round(2)}")
print(f"Average number of deaths per movie = {np.mean(deaths_per_movie).round(2)}")
# NOTE: despite the label, the value is a fraction (it is not multiplied by 100).
print(f"Average percentage of dead characters = {np.mean(death_ratio_per_movie).round(2)}")

Number of movies = 18990
Number of characters = 57949
Number of deaths = 7232

Average number of characters per movie = 3.05
Average number of deaths per movie = 0.38
Average percentage of dead characters = 0.11


## Examine specific case

In [6]:
# Load the character bags of movie 3238135. The context manager closes the
# file handle, which a bare json.load(open(...)) would leave open.
with open('data/processed/character_bags_3238135.json', encoding='utf-8') as bag_file:
    character_bags = json.load(bag_file)

In [7]:
# Token table of the same movie (ID 3238135), read from the
# corenlp_plot_summaries directory.
tokens = pd.read_csv(
    filepath_or_buffer='data/interim/corenlp_plot_summaries/tokens_3238135.csv',
)

In [8]:
# Rebuild the text by joining the 'word' column with single spaces.
" ".join(word for word in tokens['word'])

"The film begins with Simon and Greg planning to take a boat to an island rave party . They meet up with Alicia , Karma and Cynthia . Karma has a crush on Simon , Simon has a crush on Alicia and Cynthia is Greg 's girlfriend . When they arrive at the dock , they find the boat to take them to the island has already left . They hitch a ride instead with Victor Kirk and his first mate Salish . A cop named Jordan Casper tries to stop them since Kirk is a smuggler , but they leave anyway . Arriving at the island party site they find it a deserted mess . Alicia , Karma and Simon start searching for the others leaving Cynthia and Greg behind . The two start to making out in a tent , but Greg leaves to urinate . Left alone , Cynthia is killed by a group of zombies . Meanwhile , the others find an old house . Inside , they find Rudy , Liberty and Hugh who tell them zombies attacked the rave , killing everyone . The six go back to the rave site to get the others . Meanwhile , Salish is killed wh