In [118]:
import numpy as np
import pandas as pd
import re

In [148]:
def read_name_file(filename):
    names = []
    with open(filename, "r") as f:
        for line in f:
            if not line.startswith("#") and line.strip() != "":  # remove comments and empty lines
                names.append(line.strip().lower())
    return names

In [146]:
def read_script_file(filename):
    names = []
    with open(filename, "r") as f:
        for line in f:
            if not line.startswith("#") and line.strip() != "":  # remove comments and empty lines
                names.append(line.strip())
    return names

In [147]:
read_script_file('../data/Scripts/2012.txt')

['2012',
 'Written by',
 'Roland Emmerich & Harald Kloser',
 'Second Draft',
 'February 19th, 2008',
 'OVER BLACK',
 "We listen to the immortal music of Mozart's Adagio of the",
 'Clarinet Concerto in A.',
 'FADE UP',
 'EXT. THE SOLAR SYSTEM',
 'Space, infinite and empty.',
 'But then, slowly all nine planets of our Solar System move',
 'into frame and align.',
 'The last of them is the giant, burning sphere of the sun.',
 'Just as the sun enters frame, a solar storm of gigantic',
 'proportion unfolds. The eruptions shoot thousands of miles',
 'into the blackness of space.',
 'FADE TO BLACK',
 '2009',
 'FADE UP',
 'EXT. COUNTRY SIDE/INDIA - SUNSET',
 "Mozart's concerto filters from a jeep's stereo, fighting the",
 'drumming sounds of the monsoon rain. PROF. FREDERIC WEST, 66,',
 'listens to the music.',
 'An Indian BOY playing by the roadside steers his wooden toy',
 'ship across a puddle.',
 'The Professor turns to his driver, pointing to the boy.',
 'PROF. WEST',
 'Watch out!',
 "But

In [149]:
female_names = "../data/female.txt"
male_names = "../data/male.txt"
male_names = read_name_file(male_names)
female_names = read_name_file(female_names)

In [157]:
def character_list(file):
    names = []
    with open(file, 'r') as f:
        for line in f:
            names.append(''.join(re.findall(r'[A-Z]', line)))
    
    #now let's search through the list and find only human names
    female_characters = []
    male_characters = []
    for i in names:
        if i.lower() in female_names:
            female_characters.append(i)
        elif i.lower() in male_names:
            male_characters.append(i)
        else:
            pass
    return set(female_characters)

In [158]:
character_list('../data/Scripts/2012.txt')

{'ADRIAN',
 'AG',
 'DI',
 'JO',
 'KAT',
 'KATE',
 'KI',
 'LA',
 'LAURA',
 'LILLY',
 'LIN',
 'SALLY',
 'SASHA',
 'TAMARA',
 'TIA',
 'TONY'}

In [104]:
with open('../data/Scripts/2012.txt', 'r') as f:
    script = f.read()

In [102]:
def baseline_bechdel_test(script):
    """Implements a baseline bechdel test which return True if two women are speaking
        Args:
            script: a parsed script from the IMSDBParser class
        Returns:
            Whether the test is passed
    """
    prev_speaker = None
    test_passed = False
    for speaker, line in read_name_file(script):
        if speaker in female_names:
            if prev_speaker != speaker and prev_speaker is not None: #two women are talking!
                test_passed = True
                break
            else:
                prev_speaker = speaker
        else:
            prev_speaker = None
    return test_passed

In [86]:
baseline_bechdel_test('../data/Scripts/2012.txt')

False

In [109]:
def get_scene_boundaries(filepath):

    script_content = []
    scene_points = []
    with open(filepath) as fp:
        contents = fp.readlines()
        for idx, line in enumerate(contents):
            if ":SC:" in line or "EXT." in line or "INT." in line:
                scene_points.append(idx)
                script_content.append(line)

    start_end = []
    for s in range(len(scene_points) - 1):
        current_item, next_item = scene_points[s], scene_points[s + 1]
        start_end.append((current_item, next_item-1))

    scene_boundaries = []
    for bound in start_end:
        scene_boundaries.append(script_content[bound[0]:bound[1]])

    return scene_boundaries

In [110]:
get_scene_boundaries('../data/Scripts/2012.txt')

[["          EXT. KATE'S HOUSE/LOS ANGELES - MORNING\n",
  '          EXT. SHIP DECK/SAN FRANCISCO HARBOR - DAY\n',
  "          INT. LAURA'S BEDROOM/D.C. - EARLY MORNING\n",
  '          EXT. STREETS/PARIS - NIGHT\n',
  '          EXT. ROAD/YELLOWSTONE NATIONAL PARK - DAY\n',
  '          INT. OVAL OFFICE/WHITE HOUSE - MORNING\n',
  '          EXT. FOREST TRAIL/YELLOWSTONE NATIONAL PARK - DAY\n',
  '          EXT. RIDGE/YELLOWSTONE NATIONAL PARK - DAY\n',
  '          EXT. EMPTY LAKE BED/YELLOWSTONE NATIONAL PARK - DAY\n',
  '          EXT. RESEARCH FACILITY/YELLOWSTONE NATIONAL PARK - DAY\n',
  '          EXT. FOREST TRAIL/YELLOWSTONE NATIONAL PARK - LATER\n',
  '          EXT. TENT/YELLOWSTONE NATIONAL PARK - DUSK\n',
  '          EXT. RESEARCH FACILITY/YELLOWSTONE NATIONAL PARK - DUSK\n',
  '          INT. LIMO/YELLOWSTONE NATIONAL PARK - DUSK\n',
  "          INT. CHARLIE'S RV/YELLOWSTONE NATIONAL PARK - DUSK\n",
  '          EXT. PARKING LOT OF SUPERMARKET/LOS ANGELES - NIGHT\n',

In [111]:
def read_name_file(filename):
    """Read a file containing a list of names
        Args:
            filename: a file containing a name for each row
        Returns:
            An array of names from the input file
    """
    names = []
    with open(filename, "r") as f:
        for line in f:
            if not line.startswith("#") and line.strip() != "":  # remove comments and empty lines
                names.append(line.strip().lower())
    return names


def baseline_bechdel_test(script):
    """Implements a baseline bechdel test which return True if two women are speaking
        Args:
            script: a parsed script from the IMSDBParser class
        Returns:
            Whether the test is passed
    """
    prev_speaker = None
    test_passed = False
    for speaker, line in script:
        if speaker in female_names:
            if prev_speaker != speaker and prev_speaker is not None: #two women are talking!
                test_passed = True
                break
            else:
                prev_speaker = speaker
        else:
            prev_speaker = None
    return test_passed

In [112]:
baseline_bechdel_test('../data/Scripts/2012.TXT')

ValueError: not enough values to unpack (expected 2, got 1)

In [113]:
with open('../data/Scripts/2012.TXT', 'r') as f:
    script = f.read()

In [115]:
type(script)

str

In [37]:
def read_name_file(filename):
    """Read a file containing a list of names
        Args:
            filename: a file containing a name for each row
        Returns:
            An array of names from the input file
    """
    names = []
    with open(filename, "r") as f:
        for line in f:
            if not line.startswith("#") and line.strip() != "":  # remove comments and empty lines
                names.append(line.strip().lower())
    return names


def baseline_bechdel_test(script):
    """Implements a baseline bechdel test which return True if two women are speaking
        Args:
            script: a parsed script from the IMSDBParser class
        Returns:
            Whether the test is passed
    """
    prev_speaker = None
    test_passed = False
    for speaker, line in script:
        if speaker in female_names:
            if prev_speaker != speaker and prev_speaker is not None: #two women are talking!
                test_passed = True
                break
            else:
                prev_speaker = speaker
        else:
            prev_speaker = None
    return test_passed


def complete_bechdel_test(script):
    """Implements the complete bechdel test
        Args:
            script: a parsed script from the IMSDBParser class
        Returns:
            Whether the test is passed
    """
    prev_speaker = None
    test_passed = False
    for speaker, line in script:
        if speaker in female_names: #woman talking
            if prev_speaker != speaker and prev_speaker is not None: #two women are talking...
                talking_about_men=False #are they talking about men?
                for name in male_names:
                    if name in line:
                        talking_about_men=True
                if talking_about_men:  # They're talking about men
                    test_passed = False
                else:  # They're not talking about men!
                    test_passed= True
            else:  # prev speaker was a male/this speaker
                prev_speaker = speaker
        else:  # if a woman-to-woman male-free discussion was completed, return True
            if test_passed==True:
                break
            else:
                prev_speaker = None
    return test_passed

female_names = "../data/female.txt"
male_names = "../data/male.txt"
male_names = read_name_file(male_names)
female_names = read_name_file(female_names)
#parser = IMSDBParser(male_names+female_names, n_scripts=100)
#scripts = parser.get_all_scripts()
#successful_movies = []  # this will store movies which passed the baseline test
#complete_successful_movies = []  # this will store movies which passed the complete test
#unsuccessful_movies = []  # this will store movies which didn't pass the complete test
#for script in scripts:
    #if baseline_bechdel_test(script):
    #    successful_movies.append(script)
#    if complete_bechdel_test(script):
#        complete_successful_movies.append(script)
#    else:
#        unsuccessful_movies.append(script)
print ("Bechdel Test completed!")


Bechdel Test completed!
