# Setup

## Setup - Imports

In [None]:
# python base imports
import datetime
import json

# import six
import six

print( "packages imported at " + str( datetime.datetime.now() ) )

## Setup - Initialize Django

- Back to [Table of Contents](#Table-of-Contents)

First, initialize my dev django project, so I can run code in this notebook that references my django models and can talk to the database using my project's settings.

In [None]:
%run ../django_init.py

In [None]:
# django imports
from django.contrib.auth.models import User

# sourcenet imports
from context_text.shared.context_text_base import ContextTextBase

# context_analysis imports
from context_analysis.network.network_person_info import NetworkPersonInfo

# sourcenet imports
from context_text.models import Article
from context_text.models import Article_Author
from context_text.models import Article_Data
from context_text.models import Article_Subject
from context_text.models import Newspaper
from context_text.models import Person

# article coding
from context_text.article_coding.article_coder import ArticleCoder
#from context_text.article_coding.article_coding import ArticleCoding
from context_text.article_coding.open_calais_v2.open_calais_v2_article_coder import OpenCalaisV2ArticleCoder

# import class that actually processes requests for outputting networks.
from context_text.export.network_output import NetworkOutput

# context_text shared
from context_text.shared.context_text_base import ContextTextBase

print( "django model packages imported at " + str( datetime.datetime.now() ) )

## Setup - shared variables

In [None]:
# ArticleCoding is not instantiated here (its import above is commented out).
#article_coding = ArticleCoding()

# user returned by ArticleCoder for automated coding.
automated_coder = ArticleCoder.get_automated_coding_user()

# Newspaper instances for Grand Rapids Press ("GRPB") and Detroit News
#     ("DTNB"), looked up by NewsBank code, in that order.
grand_rapids_press, detroit_news = [
    Newspaper.objects.get( newsbank_code = paper_code )
    for paper_code in ( "GRPB", "DTNB" )
]

# coder_type value used to select OpenCalais v2 Article_Data.
ocv2_coder_type = OpenCalaisV2ArticleCoder.CONFIG_APPLICATION

# Filter Article_Data

In [None]:
# declare variables
article_data_qs = article_data_count = None

# base QuerySet - every Article_Data row, no filtering yet.
article_data_qs = Article_Data.objects.all()

# row count before any filters are applied.
article_data_count = article_data_qs.count()

print( "Starting with {} total Article_Data instances.".format( article_data_count ) )

## Detect single-name people within Article_Data

- code to filter out those with single names... where?

    - notebook where work was done originally (just notes - it was manual): [prelim_month-create_Reliability_Names_data.ipynb](./methods/data_creation/prelim_month-create_Reliability_Names_data.ipynb)
    - code to filter to just single first names is in `context_analysis/views.py --> reliability_names_disagreement_view()`:
    
            if ( reliability_names_only_first_name == True ):

                # to start, first name needs to not be null and
                #     not be empty.
                reliability_names_qs = reliability_names_qs.filter( 
                    Q( person__first_name__isnull = False ) & ~Q( person__first_name = "" ),
                    Q( person__middle_name__isnull = True ) | Q( person__middle_name = "" ),
                    Q( person__last_name__isnull = True ) | Q( person__last_name = "" ),
                    Q( person__name_prefix__isnull = True ) | Q( person__name_prefix = "" ),
                    Q( person__name_suffix__isnull = True ) | Q( person__name_suffix = "" ),
                    Q( person__nickname__isnull = True ) | Q( person__nickname = "" ),
                )

            #-- END only first name --#

Person in Article_Data

- Article_Data

    - Article_Author
    - Article_Subject
    - both Article_Author and Article_Subject have "person" relation that ties to person instance for name and other details.
    - They also have name fields:
    
        - name
        - verbatim_name
        - lookup_name
        
    - Should mine the above to see how widely and reliably the name fields were set - could just look for names with no internal spaces there...

In [None]:
# start with Article_Data QuerySet
# (re-binds article_data_qs to an unfiltered QuerySet over all Article_Data.)
article_data_qs = Article_Data.objects.all()

### Article_Subject

- Some have no name, verbatim_name, or lookup_name.

    - small sample included valid people where this data was just not captured early on.
    - see if I can write a script to populate from saved data.

- For some, name/verbatim_name/lookup_name are a single name. These we'd want to omit, as long as this is actually the verbatim name from the article.

    - spot-check in [View article + coding](https://research.local/research/context/text/article/article_data/view_with_text/)
    - looks like it is, and so omit if the subject's name in Article_Subject has no spaces.
    - check programmatically for single-name people (not only is the verbatim name in Article_Subject a single word, but also look at the different name elements of the "Person" associated with the single-name mention to see if the Person has just a first name, also). Two reasons for this kind of single-name match:
    
        - from early days, program created person with single name part from article.
        - ...?  I already forgot.

In [None]:
# Filter Article_Subject down to automated OpenCalais v.2 coding of Grand
#     Rapids Press articles, then count how many have no space in each of
#     the name, verbatim_name and lookup_name fields.  When
#     do_output_details is True, a sample of the records matched by the
#     preceding no-space test is printed.

# declare variables
work_article_subject_qs = None
test_article_subject_qs = None
match_count = None
article_subject = None
my_name = None
my_verbatim_name = None
my_lookup_name = None
do_output_details = None
do_limit_to_sources = None

# configure
do_output_details = False
do_limit_to_sources = False


def output_article_subject_details( article_subject_qs_IN, limit_IN = 10 ):

    '''
    Prints name, verbatim_name and lookup_name, plus the related
        Article_Data and Article, for the first limit_IN Article_Subject
        instances in the QuerySet passed in.

    Parameters:
    - article_subject_qs_IN - QuerySet of Article_Subject instances to sample.
    - limit_IN - maximum number of instances to print (default 10).
    '''

    # declare variables
    current_subject = None
    current_article_data = None

    for current_subject in article_subject_qs_IN[ 0 : limit_IN ]:

        print( "\n{article_subject}:".format( article_subject = current_subject ) )
        print( "-          name: {}".format( current_subject.name ) )
        print( "- verbatim_name: {}".format( current_subject.verbatim_name ) )
        print( "-   lookup_name: {}".format( current_subject.lookup_name ) )

        current_article_data = current_subject.article_data
        print( "- Article_Data: {}".format( current_article_data ) )
        print( "- Article: {}".format( current_article_data.article ) )

    #-- END loop over sample of Article_Subject instances --#

#-- END function output_article_subject_details() --#


# set up base queryset
work_article_subject_qs = Article_Subject.objects.all()
match_count = work_article_subject_qs.count()
print( "total Article_Subject count = {}".format( match_count ) )

# just automated coder.
work_article_subject_qs = work_article_subject_qs.filter( article_data__coder = automated_coder )
match_count = work_article_subject_qs.count()
print( "automated Article_Subject count = {}".format( match_count ) )

# only OpenCalais V2.
work_article_subject_qs = work_article_subject_qs.filter( article_data__coder_type = ocv2_coder_type )
match_count = work_article_subject_qs.count()
print( "automated OpenCalais v.2 Article_Subject count = {}".format( match_count ) )

# only Grand Rapids Press.
work_article_subject_qs = work_article_subject_qs.filter( article_data__article__newspaper = grand_rapids_press )
match_count = work_article_subject_qs.count()
print( "automated GRP Article_Subject count = {}".format( match_count ) )

# only subjects of type source?
if ( do_limit_to_sources == True ):

    work_article_subject_qs = work_article_subject_qs.filter( subject_type = Article_Subject.SUBJECT_TYPE_QUOTED )
    match_count = work_article_subject_qs.count()
    print( "automated GRP Article_Subject quoted acount = {}".format( match_count ) )

#-- END check if we limit to quoted/sources --#

# look for any where name is not NULL.
work_article_subject_qs = work_article_subject_qs.filter( name__isnull = False )
match_count = work_article_subject_qs.count()
print( "only those with name set - name__isnull = False --> match count = {}".format( match_count ) )

# look for any that have no space in name.
test_article_subject_qs = work_article_subject_qs.exclude( name__contains = " " )
match_count = test_article_subject_qs.count()
print( "single_name records - exclude name__contains = \"<space>\" --> match count = {}".format( match_count ) )

if ( do_output_details == True ):

    # sample the records that matched the no-space-in-name test, not the
    #     whole working QuerySet.
    output_article_subject_details( test_article_subject_qs )
    print( "\n" )

#-- END check if output details --#

# look for any that have no space in verbatim_name.
test_article_subject_qs = work_article_subject_qs.exclude( verbatim_name__contains = " " )
match_count = test_article_subject_qs.count()
print( "exclude verbatim_name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# look for any that have no space in lookup_name.
test_article_subject_qs = work_article_subject_qs.exclude( lookup_name__contains = " " )
match_count = test_article_subject_qs.count()
print( "exclude lookup_name__contains = \"<space>\" --> match count = {}".format( match_count ) )

if ( do_output_details == True ):

    # sample the records that matched the no-space-in-lookup_name test.
    output_article_subject_details( test_article_subject_qs )

#-- END check if output details. --#

#### Article_Subject - collect information

In [None]:
#=============================================================================#
# Purpose: loop over every Article_Subject in work_article_subject_qs (built
#     in the filtering cell earlier in this notebook - that cell must have
#     been run first) and tally how the name strings stored on the
#     Article_Subject compare with each other and with the related Person:
#     - name vs. verbatim_name equality,
#     - verbatim_name token count (one vs. more than one),
#     - Person name part count (one vs. other),
#     - disagreements between those counts, collecting Article_Subject IDs
#       for the single-name mismatches.
#     A summary of all counters is printed at the end.
#=============================================================================#

#=============================================================================#
# declare variables
#=============================================================================#
article_person = None
my_name = None
my_verbatim_name = None
my_verbatim_name_part_list = None
my_verbatim_name_part_count = None
my_verbatim_name_has_spaces = None
my_official_name_part_count = None
my_person = None
person_name_string = None
person_name_part_count = None
is_single_token = None
is_single_name = None

# declare variables - auditing
do_output_progress = None
print_every_x_records = None
record_counter = None
my_start_dt = None
current_dt = None
current_elapsed = None
total_elapsed = None
total_average = None
previous_dt = None

# declare variables - collect data
article_person_count = None
my_names_different_count = None
my_names_same_count = None
has_spaces_count = None
single_name_count = None
single_name_mismatch_count = None
single_name_mismatch_list = None
single_token_to_multi_name_person_count = None
single_token_to_multi_name_list = None
multi_token_to_single_name_person_count = None
person_single_name_count = None
my_name_counts_different_count = None
name_counts_different_count = None
same_name_as_person_count = None
different_name_from_person_count = None

#=============================================================================#
# config/init
#=============================================================================#
do_output_progress = True
print_every_x_records = 10000
my_start_dt = datetime.datetime.now()
previous_dt = my_start_dt
single_name_mismatch_list = list()
single_token_to_multi_name_list = list()

# sort QuerySet by ID.
work_article_subject_qs = work_article_subject_qs.order_by( 'id' )

# initialize counts
record_counter = 0
my_names_different_count = 0
my_names_same_count = 0
has_spaces_count = 0
single_name_count = 0
single_name_mismatch_count = 0
single_token_to_multi_name_person_count = 0
multi_token_to_single_name_person_count = 0
person_single_name_count = 0
my_name_counts_different_count = 0
name_counts_different_count = 0
same_name_as_person_count = 0
different_name_from_person_count = 0

#=============================================================================#
# check out selected records.
#=============================================================================#
article_person_count = work_article_subject_qs.count()
for article_person in work_article_subject_qs:
    
    # increment overall counter
    record_counter += 1
    
    # init
    # NOTE(review): my_verbatim_name_has_spaces is reset here but never
    #     assigned or read anywhere else in this cell.
    my_verbatim_name_has_spaces = None
    
    # get my name strings
    my_name = article_person.name
    my_verbatim_name = article_person.verbatim_name
    
    # are name and verbatim name the same?
    if ( my_name == my_verbatim_name ):
        
        # same!
        my_names_same_count += 1
        
    else:
        
        # not the same!
        my_names_different_count += 1
        
    #-- END check if names same or different within Article_Subject record --#
    
    # does verbatim_name have spaces?
    # (decided by the length of the token list the model returns - presumably
    #     a whitespace split of verbatim_name; confirm against the model.)
    my_verbatim_name_part_list = article_person.get_verbatim_name_token_list()
    my_verbatim_name_part_count = len( my_verbatim_name_part_list )
    if ( my_verbatim_name_part_count > 1 ):
        
        # more than one token.
        has_spaces_count += 1
        is_single_token = False
        
    else:
        
        # one token or fewer - single name?
        single_name_count += 1
        is_single_token = True
        
    #-- END check if spaces present --#
    
    # retrieve person
    # NOTE(review): my_person is dereferenced below without a None check - if
    #     the person relation can be empty, this raises AttributeError.
    #     Confirm against the Article_Subject model.
    my_person = article_person.person
    
    # get name part counts: one derived from the verbatim name string, one
    #     from the related Person instance.
    my_official_name_part_count = article_person.get_name_part_count_from_name( my_verbatim_name )
    person_name_part_count = my_person.get_name_part_count()
    
    # how many parts from person instance?
    if ( person_name_part_count == 1 ):
        
        person_single_name_count += 1
        is_single_name = True
        
    else:
        
        is_single_name = False
        
    #-- END check if person is single-name --#
    
    # string count comparison: does official match space-based?
    if ( my_verbatim_name_part_count != my_official_name_part_count ):
        
        # they do not. Interesting.
        my_name_counts_different_count += 1
        
    #-- END check if string-based name counts match --#
    
    # single name mismatch? (verbatim token test and Person part test disagree)
    if ( is_single_name != is_single_token ):
        
        single_name_mismatch_count += 1
        single_name_mismatch_list.append( article_person.id )
        
        # is it single token to multi-name person?
        if ( ( is_single_token == True ) and ( is_single_name == False ) ):
            
            # single token in Article_Subject name maps to Person with multiple name parts.
            single_token_to_multi_name_person_count += 1
            single_token_to_multi_name_list.append( article_person.id )
            
            # TODO - do something here? - if single token to multi-name person, probably wrong. --#
            
        elif ( ( is_single_token == False ) and ( is_single_name == True ) ):
            
            # multi-token name in Article_Subject maps to Person with single name part.
            multi_token_to_single_name_person_count += 1
            
        #-- END check of verbatim has one token, name has multiple name parts --#
        
    #-- END check if single-name booleans match --#
    
    # compare string to person
    if ( my_official_name_part_count != person_name_part_count ):      
        
        # they do not. Interesting.
        name_counts_different_count += 1
        
    #-- END check if string part count matches person --#
    
    # retrieve person name string
    person_name_string = my_person.get_name_string()
    #print ( "----> person name string: \"{}\"".format( person_name_string ) )
    
    # are the two strings the same?
    if ( my_verbatim_name == person_name_string ):
        
        # same.
        same_name_as_person_count += 1
    
    else:
        
        # not same.
        different_name_from_person_count += 1
        
    #-- END check if string from article and rendered person name are the same. --#

    # output a progress message? (every print_every_x_records records)
    if ( ( ( record_counter % print_every_x_records ) == 0 )
        and ( do_output_progress == True ) ):

        # basic timing analysis.
        current_dt = datetime.datetime.now()
        current_elapsed = current_dt - previous_dt
        total_elapsed = current_dt - my_start_dt
        total_average = total_elapsed / record_counter
        previous_dt = current_dt

        status_message = "processed {counter} of {count} records @ {right_now} ( timing: last {current_count} elapsed = {current_elapsed}; total elapsed = {total_elapsed}; average = {total_average} ).".format(
            counter = record_counter,
            count = article_person_count,
            right_now = current_dt,
            current_count = print_every_x_records,
            current_elapsed = current_elapsed,
            total_elapsed = total_elapsed,
            total_average = total_average
        )
        print( status_message )
        #self.output_log_message(
        #    status_message,
        #    method_IN = me,
        #    indent_with_IN = "\n\n----> ",
        #    log_level_code_IN = logging.INFO,
        #    do_print_IN = True
        #)

    #-- END periodic status update. --#
    
#-- END loop over Article_Subject instances --#

# summary of everything tallied in the loop.
print( "Processed {} people:".format( article_person_count ) )
print( "- my_names_different_count = {}".format( my_names_different_count ) )
print( "- my_names_same_count = {}".format( my_names_same_count ) )
print( "- has_spaces_count = {}".format( has_spaces_count ) )
print( "- single_name_count = {}".format( single_name_count ) )
print( "- person_single_name_count = {}".format( person_single_name_count ) )
print( "- single_name_mismatch_count = {}".format( single_name_mismatch_count ) )
print( "- single_token_to_multi_name_person_count = {}".format( single_token_to_multi_name_person_count ) )
print( "- multi_token_to_single_name_person_count = {}".format( multi_token_to_single_name_person_count ) )
print( "- my_name_counts_different_count = {}".format( my_name_counts_different_count ) )
print( "- name_counts_different_count = {}".format( name_counts_different_count ) )
print( "- same_name_as_person_count = {}".format( same_name_as_person_count ) )
print( "- different_name_from_person_count = {}".format( different_name_from_person_count ) )


In [None]:
# Re-print the counters gathered by the Article_Subject collection loop,
#     followed by the two lists of mismatching Article_Subject IDs (when
#     they are non-empty).  Requires the collection cell to have been run.

# declare variables
summary_item_list = None
summary_label = None
summary_value = None

# counters, in output order (label, value).  Includes
#     multi_token_to_single_name_person_count so this summary matches the
#     one printed at the end of the collection loop.
summary_item_list = [
    ( "my_names_different_count", my_names_different_count ),
    ( "my_names_same_count", my_names_same_count ),
    ( "has_spaces_count", has_spaces_count ),
    ( "single_name_count", single_name_count ),
    ( "person_single_name_count", person_single_name_count ),
    ( "single_name_mismatch_count", single_name_mismatch_count ),
    ( "single_token_to_multi_name_person_count", single_token_to_multi_name_person_count ),
    ( "multi_token_to_single_name_person_count", multi_token_to_single_name_person_count ),
    ( "my_name_counts_different_count", my_name_counts_different_count ),
    ( "name_counts_different_count", name_counts_different_count ),
    ( "same_name_as_person_count", same_name_as_person_count ),
    ( "different_name_from_person_count", different_name_from_person_count ),
]

print( "Processed {} people:".format( article_person_count ) )
for summary_label, summary_value in summary_item_list:

    print( "- {} = {}".format( summary_label, summary_value ) )

#-- END loop over summary counters --#

# None and empty list are both falsy, so one truthiness test covers both.
if ( single_name_mismatch_list ):

    print( "- single_name_mismatch_list: \n{}".format( single_name_mismatch_list ) )

#-- END see if there are mismatch list items. --#

if ( single_token_to_multi_name_list ):

    print( "- single_token_to_multi_name_list: \n{}".format( single_token_to_multi_name_list ) )

#-- END see if there are single-token-to-multi-name list items. --#

### Article_Author

In [None]:
# NOTE(review): this cell sits under the "Article_Author" heading, but every
#     query in it is against Article_Subject - confirm which was intended.

# declare variables
match_count = article_subject = None
my_name = my_verbatim_name = my_lookup_name = None

# base QuerySet - every Article_Subject.
all_article_subjects_qs = Article_Subject.objects.all()
match_count = all_article_subjects_qs.count()
print( "total Article_Subject count = {}".format( match_count ) )

# keep only those whose Article_Data was coded by the automated coder.
all_article_subjects_qs = all_article_subjects_qs.filter( article_data__coder = automated_coder )
match_count = all_article_subjects_qs.count()
print( "automated Article_Subject count = {}".format( match_count ) )

# name contains no space.
article_subject_qs = all_article_subjects_qs.exclude( name__contains = " " )
match_count = article_subject_qs.count()
print( "exclude name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# ...and, of those, only the ones where name is actually set (not NULL).
article_subject_qs = article_subject_qs.filter( name__isnull = False )
match_count = article_subject_qs.count()
print( "exclude name__contains = \"<space>\"; exclude NULL --> match count = {}".format( match_count ) )

# print the three name fields for the first 10 matches.
for article_subject in article_subject_qs[ : 10 ]:

    my_name = article_subject.name
    my_verbatim_name = article_subject.verbatim_name
    my_lookup_name = article_subject.lookup_name

    print( "\n{article_subject}:".format( article_subject = article_subject ) )
    print( "-          name: {}".format( my_name ) )
    print( "- verbatim_name: {}".format( my_verbatim_name ) )
    print( "-   lookup_name: {}".format( my_lookup_name ) )

#-- END loop over sample of no-space-in-name Article_Subject instances --#

print( "\n" )

# verbatim_name contains no space (count only).
article_subject_qs = all_article_subjects_qs.exclude( verbatim_name__contains = " " )
match_count = article_subject_qs.count()
print( "exclude verbatim_name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# lookup_name contains no space.
article_subject_qs = all_article_subjects_qs.exclude( lookup_name__contains = " " )
match_count = article_subject_qs.count()
print( "exclude lookup_name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# print the three name fields for the first 10 no-space-in-lookup_name matches.
for article_subject in article_subject_qs[ : 10 ]:

    my_name = article_subject.name
    my_verbatim_name = article_subject.verbatim_name
    my_lookup_name = article_subject.lookup_name

    print( "\n{article_subject}:".format( article_subject = article_subject ) )
    print( "-          name: {}".format( my_name ) )
    print( "- verbatim_name: {}".format( my_verbatim_name ) )
    print( "-   lookup_name: {}".format( my_lookup_name ) )

#-- END loop over sample of no-space-in-lookup_name Article_Subject instances --#

In [None]:
# Purpose: for each Article_Author in name_qs, compare the first "/"-delimited
#     token of the related Article's author_string with the name string
#     rendered by the related Person.  When they match and do_updates is
#     True, store that string in the Article_Author's name, verbatim_name
#     and lookup_name fields and save it.
#
# NOTE(review): name_qs is not defined in any cell visible in this notebook -
#     it must be created before this cell runs, or the first statement after
#     the configuration raises NameError.
# NOTE(review): do_updates defaults to True, so running this cell writes to
#     the database.

# declare variables
article_author = None
article_data = None
article = None
author_string_parts_list = None
author_name_string = None
author_person = None
person_name_string = None
do_updates = None

# configure
do_updates = True

# output the Article_Author records to fix
name_qs = name_qs.order_by( 'id' )
for article_author in name_qs:
    
    # just print it out.
    print( "\nArticle_Author to fix: {}".format( article_author ) )
    
    # retrieve the Article_Data
    article_data = article_author.article_data
    
    # retrieve the article
    article = article_data.article
    
    # grab the author string
    # NOTE(review): author_string is not in the declarations above, and it is
    #     split below without a None check - confirm it is always set.
    author_string = article.author_string
    
    print ( "----> author_string: {}".format( author_string ) )
    
    # split on "/", get first token, and strip it.
    author_string_parts_list = author_string.split( "/" )
    author_name_string = author_string_parts_list[ 0 ]
    author_name_string = author_name_string.strip()
    print ( "----> article author name string: \"{}\"".format( author_name_string ) )
    
    # retrieve person
    # NOTE(review): dereferenced below without a None check - confirm every
    #     Article_Author in name_qs has a person.
    author_person = article_author.person
    
    # retrieve person name string
    person_name_string = author_person.get_name_string()
    print ( "----> person name string: \"{}\"".format( person_name_string ) )
    
    # are the two strings the same? If so, fix Article_Author. If not, output.
    if ( author_name_string == person_name_string ):
        
        # same.
        print( "----> SAME" )
        if ( do_updates == True ):
            
            # update Article_Author - all three name fields get the same string.
            article_author.name = author_name_string
            article_author.verbatim_name = author_name_string
            article_author.lookup_name = author_name_string
            article_author.save()

            print( "----> ...fixed" )
    
        #-- END check if we want to actually update/fix --#
    
    else:
        
        # not same - only reported, record is left unchanged.
        print( "----> NOT same" )
        
    #-- END check if string from article and rendered person name are the same. --#
    
#-- END loop over busted Article_Author instances --#

# Article_Data for GRP analysis

In [None]:
# init - working variables for the GRP Article_Data filtering cells.
grp_article_data_qs = article_data_count = work_qs = None

In [None]:
# get all Article_Data.
# (unfiltered starting point for the GRP filtering cells that follow.)
grp_article_data_qs = Article_Data.objects.all()

# how many are we starting with?
article_data_count = grp_article_data_qs.count()

print( "Starting with {} total Article_Data instances.".format( article_data_count ) )

## GRP - Only OpenCalais v.2

In [None]:
# coder_type to keep - the OpenCalais v2 value from the shared variables.
my_coder_type = ocv2_coder_type

# narrow to Article_Data coded by the automated coder, with that coder_type.
grp_article_data_qs = grp_article_data_qs.filter(
    coder = automated_coder
).filter(
    coder_type = my_coder_type
)

# count what is left.
article_data_count = grp_article_data_qs.count()

print( "{} Article_Data instances for coder_type {}.".format( article_data_count, my_coder_type ) )

## GRP - Only Grand Rapids Press

In [None]:
# newspaper to keep: Grand Rapids Press (from the shared variables).
# (the automated coder lookup below is left commented out - automated_coder
#     is already set in the shared variables cell.)
#automated_coder = ArticleCoder.get_automated_coding_user()
my_newspaper = grand_rapids_press

# filter to just Article_Data whose article is from this newspaper.
grp_article_data_qs = grp_article_data_qs.filter( article__newspaper = my_newspaper )

# how many now?
article_data_count = grp_article_data_qs.count()

print( "{} Article_Data instances for newspaper {}.".format( article_data_count, my_newspaper ) )

# Person exploration

In [None]:
# get all Persons
# (person_qs and person_count are both used by the Person loop in the next cell.)
person_qs = Person.objects.all()
person_count = person_qs.count()
print( "{} Persons".format( person_count ) )

In [None]:
#=============================================================================#
# Purpose: loop over every Person in person_qs (set in the previous cell,
#     along with person_count) and tally:
#     - how many have exactly one name part, more than one, or neither;
#     - for each related Article_Subject with a verbatim_name, whether its
#       verbatim-name token count and name part count match the Person's
#       name part count;
#     - how many related Article_Subjects have no verbatim_name.
#     A summary of all counters is printed at the end.
#=============================================================================#

#=============================================================================#
# declare variables
#=============================================================================#
person = None
name_part_count = None
is_single_name = None
person_article_subject_qs = None
my_article_subject = None
subject_verbatim_name = None
article_subject_name_token_count = None
article_subject_name_part_count = None

# declare variables - auditing
do_output_progress = None
print_every_x_records = None
record_counter = None
my_start_dt = None
current_dt = None
current_elapsed = None
total_elapsed = None
total_average = None
previous_dt = None
status_message = None

# declare variables - counts
single_name_count = None
multi_name_part_count = None
crazy_name_part_count = None
subject_name_count_mismatch_count = None
subject_token_mismatch_count = None
missing_verbatim_name_count = None

#=============================================================================#
# config/init
#=============================================================================#
do_output_progress = True
print_every_x_records = 10000
my_start_dt = datetime.datetime.now()
previous_dt = my_start_dt

# init counters
record_counter = 0
single_name_count = 0
multi_name_part_count = 0
crazy_name_part_count = 0
subject_name_count_mismatch_count = 0
subject_token_mismatch_count = 0
missing_verbatim_name_count = 0

#=============================================================================#
# loop.
#=============================================================================#
for person in person_qs:

    record_counter += 1

    # get name part count
    name_part_count = person.get_name_part_count()

    # single name?  Set the flag on every pass so it never carries over a
    #     stale True from an earlier Person.
    is_single_name = ( name_part_count == 1 )
    if ( is_single_name == True ):

        # is single name.
        single_name_count += 1

    elif ( name_part_count > 1 ):

        # multiple name parts.
        multi_name_part_count += 1

    else:

        # hmmm. What?  Neither one part nor more than one.
        crazy_name_part_count += 1

    #-- END check if single_name --#

    # get set of related Article_Subject instances
    person_article_subject_qs = person.article_subject_set.all()
    for my_article_subject in person_article_subject_qs:

        # got subject_verbatim_name?
        subject_verbatim_name = my_article_subject.verbatim_name
        if ( ( subject_verbatim_name is not None ) and ( subject_verbatim_name != "" ) ):

            # get name part and name token counts.
            article_subject_name_token_count = my_article_subject.get_verbatim_name_token_count()
            article_subject_name_part_count = my_article_subject.get_verbatim_name_part_count()

            # same token count as person name part count?
            if ( article_subject_name_token_count != name_part_count ):

                # not same - mismatch
                subject_token_mismatch_count += 1

            #-- END check if token count = person name part count --#

            # same name part count as person name part count?
            if ( article_subject_name_part_count != name_part_count ):

                # not same - mismatch
                subject_name_count_mismatch_count += 1

            #-- END check if name part count = person name part count --#

        else:

            # no verbatim name to compare.
            missing_verbatim_name_count += 1

        #-- END check if verbatim name present. --#

    #-- END loop over related Article_Subject instances --#

    # output a progress message? (every print_every_x_records records)
    if ( ( ( record_counter % print_every_x_records ) == 0 )
        and ( do_output_progress == True ) ):

        # basic timing analysis.
        current_dt = datetime.datetime.now()
        current_elapsed = current_dt - previous_dt
        total_elapsed = current_dt - my_start_dt
        total_average = total_elapsed / record_counter
        previous_dt = current_dt

        status_message = "processed {counter} of {count} records @ {right_now} ( timing: last {current_count} elapsed = {current_elapsed}; total elapsed = {total_elapsed}; average = {total_average} ).".format(
            counter = record_counter,
            count = person_count,
            right_now = current_dt,
            current_count = print_every_x_records,
            current_elapsed = current_elapsed,
            total_elapsed = total_elapsed,
            total_average = total_average
        )
        print( status_message )

    #-- END periodic status update. --#

#-- END loop over persons. --#

print( "Processed {} people:".format( record_counter ) )
print( "- single_name_count = {}".format( single_name_count ) )
print( "- multi_name_part_count = {}".format( multi_name_part_count ) )
print( "- crazy_name_part_count = {}".format( crazy_name_part_count ) )
print( "- subject_token_mismatch_count = {}".format( subject_token_mismatch_count ) )
print( "- subject_name_count_mismatch_count = {}".format( subject_name_count_mismatch_count ) )
print( "- missing_verbatim_name_count = {}".format( missing_verbatim_name_count ) )
