**analysis-network_data_output_example.ipynb - Programmatic network data output**

# Setup

## Setup - Debug

- Back to [Table of Contents](#Table-of-Contents)

In [None]:
# notebook-level debug switch.
# NOTE(review): no cell visible in this notebook reads it - the calls to
#     process_network_output_request() below pass literal debug_flag_IN values.
debug_flag = False

## Setup - Imports

In [None]:
# python base imports
import datetime
import hashlib
import json
import logging

# import six
import six

print( "packages imported at " + str( datetime.datetime.now() ) )

## Setup - working folder paths

- Back to [Table of Contents](#Table-of-Contents)

In [None]:
%pwd

In [None]:
# hard-coded django project folder, plus the analysis folder inside of it.
django_project_folder = "/home/jonathanmorgan/work/django/research/research"
current_working_folder = "{django_project_folder}/work/phd_work/analysis".format( django_project_folder = django_project_folder )

# capture "now" once, then render it as a timestamp string for use in names.
current_datetime = datetime.datetime.now()
current_date_string = current_datetime.strftime( "%Y-%m-%d-%H-%M-%S" )

In [None]:
# the project name is also the name of the django project folder inside the
#     project's base folder.
project_name = "research"
project_base_folder = "/home/jonathanmorgan/work/django/{project_name}".format(
    project_name = project_name
)
django_project_folder = "{base_folder}/{project_name}".format( project_name = project_name, base_folder = project_base_folder )

# analysis folder within the django project (note the trailing slash).
current_working_folder = "{django_project_folder}/work/phd_work/analysis/".format( django_project_folder = django_project_folder )

# capture "now" once, then render it as a timestamp string for use in names.
current_datetime = datetime.datetime.now()
current_date_string = current_datetime.strftime( "%Y-%m-%d-%H-%M-%S" )

## Setup - logging

- Back to [Table of Contents](#Table-of-Contents)

Configure logging for this notebook's kernel. (If you do not run this cell, you'll get the django application's logging configuration.)

In [None]:
# log file goes in the project's "logs" folder; its name includes the
#     timestamp string built when the folder paths were set up.
project_log_folder = "{base_folder}/logs".format( base_folder = project_base_folder )
logging_file_name = "{}/network_data_output-example-{}.log.txt".format(
    project_log_folder,
    current_date_string
)

# configure logging: DEBUG and above, written to the file named above.
# - filemode 'w' overwrites the file each time; use 'a' to append instead.
logging.basicConfig(
    filename = logging_file_name,
    filemode = 'w',
    level = logging.DEBUG,
    format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
)

## Setup - Initialize Django

- Back to [Table of Contents](#Table-of-Contents)

First, initialize my dev django project, so I can run code in this notebook that references my django models and can talk to the database using my project's settings.

In [None]:
# django_init.py is expected in the project's work/phd_work folder.
django_init_folder = "{django_project_folder}/work/phd_work".format( django_project_folder = django_project_folder )
django_init_path = "django_init.py"

# got a folder? If so, place it in front of the file name.
if django_init_folder:

    django_init_path = "{}/{}".format( django_init_folder, django_init_path )

#-- END check to see if django_init folder. --#

In [None]:
%run $django_init_path

### Setup - django-related imports

In [None]:
# python utilities
from python_utilities.strings.string_helper import StringHelper

# import class that actually processes requests for outputting networks.
from context_text.export.network_output import NetworkOutput

print( "django model packages imported at " + str( datetime.datetime.now() ) )

## Setup - functions

### Setup - function `make_string_hash()`

In [None]:
def make_string_hash( value_IN, hash_function_IN = hashlib.sha256 ):

    '''
    Thin wrapper around StringHelper.make_string_hash().

    Accepts:
    - value_IN - value to hash; passed through to StringHelper unchanged.
    - hash_function_IN - hash constructor to use; defaults to hashlib.sha256.

    Returns whatever StringHelper.make_string_hash() returns.
    NOTE(review): cells in this notebook compare the result to hex digest
        strings, so presumably a hex digest string - confirm against
        StringHelper.
    '''

    # return reference
    value_OUT = None

    # delegate to the StringHelper method.
    value_OUT = StringHelper.make_string_hash( value_IN, hash_function_IN = hash_function_IN )

    return value_OUT

#-- END function make_string_hash() --#

print( "function make_string_hash() defined at " + str( datetime.datetime.now() ) )

# network data output example - full data

- _Note: only pass True to `network_outputter.process_network_output_request( debug_flag_IN )` if you really need to debug - it adds garbage data at the end of the output, even if you ask for no render details._

## create network data from full data, automated coder

In [None]:
# NOTE: include_single_word_names is informational only - no code visible in
#     this notebook reads it. The value actually sent is the request's
#     "include_persons_with_single_word_name" property in the JSON below.
include_single_word_names = "no"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
    "start_date": "2009-12-01",
    "end_date": "2009-12-31",
    "date_range": "",
    "publications": "1",
    "coder_id_priority_list": "2",
    "coder_type_filter_type": "automated",
    "coder_types_list": "OpenCalais_REST_API_v2",
    "tags_list": "grp_month",
    "unique_identifiers": "",
    "allow_duplicate_articles": "no",
    "include_source_contact_types": [
        "direct",
        "event",
        "past_quotes",
        "document",
        "other"
    ],
    "exclude_persons_with_tags_in_list": "",
    "include_persons_with_single_word_name": "no",
    "network_download_as_file": "yes",
    "network_include_render_details": "no",
    "output_type": "tab_delimited_matrix",
    "network_data_output_type": "net_and_attr_cols",
    "network_label": "grp_month",
    "network_include_headers": "yes",
    "person_query_type": "custom",
    "person_start_date": "2009-12-01",
    "person_end_date": "2009-12-31",
    "person_date_range": "",
    "person_publications": "1",
    "person_coders": "2",
    "person_coder_id_priority_list": "",
    "person_coder_type_filter_type": "automated",
    "person_coder_types_list": "OpenCalais_REST_API_v2",
    "person_tags_list": "grp_month",
    "person_unique_identifiers": "",
    "person_allow_duplicate_articles": "yes",
    "database_output": "yes",
    "db_add_timestamp_to_label": "yes",
    "db_save_data_in_database": "yes",
    "save_data_in_folder": "."
}"""

# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json ) 

In [None]:
# fresh NetworkOutput instance, handed the parsed request, debug off.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = False )

In [None]:
# hash the network data so that output from different runs can be compared.
network_data_hash = make_string_hash( network_data )
print(
    "Network data hash: {}".format( network_data_hash )
)

In [None]:
# expected length depends on whether persons with single-word names were
#     included, so look it up from the value actually sent in the request.
expected_length_map = {
    "yes": 2427606,
    "no": 2344545
}
single_word_name_setting = request_json.get( "include_persons_with_single_word_name", None )

# unknown setting --> None, which never matches, so the ERROR branch reports it.
should_be = expected_length_map.get( single_word_name_setting, None )

network_data_length = len( network_data )
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length != should_be ):

    # not right length. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

else:

    # a match
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

#-- END debug/test --#

- if include_persons_with_single_word_name = "yes": 2427606
- if include_persons_with_single_word_name = "no": 2344545

# network data output examples - export unit test data

- _Note: only pass True to `network_outputter.process_network_output_request( debug_flag_IN )` if you really need to debug - it adds garbage data at the end of the output, even if you ask for no render details._

## create network data from "export" unit test data - GRP, all names

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

In [None]:
# NOTE: include_single_word_names is informational only - no code visible in
#     this notebook reads it. The value actually sent is the request's
#     "include_persons_with_single_word_name" property in the JSON below.
include_single_word_names = "yes"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "all_names",
  "person_coders": "7",
  "database_output": "yes",
  "db_add_timestamp_to_label": "no",
  "person_end_date": "2010-02-13",
  "person_tags_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": [
    "direct",
    "event",
    "past_quotes",
    "document",
    "other"
  ],
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "",
  "include_persons_with_single_word_name": "yes"
}"""
# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json ) 

In [None]:
# try creating network data.
# - debug is off on purpose: per the note at the top of this section, passing
#     True adds garbage data to the end of the output, and the output of this
#     call is hashed and length-checked in the cells that follow.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request(
    params_IN = request_json,
    debug_flag_IN = False
)

In [None]:
# hash the output, then compare it to the hash recorded for this request.
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

should_be = "f879560cab27653185bb4e42baec40b6a5d685b4143388e55041399acb921c5f"
if ( network_data_hash == should_be ):

    # hashes agree.
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )

else:

    # hashes differ. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )

#-- END debug/test --#

In [None]:
# compare the length of the output to the length recorded for this request.
network_data_length = len( network_data )
should_be = 14121
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length == should_be ):

    # lengths agree.
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

else:

    # lengths differ. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

#-- END debug/test --#

In [None]:
# ask the outputter for its master person dictionary.
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# report how many entries it holds.
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# is that the count we expect?
should_be = 74
if ( person_count == should_be ):

    # counts agree.
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )

else:

    # counts differ. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )

#-- END debug/test --#

# persons 1049, 752 should be present.
for find_person_id in ( 1049, 752 ):

    if ( find_person_id in master_person_dict ):

        print( "SUCCESS - single-name person {} is in dictionary".format( find_person_id ) )

    else:

        print( "ERROR - single-name person {} not in dictionary".format( find_person_id ) )

    #-- END check for person --#

#-- END loop over persons to find. --#

# print every entry in the dictionary.
for person_id, person_instance in master_person_dict.items():

    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )

#-- END loop over persons --#

## network data from "export" unit test data - no single names

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

In [None]:
# NOTE: include_single_word_names is informational only - no code visible in
#     this notebook reads it. The value actually sent is the request's
#     "include_persons_with_single_word_name" property in the JSON below.
include_single_word_names = "no"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "no_single_names",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tags_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "",
  "include_persons_with_single_word_name": "no"
}"""
# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json ) 

In [None]:
# fresh NetworkOutput instance, handed the parsed request, debug off.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = False )

In [None]:
# hash the output, then compare it to the hash recorded for this request.
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

should_be = "f85a48630c029f848bbb815d003b188eff38346b8eac0da2d55b7b224b323ac5"
if ( network_data_hash == should_be ):

    # hashes agree.
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )

else:

    # hashes differ. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )

#-- END debug/test --#

In [None]:
# compare the length of the output to the length recorded for this request.
network_data_length = len( network_data )
should_be = 13448
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length == should_be ):

    # lengths agree.
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

else:

    # lengths differ. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

#-- END debug/test --#

In [None]:
# ask the outputter for its master person dictionary.
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# report how many entries it holds.
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# is that the count we expect?
should_be = 72
if ( person_count == should_be ):

    # counts agree.
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )

else:

    # counts differ. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )

#-- END debug/test --#

# persons 1049, 752 should not be present.
find_person_list = [ 1049, 752 ]
for find_person_id in find_person_list:

    if ( find_person_id not in master_person_dict ):

        print( "SUCCESS - single-name person {} not in dictionary".format( find_person_id ) )

    else:

        print( "ERROR - single-name person {} is in dictionary".format( find_person_id ) )

    #-- END check for person --#

#-- END loop over persons to find. --#

# print every entry in the dictionary.
for person_id, person_instance in master_person_dict.items():

    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )

#-- END loop over persons --#

## network data from "export" unit test data - exclude tag `from_press_release`

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

Tag `from_press_release` added to the following `Article_Subject` instances:

- 740 - granholm (person 102)
- 637 - Mark Meadows (person 224)
- 677 - Gary Nelund (person 261)


In [None]:
# records the request's "include_persons_with_single_word_name" setting - keep
#     it in sync with the JSON below. This request asks for "yes"; only
#     persons tagged `from_press_release` are excluded.
include_single_word_names = "yes"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "exclude_from_press_release",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tags_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "from_press_release",
  "include_persons_with_single_word_name": "yes"
}"""

# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json )

In [None]:
# fresh NetworkOutput instance, handed the parsed request, debug off.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = False )

In [None]:
# hash the output, then compare it to the hash recorded for this request.
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

should_be = "3529e49830a8464cc0d8a497345b56404c73b867b1046fb38df346953a9b3b72"
if ( network_data_hash == should_be ):

    # hashes agree.
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )

else:

    # hashes differ. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )

#-- END debug/test --#

In [None]:
# compare the length of the output to the length recorded for this request.
network_data_length = len( network_data )
should_be = 13122
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length == should_be ):

    # lengths agree.
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

else:

    # lengths differ. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

#-- END debug/test --#

In [None]:
# look at master person dict
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# how many entries?
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# right number?
should_be = 71
if ( person_count != should_be ):

    # not right length. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )

else:

    # a match
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )

#-- END debug/test --#

# persons 102, 224, 261 (tag `from_press_release`) should not be present.
# - these are excluded because of their tag, not because they have
#     single-word names, so the messages say "excluded person".
find_person_list = [ 102, 224, 261 ]
for find_person_id in find_person_list:

    if ( find_person_id in master_person_dict ):

        print( "ERROR - excluded person {} is in dictionary".format( find_person_id ) )

    else:

        print( "SUCCESS - excluded person {} not in dictionary".format( find_person_id ) )

    #-- END check for person --#

#-- END loop over persons to find. --#

# output all persons.
for person_id, person_instance in master_person_dict.items():

    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )

#-- END loop over persons --#

## network data from "export" unit test data - exclude tag `godwin_heights`

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

Tag `godwin_heights` added to the following `Article_Subject` instances:

- 623 - Felske, Jon (person 188)
- 622 - Johnston, Allen E. (person 187)
- 621 - Hornecker, Kenneth (person 189)


In [None]:
# records the request's "include_persons_with_single_word_name" setting - keep
#     it in sync with the JSON below. This request asks for "yes"; only
#     persons tagged `godwin_heights` are excluded.
include_single_word_names = "yes"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "exclude_godwin_heights",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tags_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "godwin_heights",
  "include_persons_with_single_word_name": "yes"
}"""

# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json )

In [None]:
# fresh NetworkOutput instance, handed the parsed request, debug off.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = False )

In [None]:
# hash the output, then compare it to the hash recorded for this request.
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

should_be = "59e1b6ba6aab28cf37fcb45877d8cdd86d8593df9fa506352d0abd1b6fd3c29b"
if ( network_data_hash == should_be ):

    # hashes agree.
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )

else:

    # hashes differ. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )

#-- END debug/test --#

In [None]:
# compare the length of the output to the length recorded for this request.
network_data_length = len( network_data )
should_be = 13122
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length == should_be ):

    # lengths agree.
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

else:

    # lengths differ. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

#-- END debug/test --#

In [None]:
# look at master person dict
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# how many entries?
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# right number?
should_be = 71
if ( person_count != should_be ):

    # not right length. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )

else:

    # a match
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )

#-- END debug/test --#

# persons 187, 188, 189 (tag `godwin_heights`) should not be present.
# - these are excluded because of their tag, not because they have
#     single-word names, so the messages say "excluded person".
find_person_list = [ 187, 188, 189 ]
for find_person_id in find_person_list:

    if ( find_person_id in master_person_dict ):

        print( "ERROR - excluded person {} is in dictionary".format( find_person_id ) )

    else:

        print( "SUCCESS - excluded person {} not in dictionary".format( find_person_id ) )

    #-- END check for person --#

#-- END loop over persons to find. --#

# output all persons.
for person_id, person_instance in master_person_dict.items():

    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )

#-- END loop over persons --#

## network data from "export" unit test data - exclude tags `from_press_release` and `godwin_heights`

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

Tag `from_press_release` added to the following `Article_Subject` instances:

- 740 - granholm (person 102)
- 637 - Mark Meadows (person 224)
- 677 - Gary Nelund (person 261)

Tag `godwin_heights` added to the following `Article_Subject` instances:

- 623 - Felske, Jon (person 188)
- 622 - Johnston, Allen E. (person 187)
- 621 - Hornecker, Kenneth (person 189)


In [None]:
# records the request's "include_persons_with_single_word_name" setting - keep
#     it in sync with the JSON below. This request asks for "yes"; only
#     persons tagged `from_press_release` or `godwin_heights` are excluded.
include_single_word_names = "yes"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "exclude_two_tags",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tags_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "from_press_release,godwin_heights",
  "include_persons_with_single_word_name": "yes"
}"""

# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json )

In [None]:
# fresh NetworkOutput instance, handed the parsed request, debug off.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = False )

In [None]:
# hash the output, then compare it to the hash recorded for this request.
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

should_be = "441127876c15eda7fb6cbf64e8555e011a2f459ba64b7111ac3dd4cbcdafbb2a"
if ( network_data_hash == should_be ):

    # hashes agree.
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )

else:

    # hashes differ. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )

#-- END debug/test --#

In [None]:
# compare the length of the output to the length recorded for this request.
network_data_length = len( network_data )
should_be = 12159
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length == should_be ):

    # lengths agree.
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

else:

    # lengths differ. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

#-- END debug/test --#

In [None]:
# look at master person dict
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# how many entries?
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# right number?
should_be = 68
if ( person_count != should_be ):

    # not right length. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )

else:

    # a match
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )

#-- END debug/test --#

# the following persons should not be present:
# - 102, 224, 261 (tag `from_press_release`)
# - 187, 188, 189 (tag `godwin_heights`)
# They are excluded because of their tags, not because they have single-word
#     names, so the messages say "excluded person".
find_person_list = [ 102, 224, 261, 187, 188, 189 ]
for find_person_id in find_person_list:

    if ( find_person_id in master_person_dict ):

        print( "ERROR - excluded person {} is in dictionary".format( find_person_id ) )

    else:

        print( "SUCCESS - excluded person {} not in dictionary".format( find_person_id ) )

    #-- END check for person --#

#-- END loop over persons to find. --#

# output all persons.
for person_id, person_instance in master_person_dict.items():

    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )

#-- END loop over persons --#

## network data from "export" unit test data - no single names, exclude tags `from_press_release` and `godwin_heights`

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

Tag `from_press_release` added to the following `Article_Subject` instances:

- 740 - granholm (person 102)
- 637 - Mark Meadows (person 224)
- 677 - Gary Nelund (person 261)

Tag `godwin_heights` added to the following `Article_Subject` instances:

- 623 - Felske, Jon (person 188)
- 622 - Johnston, Allen E. (person 187)
- 621 - Hornecker, Kenneth (person 189)


In [None]:
# NOTE: include_single_word_names is informational only - no code visible in
#     this notebook reads it. The value actually sent is the request's
#     "include_persons_with_single_word_name" property in the JSON below.
include_single_word_names = "no"

# network output request, as a JSON string (parsed into a dictionary below).
request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "exclude_two_tags_and_single_names",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tags_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "from_press_release,godwin_heights",
  "include_persons_with_single_word_name": "no"
}"""
# parse the JSON request into a dictionary and display it.
request_json = json.loads( request_json_string )
print( request_json ) 

In [None]:
# fresh NetworkOutput instance, handed the parsed request, debug off.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = False )

In [None]:
# hash the output, then compare it to the hash recorded for this request.
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

should_be = "0f8a530f18a724b3d724d7fe9caa3082954c049abdc02b77bc480fc432d0a770"
if ( network_data_hash == should_be ):

    # hashes agree.
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )

else:

    # hashes differ. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )

#-- END debug/test --#

In [None]:
# compare the length of the output to the length recorded for this request.
network_data_length = len( network_data )
should_be = 11534
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length == should_be ):

    # lengths agree.
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )

else:

    # lengths differ. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )

#-- END debug/test --#

In [None]:
# look at master person dict
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# how many entries?
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# right number?
should_be = 66
if ( person_count != should_be ):

    # not right length. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )

else:

    # a match
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )

#-- END debug/test --#

# the following persons should not be present:
# - 1049, 752 (single names)
# - 102, 224, 261 (tag `from_press_release`)
# - 187, 188, 189 (tag `godwin_heights`)
# Only the first two are single-name persons, so the messages say
#     "excluded person" rather than "single-name person".
find_person_list = [ 1049, 752, 102, 224, 261, 187, 188, 189 ]

# check for people who should have been removed.
for find_person_id in find_person_list:

    if ( find_person_id in master_person_dict ):

        print( "ERROR - excluded person {} is in dictionary".format( find_person_id ) )

    else:

        print( "SUCCESS - excluded person {} not in dictionary".format( find_person_id ) )

    #-- END check for person --#

#-- END loop over persons to find. --#

# output all persons.
for person_id, person_instance in master_person_dict.items():

    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )

#-- END loop over persons --#

# write network data to file

In [None]:
# write the most recently generated network data to a time-stamped text file.

# time stamp and file extension used to build the file name
current_date_time = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )
my_file_extension = "txt"

# make file path (no folder is included, so it is a relative path).
network_data_file_path = "context_text_data-{timestamp}.{file_extension}".format(
    timestamp = current_date_time,
    file_extension = my_file_extension
)

# write to file.
# - encoding is explicit so that non-ASCII characters in the data do not
#     depend on the platform's default text encoding.
with open( network_data_file_path, 'w', encoding = "utf-8" ) as network_data_file:

    # output all the data to file.
    network_data_file.write( network_data )

#-- END with open( network_data_file_path, 'w' ) as network_data_file --#

print( "network data written to file {} at {}".format( network_data_file_path, datetime.datetime.now() ) )