**analysis-network_data_output_example.ipynb - Programmatic network data output**

# Setup

## Setup - Imports

In [7]:
# python base imports
import datetime
import hashlib
import json

# import six
import six

print( "packages imported at " + str( datetime.datetime.now() ) )

packages imported at 2022-05-27 03:13:31.007587


## Setup - Initialize Django

- Back to [Table of Contents](#Table-of-Contents)

First, initialize my dev django project, so I can run code in this notebook that references my django models and can talk to the database using my project's settings.

In [2]:
%run ../django_init.py

django initialized at 2022-05-27 00:42:31.581299


In [9]:
# python utilities
from python_utilities.strings.string_helper import StringHelper

# import class that actually processes requests for outputting networks.
from context_text.export.network_output import NetworkOutput

print( "django model packages imported at " + str( datetime.datetime.now() ) )

django model packages imported at 2022-05-27 03:15:19.937260


## Setup - functions

### Setup - function `make_string_hash()`

In [10]:
def make_string_hash( value_IN, hash_function_IN = hashlib.sha256 ):

    # return reference
    value_OUT = None

    # declare variables
    me = "make_string_hash"

    # call StringHelper method.
    value_OUT = StringHelper.make_string_hash( value_IN, hash_function_IN = hash_function_IN )

    return value_OUT

#-- END function make_string_hash() --#


# network data output test

## create network data from full data

In [None]:
#include_single_word_names = "yes"
include_single_word_names = "no"

request_json_string = """{
    "start_date": "2009-12-01",
    "end_date": "2009-12-31",
    "date_range": "",
    "publications": "1",
    "coder_id_priority_list": "2",
    "coder_type_filter_type": "automated",
    "coder_types_list": "OpenCalais_REST_API_v2",
    "tags_list": "grp_month",
    "unique_identifiers": "",
    "allow_duplicate_articles": "no",
    "include_source_contact_types": [
        "direct",
        "event",
        "past_quotes",
        "document",
        "other"
    ],
    "exclude_persons_with_tags_in_list": "",
    "include_persons_with_single_word_name": "no",
    "network_download_as_file": "yes",
    "network_include_render_details": "no",
    "output_type": "tab_delimited_matrix",
    "network_data_output_type": "net_and_attr_cols",
    "network_label": "",
    "network_include_headers": "yes",
    "person_query_type": "custom",
    "person_start_date": "2009-12-01",
    "person_end_date": "2009-12-31",
    "person_date_range": "",
    "person_publications": "1",
    "person_coders": "2",
    "person_coder_id_priority_list": "",
    "person_coder_type_filter_type": "automated",
    "person_coder_types_list": "OpenCalais_REST_API_v2",
    "person_tag_list": "grp_month",
    "person_unique_identifiers": "",
    "person_allow_duplicate_articles": "yes"
}"""

request_json = json.loads( request_json_string )
print( request_json ) 

In [20]:
# try creating network data.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request(
    params_IN = request_json,
    debug_flag_IN = True
)

In process_network_output_request: parameter debug - Article selection parameters:
-----------------------------
start_date = "2009-12-07";
end_date = "2010-02-13";
date_range = "";
publications = "['1']";
coders = "['7']";
coder_id_priority_list = "[]";
coder_type_filter_type = "automated";
coder_types_list = "['OpenCalais_REST_API_v2']";
topics = "[]";
tags_list = "[]";
unique_identifiers = "[]";
allow_duplicate_articles = "no";
include_source_contact_types = "['direct', 'event', 'past_quotes', 'document', 'other']";
include_capacities = "[]";
exclude_capacities = "[]";
header_prefix = "";
network_download_as_file = "no";
network_include_render_details = "no";
output_type = "tab_delimited_matrix";
network_data_output_type = "net_and_attr_cols";
network_label = "groovy";
network_include_headers = "no";

Person selection parameters:
----------------------------
person_query_type = "custom";
person_start_date = "2009-12-07";
person_end_date = "2010-02-13";
person_date_range = "";
person

In [21]:
# create a hash of the data, for comparison
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

Network data hash: 21a0454d2421232de4a72058c36284ea52dc29e1f5d11a408a3f4db7f14f97a8


In [18]:
network_data_length = len( network_data )
should_be = 2427606
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length != should_be ):
    
    # not right length. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )
    
#-- END debug/test --#

Network data length: 19282
ERROR! network data length is 19282, should be 2427606


- if include_persons_with_single_word_name = "yes": 2427606
- if include_persons_with_single_word_name = "no": 2344545

## create network data from "export" unit test data - all names

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

In [44]:
#include_single_word_names = "yes"
include_single_word_names = "yes"

request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "all_names",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tag_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "",
  "include_persons_with_single_word_name": "yes"
}"""
request_json = json.loads( request_json_string )
print( request_json ) 

{'coders': '7', 'end_date': '2010-02-13', 'tags_list': '', 'date_range': '', 'start_date': '2009-12-07', 'output_type': 'tab_delimited_matrix', 'publications': '1', 'network_label': 'all_names', 'person_coders': '7', 'database_output': 'yes', 'person_end_date': '2010-02-13', 'person_tag_list': '', 'coder_types_list': 'OpenCalais_REST_API_v2', 'person_date_range': '', 'person_query_type': 'custom', 'person_start_date': '2009-12-07', 'unique_identifiers': '', 'person_publications': '1', 'coder_id_priority_list': '', 'coder_type_filter_type': 'automated', 'network_include_headers': 'no', 'person_coder_types_list': 'OpenCalais_REST_API_v2', 'allow_duplicate_articles': 'no', 'network_data_output_type': 'net_and_attr_cols', 'network_download_as_file': 'no', 'person_unique_identifiers': '', 'include_source_contact_types': 'direct,event,past_quotes,document,other', 'person_coder_id_priority_list': '', 'person_coder_type_filter_type': 'automated', 'network_include_render_details': 'no', 'person

In [45]:
# try creating network data.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request(
    params_IN = request_json,
    debug_flag_IN = True
)

In process_network_output_request: parameter debug - Article selection parameters:
-----------------------------
start_date = "2009-12-07";
end_date = "2010-02-13";
date_range = "";
publications = "['1']";
coders = "['7']";
coder_id_priority_list = "[]";
coder_type_filter_type = "automated";
coder_types_list = "['OpenCalais_REST_API_v2']";
topics = "[]";
tags_list = "[]";
unique_identifiers = "[]";
allow_duplicate_articles = "no";
include_source_contact_types = "['direct', 'event', 'past_quotes', 'document', 'other']";
include_capacities = "[]";
exclude_capacities = "[]";
header_prefix = "";
network_download_as_file = "no";
network_include_render_details = "no";
output_type = "tab_delimited_matrix";
network_data_output_type = "net_and_attr_cols";
network_label = "all_names";
network_include_headers = "no";

Person selection parameters:
----------------------------
person_query_type = "custom";
person_start_date = "2009-12-07";
person_end_date = "2010-02-13";
person_date_range = "";
per

In [46]:
# create a hash of the data, for comparison
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

# match?
should_be = "fbbe4a2bd941c54a61c77ffb38ae7182720d67f41275a2dea7a3f752a438e983"
if ( network_data_hash != should_be ):
    
    # not right hash. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )
    
else:
    
    # a match
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )
    
#-- END debug/test --#

Network data hash: fbbe4a2bd941c54a61c77ffb38ae7182720d67f41275a2dea7a3f752a438e983
MATCH - network data hash fbbe4a2bd941c54a61c77ffb38ae7182720d67f41275a2dea7a3f752a438e983 matches expected. hooray!


In [47]:
network_data_length = len( network_data )
should_be = 14156
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length != should_be ):
    
    # not right length. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )
    
else:
    
    # a match
    print( "MATCH - string len()gth of {} matches expected. hooray!".format( network_data_length ) )
    
#-- END debug/test --#

Network data length: 14156
MATCH - string len()gth of 14156 matches expected. hooray!


In [49]:
# look at master person dict
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# how many entries?
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# right number?
should_be = 74
if ( person_count != should_be ):
    
    # not right length. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )
    
else:
    
    # a match
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )
    
#-- END debug/test --#

# persons 1049, 752 should be present.
find_person_id = 1049
if ( find_person_id in master_person_dict ):
    
    print( "SUCCESS - single-name person {} is in dictionary".format( find_person_id ) )
    
else:
    
    print( "ERROR - single-name person {} not in dictionary".format( find_person_id ) )
    
#-- END check for person 1049 --#

find_person_id = 752
if ( find_person_id in master_person_dict ):
    
    print( "SUCCESS - single-name person {} is in dictionary".format( find_person_id ) )
    
else:
    
    print( "ERROR - single-name person {} not in dictionary".format( find_person_id ) )
    
#-- END check for person 752 --#

# output all persons.
for person_id, person_instance in master_person_dict.items():
    
    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )
    
#-- END loop over persons --#

In create_q_filter_automated_by_coder_type(): automated coder user: 7 - automated
- person count: 74
MATCH - person count of 74 matches expected. hooray!
SUCCESS - single-name person 1049 is in dictionary
SUCCESS - single-name person 752 is in dictionary

==> Person 161: 161 - Bickel, Nardy Baeza ( The Grand Rapids Press; The Grand Rapids Press )

==> Person 46: 46 - Reens, Nate ( The Grand Rapids Press; The Grand Rapids Press )

==> Person 163: 163 - Goodell, Pete ( manager, Pando Winter Sports Park )

==> Person 166: 166 - DeGraaf, Rick ( skiier )

==> Person 165: 165 - Brown, Steve ( manager, Cannonsburg Ski Area )

==> Person 164: 164 - Dukesherer, Bob ( meteorologist )

==> Person 178: 178 - Scott, Monica ( The Grand Rapids Press; The Grand Rapids Press )

==> Person 179: 179 - Nystrom, Mike ( Michigan Infrastructure and Transportation Association spokesman )

==> Person 181: 181 - Brito, Jonathan

==> Person 182: 182 - Salatka, Jerry ( Battalion Chief  )

==> Person 30: 30 - Maka

## network data from "export" unit test data - no single names

- See [`context_text` github README](https://github.com/jonathanmorgan/context_text#test-data) for more details on loading this data.

In [50]:
#include_single_word_names = "yes"
include_single_word_names = "no"

request_json_string = """{
  "coders": "7",
  "end_date": "2010-02-13",
  "tags_list": "",
  "date_range": "",
  "start_date": "2009-12-07",
  "output_type": "tab_delimited_matrix",
  "publications": "1",
  "network_label": "no_single_names",
  "person_coders": "7",
  "database_output": "yes",
  "person_end_date": "2010-02-13",
  "person_tag_list": "",
  "coder_types_list": "OpenCalais_REST_API_v2",
  "person_date_range": "",
  "person_query_type": "custom",
  "person_start_date": "2009-12-07",
  "unique_identifiers": "",
  "person_publications": "1",
  "coder_id_priority_list": "",
  "coder_type_filter_type": "automated",
  "network_include_headers": "no",
  "person_coder_types_list": "OpenCalais_REST_API_v2",
  "allow_duplicate_articles": "no",
  "network_data_output_type": "net_and_attr_cols",
  "network_download_as_file": "no",
  "person_unique_identifiers": "",
  "include_source_contact_types": "direct,event,past_quotes,document,other",
  "person_coder_id_priority_list": "",
  "person_coder_type_filter_type": "automated",
  "network_include_render_details": "no",
  "person_allow_duplicate_articles": "no",
  "exclude_persons_with_tags_in_list": "",
  "include_persons_with_single_word_name": "no"
}"""
request_json = json.loads( request_json_string )
print( request_json ) 

{'coders': '7', 'end_date': '2010-02-13', 'tags_list': '', 'date_range': '', 'start_date': '2009-12-07', 'output_type': 'tab_delimited_matrix', 'publications': '1', 'network_label': 'no_single_names', 'person_coders': '7', 'database_output': 'yes', 'person_end_date': '2010-02-13', 'person_tag_list': '', 'coder_types_list': 'OpenCalais_REST_API_v2', 'person_date_range': '', 'person_query_type': 'custom', 'person_start_date': '2009-12-07', 'unique_identifiers': '', 'person_publications': '1', 'coder_id_priority_list': '', 'coder_type_filter_type': 'automated', 'network_include_headers': 'no', 'person_coder_types_list': 'OpenCalais_REST_API_v2', 'allow_duplicate_articles': 'no', 'network_data_output_type': 'net_and_attr_cols', 'network_download_as_file': 'no', 'person_unique_identifiers': '', 'include_source_contact_types': 'direct,event,past_quotes,document,other', 'person_coder_id_priority_list': '', 'person_coder_type_filter_type': 'automated', 'network_include_render_details': 'no', '

In [51]:
# try creating network data.
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request(
    params_IN = request_json,
    debug_flag_IN = True
)

In process_network_output_request: parameter debug - Article selection parameters:
-----------------------------
start_date = "2009-12-07";
end_date = "2010-02-13";
date_range = "";
publications = "['1']";
coders = "['7']";
coder_id_priority_list = "[]";
coder_type_filter_type = "automated";
coder_types_list = "['OpenCalais_REST_API_v2']";
topics = "[]";
tags_list = "[]";
unique_identifiers = "[]";
allow_duplicate_articles = "no";
include_source_contact_types = "['direct', 'event', 'past_quotes', 'document', 'other']";
include_capacities = "[]";
exclude_capacities = "[]";
header_prefix = "";
network_download_as_file = "no";
network_include_render_details = "no";
output_type = "tab_delimited_matrix";
network_data_output_type = "net_and_attr_cols";
network_label = "no_single_names";
network_include_headers = "no";

Person selection parameters:
----------------------------
person_query_type = "custom";
person_start_date = "2009-12-07";
person_end_date = "2010-02-13";
person_date_range = "

In [52]:
# create a hash of the data, for comparison
network_data_hash = make_string_hash( network_data )
print( "Network data hash: {}".format( network_data_hash ) )

# match?
should_be = "21a0454d2421232de4a72058c36284ea52dc29e1f5d11a408a3f4db7f14f97a8"
if ( network_data_hash != should_be ):
    
    # not right hash. Error.
    print( "ERROR! network data hash is {}, should be {}".format( network_data_hash, should_be ) )
    
else:
    
    # a match
    print( "MATCH - network data hash {} matches expected. hooray!".format( network_data_hash ) )
    
#-- END debug/test --#

Network data hash: 21a0454d2421232de4a72058c36284ea52dc29e1f5d11a408a3f4db7f14f97a8
MATCH - network data hash 21a0454d2421232de4a72058c36284ea52dc29e1f5d11a408a3f4db7f14f97a8 matches expected. hooray!


In [53]:
network_data_length = len( network_data )
should_be = 13483
print( "Network data length: {}".format( network_data_length ) )
if ( network_data_length != should_be ):
    
    # not right length. Error.
    print( "ERROR! network data length is {}, should be {}".format( network_data_length, should_be ) )
    
else:
    
    # a match
    print( "string len()gth of {} matches expected. hooray!".format( network_data_length ) )
    
#-- END debug/test --#

Network data length: 13483
string len()gth of 13483 matches expected. hooray!


In [54]:
# look at master person dict
master_person_dict = network_outputter.create_person_dict( load_person_IN = True )

# how many entries?
person_count = len( master_person_dict )
print( "- person count: {person_count}".format( person_count = person_count ) )

# right number?
should_be = 72
if ( person_count != should_be ):
    
    # not right length. Error.
    print( "ERROR! person count is {}, should be {}".format( person_count, should_be ) )
    
else:
    
    # a match
    print( "MATCH - person count of {} matches expected. hooray!".format( person_count ) )
    
#-- END debug/test --#

# persons 1049, 752 should be present.
find_person_id = 1049
if ( find_person_id in master_person_dict ):
    
    print( "ERROR - single-name person {} is in dictionary".format( find_person_id ) )
    
else:
    
    print( "SUCCESS - single-name person {} not in dictionary".format( find_person_id ) )
    
#-- END check for person 1049 --#

find_person_id = 752
if ( find_person_id in master_person_dict ):
    
    print( "ERROR - single-name person {} is in dictionary".format( find_person_id ) )
    
else:
    
    print( "SUCCESS - single-name person {} not in dictionary".format( find_person_id ) )
    
#-- END check for person 752 --#

# output all persons.
for person_id, person_instance in master_person_dict.items():
    
    print( "\n==> Person {person_id}: {person_instance}".format( person_id = person_id, person_instance = person_instance ) )
    
#-- END loop over persons --#

In create_q_filter_automated_by_coder_type(): automated coder user: 7 - automated
- person count: 72
MATCH - person count of 72 matches expected. hooray!
SUCCESS - single-name person 1049 not in dictionary
SUCCESS - single-name person 752 not in dictionary

==> Person 161: 161 - Bickel, Nardy Baeza ( The Grand Rapids Press; The Grand Rapids Press )

==> Person 46: 46 - Reens, Nate ( The Grand Rapids Press; The Grand Rapids Press )

==> Person 163: 163 - Goodell, Pete ( manager, Pando Winter Sports Park )

==> Person 166: 166 - DeGraaf, Rick ( skiier )

==> Person 165: 165 - Brown, Steve ( manager, Cannonsburg Ski Area )

==> Person 164: 164 - Dukesherer, Bob ( meteorologist )

==> Person 178: 178 - Scott, Monica ( The Grand Rapids Press; The Grand Rapids Press )

==> Person 179: 179 - Nystrom, Mike ( Michigan Infrastructure and Transportation Association spokesman )

==> Person 181: 181 - Brito, Jonathan

==> Person 182: 182 - Salatka, Jerry ( Battalion Chief  )

==> Person 30: 30 - Ma

## write network data to file

In [None]:
# write the output to a file
current_date_time = None
my_file_extension = None
network_data_file_path = None
network_data_file = None

# time stamp and file extension to append to file name
current_date_time = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )
my_file_extension = "txt"

# make file path.
network_data_file_path = "context_text_data-{timestamp}.{file_extension}".format(
    timestamp = current_date_time,
    file_extension = my_file_extension
)

# write to file.
with open( network_data_file_path, 'w' ) as network_data_file:

    # output all the data to file.
    network_data_file.write( network_data )
    
#-- END with open( network_data_file_path, 'w' ) as network_data_file --#

print( "network data written to file {} at {}".format( network_data_file_path, datetime.datetime.now() ) )