# Setup

## Setup - Imports

In [None]:
# python base imports
import datetime
import json

# import six
import six

# note when the base imports finished loading.
print( "packages imported at {}".format( datetime.datetime.now() ) )

## Setup - Initialize Django

- Back to [Table of Contents](#Table-of-Contents)

First, initialize my dev django project, so I can run code in this notebook that references my django models and can talk to the database using my project's settings.

In [None]:
%run ../django_init.py

In [None]:
# django imports
from django.contrib.auth.models import User

# context_text imports
from context_text.shared.context_text_base import ContextTextBase

# context_analysis imports
from context_analysis.network.network_person_info import NetworkPersonInfo

# context_text model imports
from context_text.models import Article
from context_text.models import Article_Author
from context_text.models import Article_Data
from context_text.models import Article_Subject
from context_text.models import Newspaper
from context_text.models import Person

# article coding
from context_text.article_coding.article_coder import ArticleCoder
#from context_text.article_coding.article_coding import ArticleCoding
from context_text.article_coding.open_calais_v2.open_calais_v2_article_coder import OpenCalaisV2ArticleCoder

# import class that actually processes requests for outputting networks.
from context_text.export.network_output import NetworkOutput

# context_text shared
from context_text.shared.context_text_base import ContextTextBase

# note when the django model imports finished loading.
print( "django model packages imported at {}".format( datetime.datetime.now() ) )

## Setup - Important instances

In [None]:
# user returned by ArticleCoder for automated coding - used below to filter
#     Article_Data by coder.
automated_coder = ArticleCoder.get_automated_coding_user()

# Newspaper instances used throughout the notebook, looked up by newsbank_code.
grand_rapids_press = Newspaper.objects.get( newsbank_code = "GRPB" )
detroit_news = Newspaper.objects.get( newsbank_code = "DTNB" )

# network data output test

In [None]:
# Network output request parameters, kept as a JSON string and parsed into the
#     request_json dictionary that is later passed to NetworkOutput as params_IN.
# NOTE(review): the "csrfmiddlewaretoken" value looks like a token captured from
#     a web form submission - confirm whether it is needed here; if it is not,
#     consider removing it from the notebook.
request_json_string = """{
    "csrfmiddlewaretoken": "2TkA5hqznbLUQDJA63yw67i20ItvuA1wDigtsWZg65ieSP00lorVMb452yTn6W11",
    "start_date": "2009-12-01",
    "end_date": "2009-12-31",
    "date_range": "",
    "publications": "1",
    "coder_id_priority_list": "2",
    "coder_type_filter_type": "automated",
    "coder_types_list": "OpenCalais_REST_API_v2",
    "tags_list": "grp_month",
    "unique_identifiers": "",
    "allow_duplicate_articles": "no",
    "include_source_contact_types": [
        "direct",
        "event",
        "past_quotes",
        "document",
        "other"
    ],
    "network_download_as_file": "yes",
    "network_include_render_details": "no",
    "output_type": "tab_delimited_matrix",
    "network_data_output_type": "net_and_attr_cols",
    "network_label": "",
    "network_include_headers": "yes",
    "person_query_type": "custom",
    "person_start_date": "2009-12-01",
    "person_end_date": "2009-12-31",
    "person_date_range": "",
    "person_publications": "1",
    "person_coders": "2",
    "person_coder_id_priority_list": "",
    "person_coder_type_filter_type": "automated",
    "person_coder_types_list": "OpenCalais_REST_API_v2",
    "person_tag_list": "grp_month",
    "person_unique_identifiers": "",
    "person_allow_duplicate_articles": "yes"
}"""
# parse the JSON string into a dictionary, then print it to check the parse.
request_json = json.loads( request_json_string )
print( request_json ) 

In [None]:
# ask a NetworkOutput instance to build network data from the request
#     parameters in request_json (debug flag left unset).
network_outputter = NetworkOutput()
network_data = network_outputter.process_network_output_request( params_IN = request_json, debug_flag_IN = None )

In [None]:
# sanity check - how much network data came back?
print( "Network data length: " + str( len( network_data ) ) )

In [None]:
# write the network data to a time-stamped text file in the working directory.

# pieces of the file name - file extension and time stamp.
my_file_extension = "txt"
current_date_time = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )

# assemble the file path.
network_data_file_path = "context_text_data-{timestamp}.{file_extension}".format( timestamp = current_date_time, file_extension = my_file_extension )

# dump all of the network data into the file in a single write.
with open( network_data_file_path, 'w' ) as network_data_file:
    network_data_file.write( network_data )
#-- END with open( network_data_file_path, 'w' ) as network_data_file --#

print( "network data written to file {} at {}".format( network_data_file_path, datetime.datetime.now() ) )

# Filter Articles

- Using details from: [newsbank-article_coding.ipynb](../data/article_coding/newsbank-article_coding.ipynb)

In [None]:
# start from a QuerySet of every Article; later cells re-assign article_qs to
#     narrowed versions of it.
article_qs = Article.objects.all()

## Detroit News

In [None]:
# narrow the Article QuerySet to just the Detroit News.
my_newspaper = detroit_news
article_qs = article_qs.filter( newspaper = my_newspaper )

# how many Articles are left?
article_data_count = article_qs.count()
print( "{} Article instances for newspaper {}.".format( article_data_count, my_newspaper ) )

### Detroit News - coded tag

In [None]:
# keep only articles that carry the OpenCalaisV2ArticleCoder.TAG_CODED_BY_ME tag.
tags_in_list = [ OpenCalaisV2ArticleCoder.TAG_CODED_BY_ME ]
article_qs = article_qs.filter( tags__name__in = tags_in_list )
print( "Tags {} - Matching article count: {}".format( tags_in_list, article_qs.count() ) )

### Detroit News - hard news tag

In [None]:
# keep only articles that also carry the ContextTextBase.TAG_LOCAL_HARD_NEWS tag.
tags_in_list = [ ContextTextBase.TAG_LOCAL_HARD_NEWS ]
article_qs = article_qs.filter( tags__name__in = tags_in_list )
print( "Tags {} - Matching article count: {}".format( tags_in_list, article_qs.count() ) )

## Grand Rapids Press

In [None]:
# narrow to just Grand Rapids Press articles.
my_newspaper = grand_rapids_press

# Build the QuerySet fresh from all Articles rather than from article_qs:
#     when the notebook is run top to bottom, article_qs has already been
#     narrowed to the Detroit News (and its tags) by the cells above, so
#     filtering it again on a different newspaper would match nothing
#     (assuming each Article belongs to a single newspaper - TODO confirm).
article_qs = Article.objects.filter( newspaper = my_newspaper )

# how many Articles for this newspaper?
article_data_count = article_qs.count()

print( "{} Article instances for newspaper {}.".format( article_data_count, my_newspaper ) )

### Grand Rapids Press - coded tag

In [None]:
# keep only articles that carry the OpenCalaisV2ArticleCoder.TAG_CODED_BY_ME tag.
tags_in_list = [ OpenCalaisV2ArticleCoder.TAG_CODED_BY_ME ]
article_qs = article_qs.filter( tags__name__in = tags_in_list )
print( "Tags {} - Matching article count: {}".format( tags_in_list, article_qs.count() ) )

### Grand Rapids Press - hard news tag

In [None]:
# keep only articles that also carry the ContextTextBase.TAG_LOCAL_HARD_NEWS tag.
tags_in_list = [ ContextTextBase.TAG_LOCAL_HARD_NEWS ]
article_qs = article_qs.filter( tags__name__in = tags_in_list )
print( "Tags {} - Matching article count: {}".format( tags_in_list, article_qs.count() ) )

# Filter Article_Data

In [None]:
# begin with every Article_Data instance and report how many there are.
article_data_qs = Article_Data.objects.all()
article_data_count = article_data_qs.count()
print( "Starting with {} total Article_Data instances.".format( article_data_count ) )

## Detect single-name people within Article_Data

- Where is the code that filters out people with single names?

    - notebook where work was done originally (just notes - it was manual): [prelim_month-create_Reliability_Names_data.ipynb](./methods/data_creation/prelim_month-create_Reliability_Names_data.ipynb)
    - code to filter to just single first names is in `context_analysis/views.py --> reliability_names_disagreement_view()`:
    
            if ( reliability_names_only_first_name == True ):

                # to start, first name needs to not be null and
                #     not be empty.
                reliability_names_qs = reliability_names_qs.filter( 
                    Q( person__first_name__isnull = False ) & ~Q( person__first_name = "" ),
                    Q( person__middle_name__isnull = True ) | Q( person__middle_name = "" ),
                    Q( person__last_name__isnull = True ) | Q( person__last_name = "" ),
                    Q( person__name_prefix__isnull = True ) | Q( person__name_prefix = "" ),
                    Q( person__name_suffix__isnull = True ) | Q( person__name_suffix = "" ),
                    Q( person__nickname__isnull = True ) | Q( person__nickname = "" ),
                )

            #-- END only first name --#

Person in Article_Data

- Article_Data

    - Article_Author
    - Article_Subject
    - both Article_Author and Article_Subject have "person" relation that ties to person instance for name and other details.
    - They also have name fields:
    
        - name
        - verbatim_name
        - lookup_name
        
    - Should mine the above to see how widely and reliably the name fields were set - could just look for names with no internal spaces there...

In [None]:
# reset article_data_qs to a QuerySet of all Article_Data, so the filters in the
#     sections below start from the full set.
article_data_qs = Article_Data.objects.all()

## Only automated coder

In [None]:
# narrow Article_Data to records whose coder is the automated coding user
#     (automated_coder is looked up in the "Important instances" cell).
article_data_qs = article_data_qs.filter( coder = automated_coder )

# how many Article_Data are left?
article_data_count = article_data_qs.count()
print( "{} Article_Data instances for coder {}.".format( article_data_count, automated_coder ) )

## Only Detroit News

In [None]:
# narrow Article_Data to records whose article is from the Detroit News.
my_newspaper = detroit_news
article_data_qs = article_data_qs.filter( article__newspaper = my_newspaper )

# how many Article_Data are left?
article_data_count = article_data_qs.count()
print( "{} Article_Data instances for newspaper {}.".format( article_data_count, my_newspaper ) )

## Only Grand Rapids Press

In [None]:
# narrow to Article_Data whose article is from the Grand Rapids Press.
my_newspaper = grand_rapids_press

# Build the QuerySet fresh rather than filtering article_data_qs again: when
#     the notebook is run top to bottom, article_data_qs has already been
#     narrowed to Detroit News articles by the cell above, so adding a second,
#     different newspaper filter would match nothing (assuming each Article
#     belongs to a single newspaper - TODO confirm).  The automated-coder
#     filter from the "Only automated coder" section is re-applied here so
#     this cell is scoped the same way as the Detroit News cell.
article_data_qs = Article_Data.objects.filter( coder = automated_coder, article__newspaper = my_newspaper )

# how many Article_Data for this coder and newspaper?
article_data_count = article_data_qs.count()

print( "{} Article_Data instances for newspaper {}.".format( article_data_count, my_newspaper ) )