# Setup

## Setup - Imports

In [None]:
# python base imports
import datetime
import json

# import six
import six

# report when the base package imports completed.
print( "packages imported at {}".format( datetime.datetime.now() ) )

## Setup - Initialize Django

- Back to [Table of Contents](#Table-of-Contents)

First, initialize my dev django project, so I can run code in this notebook that references my django models and can talk to the database using my project's settings.

In [None]:
# IPython magic: execute the init script one directory up inside this kernel.
# NOTE(review): presumably this configures Django settings so the model
#     imports in the next cell work - confirm against django_init.py.
%run ../django_init.py

In [None]:
# django imports
from django.contrib.auth.models import User

# sourcenet imports
from context_text.shared.context_text_base import ContextTextBase

# context_analysis imports
from context_analysis.network.network_person_info import NetworkPersonInfo

# sourcenet imports
from context_text.models import Article
from context_text.models import Article_Author
from context_text.models import Article_Data
from context_text.models import Article_Subject
from context_text.models import Newspaper
from context_text.models import Person

# article coding
from context_text.article_coding.article_coder import ArticleCoder
#from context_text.article_coding.article_coding import ArticleCoding
from context_text.article_coding.open_calais_v2.open_calais_v2_article_coder import OpenCalaisV2ArticleCoder

# import class that actually processes requests for outputting networks.
from context_text.export.network_output import NetworkOutput

# context_text shared
from context_text.shared.context_text_base import ContextTextBase

# report when the django model imports completed.
print( "django model packages imported at {}".format( datetime.datetime.now() ) )

## Setup - Important instances

In [None]:
# get ArticleCoding instance (disabled - the matching ArticleCoding import
#     in the imports cell is commented out as well).
#article_coding = ArticleCoding()

# automated coding user - later cells filter on it
#     ( article_data__coder = automated_coder ).
automated_coder = ArticleCoder.get_automated_coding_user()

# newspapers for Grand Rapids Press and Detroit News, looked up by
#     newsbank_code.  QuerySet.get() raises if there is not exactly one
#     match, so a missing newspaper fails here rather than later.
grand_rapids_press = Newspaper.objects.get( newsbank_code = "GRPB" )
detroit_news = Newspaper.objects.get( newsbank_code = "DTNB" )

# Filter Article_Data

In [None]:
# initialize the variables this cell populates.
article_data_qs, article_data_count = None, None

# begin with the complete, unfiltered set of Article_Data.
article_data_qs = Article_Data.objects.all()

# record how many instances there are before any filtering.
article_data_count = article_data_qs.count()

print( "Starting with {} total Article_Data instances.".format( article_data_count ) )

## Detect single-name people within Article_Data

- code to filter out those with single names... where?

    - notebook where work was done originally (just notes - it was manual): [prelim_month-create_Reliability_Names_data.ipynb](./methods/data_creation/prelim_month-create_Reliability_Names_data.ipynb)
    - code to filter to just single first names is in `context_analysis/views.py --> reliability_names_disagreement_view()`:
    
            if ( reliability_names_only_first_name == True ):

                # to start, first name needs to not be null and
                #     not be empty.
                reliability_names_qs = reliability_names_qs.filter( 
                    Q( person__first_name__isnull = False ) & ~Q( person__first_name = "" ),
                    Q( person__middle_name__isnull = True ) | Q( person__middle_name = "" ),
                    Q( person__last_name__isnull = True ) | Q( person__last_name = "" ),
                    Q( person__name_prefix__isnull = True ) | Q( person__name_prefix = "" ),
                    Q( person__name_suffix__isnull = True ) | Q( person__name_suffix = "" ),
                    Q( person__nickname__isnull = True ) | Q( person__nickname = "" ),
                )

            #-- END only first name --#

Person in Article_Data

- Article_Data

    - Article_Author
    - Article_Subject
    - both Article_Author and Article_Subject have "person" relation that ties to person instance for name and other details.
    - They also have name fields:
    
        - name
        - verbatim_name
        - lookup_name
        
    - Should mine the above to see how widely and reliably the name fields were set - could just look for names with no internal spaces there...

In [None]:
# start with Article_Data QuerySet - reset to the full, unfiltered set of
#     Article_Data (replaces any value left in article_data_qs by earlier cells).
article_data_qs = Article_Data.objects.all()

### Article_Subject

- Some have no name, verbatim_name, or lookup_name.

    - small sample included valid people where this data was just not captured early on.
    - see if I can write a script to populate from saved data.

- For some, name/verbatim_name/lookup_name are a single name. These we'd want to omit, as long as this is actually the verbatim name from the article.

    - spot-check in [View article + coding](https://research.local/research/context/text/article/article_data/view_with_text/)
    - looks like it is, and so omit if the subject's name in Article_Subject has no spaces.
    - check programmatically for single-name people (not only is the verbatim name in Article_Subject a single word, but also look at the different name elements of the "Person" associated with the single-name mention to see if the Person has just a first name, also). Two reasons for this kind of single-name match:
    
        - from early days, program created person with single name part from article.
        - ...?  I already forgot.

In [None]:
# scratch variables, initialized up front.
match_count = None
article_subject = None
my_name = my_verbatim_name = my_lookup_name = None

# baseline: every Article_Subject.
all_article_subjects_qs = Article_Subject.objects.all()
match_count = all_article_subjects_qs.count()
print( "total Article_Subject count = {}".format( match_count ) )

# narrow to subjects whose Article_Data was coded by the automated coder.
all_article_subjects_qs = all_article_subjects_qs.filter( article_data__coder = automated_coder )
match_count = all_article_subjects_qs.count()
print( "automated Article_Subject count = {}".format( match_count ) )

# name: drop every subject whose name contains a space...
article_subject_qs = all_article_subjects_qs.exclude( name__contains = " " )
match_count = article_subject_qs.count()
print( 'exclude name__contains = "<space>" --> match count = {}'.format( match_count ) )

# ...then also drop the subjects whose name is NULL.
article_subject_qs = article_subject_qs.exclude( name__isnull = True )
match_count = article_subject_qs.count()
print( 'exclude name__contains = "<space>"; exclude NULL --> match count = {}'.format( match_count ) )

# show a sample (at most 10) of the no-space, non-NULL name matches.
for article_subject in article_subject_qs[ : 10 ]:

    # pull the three name fields off of the instance.
    my_name, my_verbatim_name, my_lookup_name = (
        article_subject.name,
        article_subject.verbatim_name,
        article_subject.lookup_name,
    )

    print( "\n{article_subject}:".format( article_subject = article_subject ) )
    print( "-          name: {}".format( my_name ) )
    print( "- verbatim_name: {}".format( my_verbatim_name ) )
    print( "-   lookup_name: {}".format( my_lookup_name ) )

    # related Article_Data, for spot-checking against the coding.
    my_article_data = article_subject.article_data
    print( "- Article_Data: {}".format( my_article_data ) )

    # and the Article that Article_Data belongs to.
    my_article = my_article_data.article
    print( "- Article: {}".format( my_article ) )

#-- END loop over sample of Article_Subject instances --#

print( "\n" )

# verbatim_name: count automated subjects with no space in it (count only).
article_subject_qs = all_article_subjects_qs.exclude( verbatim_name__contains = " " )
match_count = article_subject_qs.count()
print( 'exclude verbatim_name__contains = "<space>" --> match count = {}'.format( match_count ) )

# lookup_name: count automated subjects with no space in it.
article_subject_qs = all_article_subjects_qs.exclude( lookup_name__contains = " " )
match_count = article_subject_qs.count()
print( 'exclude lookup_name__contains = "<space>" --> match count = {}'.format( match_count ) )

# show a sample (at most 10) of the lookup_name matches.
for article_subject in article_subject_qs[ : 10 ]:

    # pull the three name fields off of the instance.
    my_name, my_verbatim_name, my_lookup_name = (
        article_subject.name,
        article_subject.verbatim_name,
        article_subject.lookup_name,
    )

    print( "\n{article_subject}:".format( article_subject = article_subject ) )
    print( "-          name: {}".format( my_name ) )
    print( "- verbatim_name: {}".format( my_verbatim_name ) )
    print( "-   lookup_name: {}".format( my_lookup_name ) )

    # related Article_Data, for spot-checking against the coding.
    my_article_data = article_subject.article_data
    print( "- Article_Data: {}".format( my_article_data ) )

    # and the Article that Article_Data belongs to.
    my_article = my_article_data.article
    print( "- Article: {}".format( my_article ) )

#-- END loop over sample of Article_Subject instances --#

### Article_Author

In [None]:
# Single-name check for Article_Author.
#
# This cell previously re-ran the Article_Subject queries (copy-paste), so
#     it reported subject counts under the Article_Author heading and
#     overwrote the subject QuerySets.  It now queries Article_Author and
#     keeps its results in author-specific variables.
# NOTE(review): relies on Article_Author having name, verbatim_name and
#     lookup_name fields plus an article_data relation, as Article_Subject
#     does - confirm against context_text.models.

# declare variables
match_count = None
article_author = None
my_name = None
my_verbatim_name = None
my_lookup_name = None

# set up base queryset
all_article_authors_qs = Article_Author.objects.all()
match_count = all_article_authors_qs.count()
print( "total Article_Author count = {}".format( match_count ) )

# just automated coder.
all_article_authors_qs = all_article_authors_qs.filter( article_data__coder = automated_coder )
match_count = all_article_authors_qs.count()
print( "automated Article_Author count = {}".format( match_count ) )

# look for any that have no space in name.
article_author_qs = all_article_authors_qs.exclude( name__contains = " " )
match_count = article_author_qs.count()
print( "exclude name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# of those, also drop any where name is NULL.
article_author_qs = article_author_qs.exclude( name__isnull = True )
match_count = article_author_qs.count()
print( "exclude name__contains = \"<space>\"; exclude NULL --> match count = {}".format( match_count ) )

# print a sample (at most 10) of the no-space, non-NULL name matches.
for article_author in article_author_qs[ 0 : 10 ]:
    
    # get all names
    my_name = article_author.name
    my_verbatim_name = article_author.verbatim_name
    my_lookup_name = article_author.lookup_name

    print( "\n{article_author}:".format( article_author = article_author ) )
    print( "-          name: {}".format( my_name ) )
    print( "- verbatim_name: {}".format( my_verbatim_name ) )
    print( "-   lookup_name: {}".format( my_lookup_name ) )
    
#-- END loop over sample of Article_Author instances --#

print( "\n" )

# look for any that have no space in verbatim_name (count only).
article_author_qs = all_article_authors_qs.exclude( verbatim_name__contains = " " )
match_count = article_author_qs.count()
print( "exclude verbatim_name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# look for any that have no space in lookup_name.
article_author_qs = all_article_authors_qs.exclude( lookup_name__contains = " " )
match_count = article_author_qs.count()
print( "exclude lookup_name__contains = \"<space>\" --> match count = {}".format( match_count ) )

# print a sample (at most 10) of the lookup_name matches.
for article_author in article_author_qs[ 0 : 10 ]:
    
    # get all names
    my_name = article_author.name
    my_verbatim_name = article_author.verbatim_name
    my_lookup_name = article_author.lookup_name

    print( "\n{article_author}:".format( article_author = article_author ) )
    print( "-          name: {}".format( my_name ) )
    print( "- verbatim_name: {}".format( my_verbatim_name ) )
    print( "-   lookup_name: {}".format( my_lookup_name ) )
    
#-- END loop over sample of Article_Author instances --#