In [79]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

# Shared Elasticsearch client used by the query and bulk-indexing cells below.
# No host is passed, so the client library's default connection settings apply;
# http_compress=True turns on the client's HTTP compression option.
es_client = Elasticsearch(http_compress=True)
 

In [84]:
def last_case_generator(type, year):
    """Yield ``(county, last_case_number)`` for every county found in the index.

    Queries the ``case`` index for documents matching the given case type and
    year, groups the matches by ``county`` (at most 100 buckets) and computes
    the maximum ``number`` inside each bucket.

    Args:
        type: Case type code to filter on (e.g. ``'CV'``). The name shadows
            the builtin; it is kept so keyword callers keep working.
        year: Year to filter on.

    Yields:
        tuple: ``(county_key, highest_indexed_case_number)`` where the number
        is converted to ``int``.
    """
    s = (
        Search(using=es_client, index="case")
        .filter('term', type=type)
        .filter('term', year=year)
    )
    s.aggs.bucket('per_county', 'terms', size=100, field='county').metric(
        'last_case', 'max', field='number'
    )
    response = s.execute()

    for county in response.aggregations.per_county.buckets:
        yield (county.key, int(county.last_case.value))
    # The generator finishes by simply returning. An explicit
    # ``raise StopIteration`` here is converted into RuntimeError by PEP 479
    # (Python 3.7+), which would crash every ``for`` loop consuming it.

In [88]:
from datetime import datetime
from elasticsearch import helpers
import oscn


# Field names of an indexed case document, listed in the same order in which
# case_values() produces the corresponding values.
keys = [
    'county', 'year', 'type', 'number',
    'filed', 'closed', 'offense',
    'parties', 'issues', 'counts', 'docket',
    'source',
]

def safe_date(date_str):
    """Parse an ``MM/DD/YYYY`` string into a ``datetime``.

    A falsy ``date_str`` (``None`` or ``''``) yields
    ``datetime(1970, 1, 1, 0, 0)`` as a placeholder instead of raising.
    """
    if not date_str:
        return datetime(1970, 1, 1, 0, 0)
    return datetime.strptime(date_str, '%m/%d/%Y')

def case_values(c):
    """Return the case's field values as a list, in fixed field order.

    Reads county, year, type, number, filed, closed, offense, parties,
    issues, counts, docket and source from ``c`` (in that order). The three
    date attributes (filed, closed, offense) are passed through
    ``safe_date``; every other attribute is returned untouched.
    """
    date_attrs = ('filed', 'closed', 'offense')
    ordered_attrs = (
        'county', 'year', 'type', 'number',
        'filed', 'closed', 'offense',
        'parties', 'issues', 'counts', 'docket', 'source',
    )
    values = []
    for attr in ordered_attrs:
        raw = getattr(c, attr)
        values.append(safe_date(raw) if attr in date_attrs else raw)
    return values

def lists2dict(keys, values):
    """Pair ``keys`` with ``values`` by position and return the result as a dict.

    Every falsy value (``None``, ``''``, ``0``, an empty container, ...) is
    replaced by a new empty list. Pairing stops at the shorter input.
    """
    paired = {}
    for key, value in zip(keys, values):
        paired[key] = value or []
    return paired



def doc_generator(year, county, case_type, start, stop=70):
    """Yield one Elasticsearch bulk action per case in an OSCN case range.

    Builds an ``oscn.request.CaseList`` for the given year, county and case
    type, and turns every case it produces into a bulk-index action for the
    ``case`` index. The document id is the case's ``case_index`` and the body
    is ``lists2dict(keys, case_values(case))``.

    Parties, issues, counts and docket entries are stored inside the case
    document itself; they are not emitted as separate child documents.

    Args:
        year: Year passed to ``CaseList``.
        county: County passed to ``CaseList``.
        case_type: Case type code passed to ``CaseList`` as ``type``.
        start: First case number to request.
        stop: Value passed to ``CaseList`` as ``stop``. Defaults to 70, the
            value that used to be hard-coded here.

    Yields:
        dict: A bulk action with ``_index``, ``_type``, ``_id`` and
        ``_source`` keys, suitable for ``elasticsearch.helpers.bulk``.
    """
    case_list = oscn.request.CaseList(
        year=year, county=county, type=case_type, start=start, stop=stop
    )
    for case in case_list:
        yield {
            "_index": 'case',
            "_type": "_doc",
            "_id": f"{case.case_index}",
            "_source": lists2dict(keys, case_values(case)),
        }
    # The generator finishes by simply returning. An explicit
    # ``raise StopIteration`` here is converted into RuntimeError by PEP 479
    # (Python 3.7+), which would make helpers.bulk() fail after the last case.


## Get new cases

In [90]:
# Incremental load: for each year and case type, ask Elasticsearch for the
# highest case number already indexed per county, then fetch and bulk-index
# the cases that follow it.
years = ['2019']
# NOTE(review): `counties` is assigned but never read in this cell; the
# counties processed are whichever ones last_case_generator() finds in the
# index. Confirm whether filtering by this list was intended.
counties = ['tulsa']
types = ['CV', 'PB', 'FD','CF','CM']


for year in years:
    for case_type in types:
        for county, last in last_case_generator(case_type,year):
            print(f'county:{county} last:{last}')
            # Resume with the first case number that is not indexed yet.
            # NOTE(review): doc_generator asks CaseList to stop at 70, so once
            # `last` reaches 70 this presumably fetches nothing — confirm
            # against oscn's CaseList start/stop semantics.
            start = last + 1
            helpers.bulk(es_client, doc_generator(year, county, case_type, start))

            

county:tulsa last:70
county:tulsa last:56
county:tulsa last:70
county:tulsa last:70
county:tulsa last:70


## All Cases

In [83]:

import oscn

# Full load: for every (year, case type, county) combination, fetch cases
# starting at number 1 and bulk-index them into Elasticsearch.
years = ['2019']
counties = ['tulsa']
# counties = oscn.counties
types = ['CV', 'PB', 'FD','CF','CM']


for year in years:
    for case_type in types:
        for county in counties:
            print(f'year:{year} type:{case_type} county:{county}')
            # Documents are indexed under the case's case_index as _id (see
            # doc_generator), so re-running targets the same document ids.
            helpers.bulk(es_client, doc_generator(year, county, case_type, 1))


            
            

year:2019 type:CV county:tulsa
year:2019 type:PB county:tulsa
year:2019 type:FD county:tulsa
year:2019 type:CF county:tulsa
year:2019 type:CM county:tulsa


In [48]:
# Fetch a single case for ad-hoc inspection; only the case type is specified,
# every other Case argument is left at whatever oscn uses by default.
c=oscn.request.Case(type='CV')

In [58]:
# Preview the document body that doc_generator would index for case `c`.
# NOTE(review): the saved output contains an 'id' key that the current `keys`
# list does not define, so it appears to predate the current `keys` — re-run
# this cell to refresh it.
lists2dict(keys, case_values(c))

{'id': 'tulsa-CV-2018-1',
 'county': 'tulsa',
 'year': '2018',
 'type': 'CV',
 'number': 1,
 'filed': datetime.datetime(2018, 1, 2, 0, 0),
 'closed': datetime.datetime(2018, 6, 15, 0, 0),
 'offense': datetime.datetime(1970, 1, 1, 0, 0),
 'parties': [{'name': 'CRADDOCK, D S', 'type': 'Deceased'},
  {'name': 'SPRINGER, LESTER', 'type': 'Plaintiff'},
  {'name': 'STONE, EVELYN J', 'type': 'Defendant'}],
 'issues': [{'Filed Date': '01/02/2018',
   'Filed By': 'SPRINGER, LESTER',
   'Issue': 'QUIET TITLE (QUIET)',
   'dispositions': [{'Defendant': 'STONE, EVELYN J',
     'Respondent': '',
     'Disposed': 'JUDGEMENT ENTERED, 06/15/2018. Other'}]}],
 'counts': 'None',
 'docket': [{'date': '01-02-2018',
   'code': 'TEXT',
   'description': 'CIVIL MISC. INITIAL FILING.',
   'count': '1',
   'party': '',
   'amount': ''},
  {'date': '01-02-2018',
   'code': 'QUIET',
   'description': 'QUIET TITLE',
   'count': '',
   'party': '',
   'amount': ''},
  {'date': '01-02-2018',
   'code': 'DMFE',
   '