## Run the dependent pipelines

In [None]:
%%bash
# Run the kns_committeesession and mk_individual pipelines (the dependencies
# named in the section heading) with caching disabled.
# Abort if the pipelines directory is missing, so dpp never runs from another directory.
cd /pipelines || exit 1
# KNESSET_DATASERVICE_INCREMENTAL is passed as an empty value for this run.
KNESSET_LOAD_FROM_URL=1 KNESSET_DATASERVICE_INCREMENTAL= \
  dpp run --no-use-cache --concurrency 2 --verbose \
    ./committees/kns_committeesession,./members/mk_individual

## Inspect the source data

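Choose a committee session ID to focus on, and make sure its row has all of the fields populated (in particular `text_parsed_filename`, which is used below to download the protocol text).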

In [23]:
# ID of the single committee session this notebook focuses on; the next cell
# filters the kns_committeesession rows down to this value.
CommitteeSessionID = 2059313

In [26]:
from dataflows import Flow, load, printer, filter_rows

# Build the flow step by step: read the committee-session datapackage, keep
# only the row whose ID matches CommitteeSessionID, and render it as an HTML
# table. The collected results are indexed later as [0][0][0] to reach the
# selected row.
session_source = load('/pipelines/data/committees/kns_committeesession/datapackage.json')
only_selected_session = filter_rows(lambda session_row: session_row['CommitteeSessionID'] == CommitteeSessionID)
html_table = printer(tablefmt='html')

committeesession_data = Flow(session_source, only_selected_session, html_table).results()

#,CommitteeSessionID (integer),Number (integer),KnessetNum (integer),TypeID (integer),TypeDesc (string),CommitteeID (integer),Location (string),SessionUrl (string),BroadcastUrl (string),StartDate (datetime),FinishDate (datetime),Note (string),LastUpdatedDate (datetime),download_crc32c (string),download_filename (string),download_filesize (integer),parts_crc32c (string),parts_filesize (integer),parts_parsed_filename (string),text_crc32c (string),text_filesize (integer),text_parsed_filename (string),topics (array),committee_name (string)
1,2059313,462,15,161,פתוחה,2,"חדר הוועדה, באגף הוועדות (קדמה), קומה 3, חדר 3750",http://main.knesset.gov.il/Activity/committees/Pages/AllCommitteesAgenda.aspx?Tab=3&ItemID=2059313,,2002-11-19 00:00:00,2002-11-19 00:00:00,"הצעת חוק ההסדרים במשק המדינה (תיקוני חקיקה להשגת יעדי התקציב והמדיניות הכלכלית לשנת הכספים 2003, התש ...",2018-10-10 11:03:06,+lu4+A==,files/23/4/3/430592.DOC,91162,x9rEiQ==,194576,files/2/0/2059313.csv,jD1Riw==,195031,files/2/0/2059313.txt,,הכספים


## Download the protocol text

In [38]:
import os
text_url = 'https://storage.googleapis.com/knesset-data-pipelines/data/committees/meeting_protocols_text/{}'.format(committeesession_data[0][0][0]['text_parsed_filename'])
filename = '/pipelines/data/committees/meeting_protocols_text/{}'.format(committeesession_data[0][0][0]['text_parsed_filename'])
os.makedirs(os.path.dirname(filename), exist_ok=True)
cmd = 'curl {} > {}'.format(text_url, filename)
!{cmd}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  190k  100  190k    0     0   285k      0 --:--:-- --:--:-- --:--:--  285k


## Modify the pipeline yaml to run on the selected committee session ID

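Under `committee-meeting-attendees:`, set the following steps to parse a single meeting (and add `cache: true` to the filter step). The ID `2068104` below is an example; replace it with the committee session ID chosen above: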

```
  - run: filter
    cache: true
    parameters:
      resources: kns_committeesession
      in:
      - CommitteeSessionID: 2068104
  - run: committee_meeting_attendees
    # parameters:
    #   filter-meeting-id: [2068104]
```

## Delete the cache hash and run the pipeline

In [47]:
%%bash
# Abort if the pipelines directory is missing: the relative `rm -rf` below
# must never run from any other working directory.
cd /pipelines || exit 1
# Remove the stored cache hash before re-running the pipeline
# (presumably so dpp does not treat it as already up to date; confirm).
rm -rf data/people/committees/meeting-attendees/cache_hash
# KNESSET_DATASERVICE_INCREMENTAL is passed as an empty value for this run.
KNESSET_DATASERVICE_INCREMENTAL= \
  dpp run --verbose \
    ./people/committee-meeting-attendees

[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 RUNNING ./people/committee-meeting-attendees
[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 Collecting dependencies
[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 Running async task
[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 Waiting for completion
[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 Async task starting
[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 Searching for existing caches
[./people/committee-meeting-attendees:T_0] >>> INFO    :Found cache for step 3: filter
[./people/committee-meeting-attendees:T_0] >>> INFO    :8a208434 Building process chain:
[./people/committee-meeting-attendees:T_0] >>> INFO    :- cache_loader
[./people/committee-meeting-attendees:T_0] >>> INFO    :- committee_meeting_attendees
[./people/committee-meeting-attendees:T_0] >>> INFO    :- join_committee_meeting_attendees_mks
[./people/committee-m

## Inspect the data

In [48]:
from dataflows import Flow, load, printer

# Read the datapackage written by the committee-meeting-attendees pipeline and
# render its rows as an HTML table. The flow is the last expression in the
# cell, so the value returned by process() is displayed as well.
attendees_source = load('/pipelines/data/people/committees/meeting-attendees/datapackage.json')
Flow(attendees_source, printer(tablefmt='html')).process()

#,CommitteeSessionID (integer),Number (integer),KnessetNum (integer),TypeID (integer),TypeDesc (string),CommitteeID (integer),Location (string),SessionUrl (string),BroadcastUrl (string),StartDate (datetime),FinishDate (datetime),Note (string),LastUpdatedDate (datetime),download_crc32c (string),download_filename (string),download_filesize (integer),parts_crc32c (string),parts_filesize (integer),parts_parsed_filename (string),text_crc32c (string),text_filesize (integer),text_parsed_filename (string),topics (array),committee_name (string),mks (array),invitees (array),legal_advisors (array),manager (array),financial_advisors (array),attended_mk_individual_ids (array)
1,2059313,462,15,161,פתוחה,2,"חדר הוועדה, באגף הוועדות (קדמה), קומה 3, חדר 3750",http://main.knesset.gov.il/Activity/committees/Pages/AllCommitteesAgenda.aspx?Tab=3&ItemID=2059313,,2002-11-19 00:00:00,2002-11-19 00:00:00,"הצעת חוק ההסדרים במשק המדינה (תיקוני חקיקה להשגת יעדי התקציב והמדיניות הכלכלית לשנת הכספים 2003, התש ...",2018-10-10 11:03:06,+lu4+A==,files/23/4/3/430592.DOC,91162,x9rEiQ==,194576,files/2/0/2059313.csv,jD1Riw==,195031,files/2/0/2059313.txt,,הכספים,"['יעקב ליצמן - היו""ר', 'אבשלום וילן', 'עופר חוגי', 'אמנון כהן', 'רחמים מלול', 'משולם נהרי']","[{'name': 'חה""כ צבי הנדל'}, {'name': 'חה""כ עמיר פרץ'}, {'name': 'יעקב ניזרי-סמנכ""ל שירות התעסוקה, מש ...","['שגית אפיק', 'ליאורה סידי (מתמחה)', 'אנה שניידר']",['טמיר כהן'],[],"[105, 203, 46, 207, 210, 216, 219, 222]"


(<datapackage.package.Package at 0x7f83c0bd1748>, {})