# Stream Filtering - KinesisAnalytics V1 API

The intent of this application is to filter the set of records written to one stream and land them in another stream as the output.

Currently, each record appearing in the filtered stream is wrapped in a 'payload' field whose value is the original event serialized as a JSON string, so stream consumers have to be aware of this convention and extract (and parse) the payload themselves.

Example - input to main stream:

```console
{
    "specversion": "1.0",
    "type": "newFoo",
    "source": "foo",
    "id": "19951489-bb5c-4376-b70d-06616a7bd9a2",
    "time": "2020-01-21T18:02:25.621391+00:00",
    "data": {
        "fooaddr": "foostuffval",
        "foolist": [
            1,
            2,
            3
        ],
        "barobj": {
            "baraatr1": "yes",
            "barattr2": false,
            "barattr3": 122.22
        }
    }
}
```

Record in filtered output stream:

```console
"payload": "{\"specversion\":\"1.0\",\"type\":\"newFoo\",\"source\":\"foo\",\"id\":\"2eb81374-438a-4471-aa7a-c8021d8496a4\",\"time\":\"2020-01-21T17:01:35.106678+00:00\",\"data\":{\"fooaddr\":\"foostuffval\",\"foolist\":[1,2,3],\"barobj\":{\"baraatr1\":\"yes\",\"barattr2\":false,\"barattr3\":122.22}}}"
```

## Setup

In [None]:
import boto3

# Kinesis client used by the cells below to create, describe, write to, and
# delete the 'main' and 'filtered' streams.
kinesis_client = boto3.client('kinesis')

In [None]:
import os
# AWS account number, read from the ACCOUNT_NO environment variable and used to
# build the stream and role ARNs. Raises KeyError if the variable is not set.
account_no = os.environ['ACCOUNT_NO']

In [None]:
# Create the single-shard source stream and keep the API response.
main_stream_response = kinesis_client.create_stream(StreamName='main', ShardCount=1)

# Create the single-shard stream that receives the filtered records.
kinesis_client.create_stream(
    StreamName='filtered',
    ShardCount=1,
)

In [None]:
# Fetch the description of the 'main' stream; as the last expression in the
# cell, the response is shown as the cell output.
kinesis_client.describe_stream(StreamName='main')

In [None]:
# Fetch the description of the 'filtered' stream; as the last expression in the
# cell, the response is shown as the cell output.
kinesis_client.describe_stream(StreamName='filtered')

## Stream Write

In [None]:
from datetime import datetime, timezone

def timestamp():
    """Return the current UTC time as an ISO 8601 formatted string."""
    return datetime.now(timezone.utc).isoformat()

In [None]:
import json
import uuid  # required for the per-event ids generated below

# Number of sample events to write to the 'main' stream.
write_count = 10

# Write exactly `write_count` sample events. Each event gets a fresh UUID and
# the current UTC timestamp; every other field is fixed.
for _ in range(write_count):
    event = {
        "specversion": "1.0",
        "type": "newFoo",
        "source": "foo",
        "id": str(uuid.uuid4()),
        "time": timestamp(),
        "data": {
            "fooaddr": "foostuffval",
            "foolist": [1, 2, 3],
            "barobj": {
                "baraatr1": "yes",
                "barattr2": False,
                "barattr3": 122.22,
            },
        },
    }

    # The record body is the UTF-8 encoded JSON document; the event source
    # ('foo') is used as the partition key for every record.
    prr = kinesis_client.put_record(
        StreamName='main',
        Data=json.dumps(event).encode(),
        PartitionKey=event['source'],
    )
    print(prr)

## Analytics App

### Role

In [None]:
# Inline IAM policy for the analytics application's role: read access to the
# 'main' stream and write access to the 'filtered' stream.
# NOTE: both ARNs hard-code the us-east-1 region.
kinesis_app_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            # Actions allowed on the input stream.
            "Sid": "ReadInputKinesis",
            "Effect": "Allow",
            "Action": [
                "kinesis:DescribeStream",
                "kinesis:GetShardIterator",
                "kinesis:GetRecords",
            ],
            "Resource": ["arn:aws:kinesis:us-east-1:" + account_no + ":stream/main"],
        },
        {
            # Actions allowed on the output stream.
            "Sid": "WriteOutputKinesis",
            "Effect": "Allow",
            "Action": [
                "kinesis:DescribeStream",
                "kinesis:PutRecord",
                "kinesis:PutRecords",
            ],
            "Resource": ["arn:aws:kinesis:us-east-1:" + account_no + ":stream/filtered"],
        },
    ],
}

In [None]:
import json

# Trust policy that allows the Kinesis Analytics service principal to assume
# the role.
assume_role_policy = {
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": ["kinesisanalytics.amazonaws.com"]},
            "Action": ["sts:AssumeRole"],
        }
    ]
}

# Show the serialized policy document as the cell output.
json.dumps(assume_role_policy)

In [None]:
# IAM client used to create and later delete the application's role.
iam = boto3.client("iam")

# Create the role under the /service-role/ path, using the trust policy
# defined in assume_role_policy, and print the API response.
crr = iam.create_role(
    RoleName="sample-ka-app-role",
    Path="/service-role/",
    AssumeRolePolicyDocument=json.dumps(assume_role_policy),
)
print(crr)

In [None]:
# Attach the stream read/write permissions to the role as an inline policy
# named 'KAPolicy', and print the API response.
prp = iam.put_role_policy(
    RoleName="sample-ka-app-role",
    PolicyName="KAPolicy",
    PolicyDocument=json.dumps(kinesis_app_policy),
)
print(prp)

### App Definition

In [None]:
# Kinesis Analytics client used by the cells below to create, describe, start,
# stop, and delete the application.
ka = boto3.client('kinesisanalytics')

In [None]:
# SQL passed to the analytics application as its code. It declares the output
# stream "DESTINATION_SQL_STREAM" with a single "payload" column, and a pump
# that inserts "payload" from "SOURCE_SQL_STREAM_001" for rows whose "type"
# equals 'newFoo'.
# NOTE: the embedded SQL comment says "Select all columns", but only "payload"
# is selected.
application_code = '-- ** Continuous Filter ** \n-- Performs a continuous filter based on a WHERE condition.\n--          .----------.   .----------.   .----------.              \n--          |  SOURCE  |   |  INSERT  |   |  DESTIN. |              \n-- Source-->|  STREAM  |-->| & SELECT |-->|  STREAM  |-->Destination\n--          |          |   |  (PUMP)  |   |          |              \n--          \'----------\'   \'----------\'   \'----------\'               \n-- STREAM (in-application): a continuously updated entity that you can SELECT from and INSERT into like a TABLE\n-- PUMP: an entity used to continuously \'SELECT ... FROM\' a source STREAM, and INSERT SQL results into an output STREAM\n-- Create output stream, which can be used to send to a destination\nCREATE OR REPLACE STREAM "DESTINATION_SQL_STREAM" ("payload" VARCHAR(32000));\n-- Create pump to insert into output \nCREATE OR REPLACE PUMP "STREAM_PUMP" AS INSERT INTO "DESTINATION_SQL_STREAM"\n-- Select all columns from source stream\nSELECT STREAM "payload"\nFROM "SOURCE_SQL_STREAM_001"\n-- LIKE compares a string to a string pattern (_ matches all char, % matches substring)\n-- SIMILAR TO compares string to a regex, may use ESCAPE\nWHERE "type" = \'newFoo\';\n'
# Print the SQL so it can be read with its line breaks rendered.
print(application_code)

In [None]:
# Create the analytics application 'dave'. It reads JSON records from the
# 'main' stream, runs application_code over them, and writes the rows of
# DESTINATION_SQL_STREAM to the 'filtered' stream as JSON.
# NOTE: the stream ARNs below hard-code the us-east-1 region.
car = ka.create_application(
    ApplicationName="dave",
    ApplicationDescription="dave the app",
    Inputs=[
        {
            # The application SQL reads from "SOURCE_SQL_STREAM_001", which
            # carries this prefix.
            "NamePrefix": "SOURCE_SQL_STREAM",
            "KinesisStreamsInput": {
                "ResourceARN": "arn:aws:kinesis:us-east-1:" + account_no + ":stream/main",
                "RoleARN": "arn:aws:iam::" + account_no + ":role/service-role/sample-ka-app-role",
            },
            "InputParallelism": {"Count": 1},
            "InputSchema": {
                "RecordFormat": {
                    "RecordFormatType": "JSON",
                    "MappingParameters": {
                        "JSONMappingParameters": {"RecordRowPath": "$"},
                    },
                },
                "RecordEncoding": "UTF-8",
                "RecordColumns": [
                    # Column used by the WHERE clause of the application SQL.
                    {
                        "Name": "type",
                        "Mapping": "$.type",
                        "SqlType": "VARCHAR(64)",
                    },
                    # Column mapped to the record root ('$'), i.e. the whole
                    # event, which the SQL forwards to the output stream.
                    {
                        "Name": "payload",
                        "Mapping": "$",
                        "SqlType": "VARCHAR(32000)",
                    },
                ],
            },
        },
    ],
    Outputs=[
        {
            "Name": "DESTINATION_SQL_STREAM",
            "KinesisStreamsOutput": {
                "ResourceARN": "arn:aws:kinesis:us-east-1:" + account_no + ":stream/filtered",
                "RoleARN": "arn:aws:iam::" + account_no + ":role/service-role/sample-ka-app-role",
            },
            "DestinationSchema": {"RecordFormatType": "JSON"},
        },
    ],
    ApplicationCode=application_code,
)

print(car)

In [None]:
# Fetch and print the current description of the 'dave' application; the
# response is reused below to obtain the input id and creation timestamp.
dar = ka.describe_application(ApplicationName="dave")
print(dar)

In [None]:
# First (and only configured) input of the application.
input_description = dar["ApplicationDetail"]["InputDescriptions"][0]
print(input_description)

# Input id, required when starting the application.
input_id = input_description["InputId"]
print(input_id)

# Creation timestamp, required when deleting the application.
create_timestamp = dar["ApplicationDetail"]["CreateTimestamp"]
print(create_timestamp)

In [None]:
# Start the application, reading the input identified by input_id from the
# 'NOW' starting position, and print the API response.
sa = ka.start_application(
    ApplicationName="dave",
    InputConfigurations=[
        {
            "Id": input_id,
            "InputStartingPositionConfiguration": {"InputStartingPosition": "NOW"},
        },
    ],
)
print(sa)

## Clean Up

In [None]:
# Stop the 'dave' application; the response is shown as the cell output.
ka.stop_application(ApplicationName="dave")

In [None]:
# Delete the 'dave' application, passing the creation timestamp captured from
# the describe_application response.
ka.delete_application(ApplicationName="dave", CreateTimestamp=create_timestamp)

In [None]:
# Remove the source stream, then the filtered output stream.
kinesis_client.delete_stream(StreamName="main")
kinesis_client.delete_stream(StreamName="filtered")

In [None]:
# Remove the inline 'KAPolicy' policy from the application's role.
iam.delete_role_policy(RoleName="sample-ka-app-role", PolicyName="KAPolicy")

In [None]:
# Delete the application's IAM role.
iam.delete_role(RoleName="sample-ka-app-role")