# Kafka Summit 2024

## Install Kafka / Redpanda

https://kafka.apache.org/quickstart

## Install Apache Flink

https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/try-flink/local_installation/

## Install Python packages

In [ ]:
!pip install -r requirements.txt

## Let's start

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:

import json

import pandas as pd
from confluent_kafka import Producer, Consumer, KafkaException
from confluent_kafka.admin import AdminClient, NewTopic

## Kafka config

In [5]:
with open('../config.json') as fp:
  config_obj = json.load(fp)
config_obj

{'flink.servers': 'localhost:8081',
 'kafka.brokers': 'localhost:51932',
 'kafka.topic.price.name': 'local.price',
 'kafka.topic.holding.name': 'local.holding',
 'kafka.topic.order.name': 'local.order'}

In [6]:
conf = {
  'bootstrap.servers': config_obj['kafka.brokers']
}
producer = Producer(conf)

price_topic_name = config_obj['kafka.topic.price.name']
holding_topic_name = config_obj['kafka.topic.holding.name']
order_topic_name = config_obj['kafka.topic.order.name']

## Create topics

In [25]:
admin = AdminClient(conf)

price_topic = NewTopic(price_topic_name, num_partitions=1, replication_factor=1)
admin.create_topics([price_topic])

holding_topic = NewTopic(holding_topic_name, num_partitions=3, replication_factor=1)
admin.create_topics([holding_topic])

order_topic = NewTopic(order_topic_name, num_partitions=3, replication_factor=1)
admin.create_topics([order_topic])

{'local.order': <Future at 0x159112700 state=running>}

In [5]:
!rpk cluster info

CLUSTER
redpanda.998fd8e2-ed31-46f1-96d4-20f7bd02855b

BROKERS
ID    HOST       PORT
0*    127.0.0.1  51932

TOPICS
NAME                PARTITIONS  REPLICAS
__consumer_offsets  3           1
holding_topic       1           1
local.holding       3           1
local.order         3           1
local.price         3           1



## Prepare sample price data

In [15]:
prices_df = pd.read_csv('../data/prices.csv')
prices_df.drop(['ipoDate', 'delistingDate'], inplace=True, axis=1)
# Lazily remove any broken data
prices_df = prices_df[~prices_df['price'].isna()]
prices_df = prices_df[~prices_df['name'].isna()]
prices_df = prices_df[~prices_df['symbol'].isna()]
prices_df


Unnamed: 0,symbol,name,exchange,assetType,status,price
0,A,Agilent Technologies Inc,NYSE,Stock,Active,147.39
1,AA,Alcoa Corp,NYSE,Stock,Active,29.67
2,AAA,AXS First Priority CLO Bond ETF,NYSE ARCA,ETF,Active,25.14
3,AAAU,Goldman Sachs Physical Gold ETF,BATS,ETF,Active,21.36
4,AACG,ATA Creativity Global,NASDAQ,Stock,Active,1.40
...,...,...,...,...,...,...
11597,ZWS,Zurn Elkay Water Solutions Corp,NYSE,Stock,Active,32.13
11601,ZYME,Zymeworks BC Inc,NASDAQ,Stock,Active,10.53
11603,ZYRX,Global Earnings Capital Ltd,NASDAQ,Stock,Active,161.00
11604,ZYXI,Zynex Inc,NASDAQ,Stock,Active,12.93


In [16]:
throttled_price = {
  'asOf': '2024-03-24T12:23:34.000',
  'prices': {i['symbol']: i['price'] for i in prices_df.to_dict(orient='records')}
}
throttled_price

{'asOf': '2024-03-24T12:23:34.000',
 'prices': {'A': 147.39,
  'AA': 29.67,
  'AAA': 25.14,
  'AAAU': 21.36,
  'AACG': 1.4,
  'AACI': 11.1,
  'AACIU': 11.12,
  'AACIW': 0.09,
  'AACT': 10.57,
  'AADI': 2.13,
  'AADR': 61.12,
  'AAGR': 0.45,
  'AAGRW': 0.01,
  'AAL': 13.93,
  'AAMC': 3.53,
  'AAME': 2.8,
  'AAN': 6.99,
  'AAOI': 13.6,
  'AAON': 85.18,
  'AAP': 83.83,
  'AAPB': 18.64,
  'AAPD': 22.1,
  'AAPL': 176.08,
  'AAPU': 23.45,
  'AAPX': 21.75,
  'AAPY': 24.49,
  'AAT': 21.13,
  'AAU': 0.14,
  'AAXJ': 67.21,
  'AB': 33.64,
  'ABAT': 1.99,
  'ABBV': 179.66,
  'ABCB': 46.12,
  'ABCL': 4.6,
  'ABCS': 26.31,
  'ABEO': 7.57,
  'ABEQ': 29.61,
  'ABEV': 2.47,
  'ABG': 209.21,
  'ABIO': 1.74,
  'ABL': 12.18,
  'ABLLL': 25.25,
  'ABLLW': 0.6,
  'ABLV': 3.01,
  'ABLVW': 0.04,
  'ABM': 43.37,
  'ABNB': 161.79,
  'ABOS': 4.0,
  'ABR': 12.61,
  'ABSI': 4.81,
  'ABT': 113.16,
  'ABTS': 0.75,
  'ABUS': 2.5,
  'ABVC': 1.12,
  'ABVX': 14.71,
  'AC': 33.21,
  'ACA': 82.1,
  'ACAB': 10.73,
  'ACABU'

## Publish price data

In [34]:
try:
  producer.produce(price_topic_name, key='2024-03-27', value=json.dumps(throttled_price))
except Exception as e:
  print(f'Failed to publish prices')
producer.flush()
print(f'{len(prices_df)} items produced.')

9886 items produced.


## Prepare holding data (SPY)

In [7]:
holding_df = pd.read_excel('../data/holdings-daily-us-en-spy.xlsx', skiprows=4)
holding_df = holding_df[~holding_df['Ticker'].isna()]
holding_df.rename(
    columns={'Shares Held': 'sodQty', 'Ticker': 'symbol', 'Local Currency': 'currency',
             'Name': 'name'}, inplace=True)
holding_df.drop(['Identifier', 'SEDOL', 'Weight', 'Sector'], axis=1, inplace=True)
holding_df['orderQty'] = 0
holding_df

Unnamed: 0,name,symbol,sodQty,currency,orderQty
0,MICROSOFT CORP,MSFT,90771106.0,USD,0
1,APPLE INC,AAPL,177321492.0,USD,0
2,NVIDIA CORP,NVDA,30173549.0,USD,0
3,AMAZON.COM INC,AMZN,111666519.0,USD,0
4,META PLATFORMS INC CLASS A,META,26876014.0,USD,0
...,...,...,...,...,...
499,MOHAWK INDUSTRIES INC,MHK,627392.0,USD,0
500,PARAMOUNT GLOBAL CLASS B,PARA,5898665.0,USD,0
501,VF CORP,VFC,4049110.0,USD,0
502,FOX CORP CLASS B,FOX,1613107.0,USD,0


In [39]:
account_holding = {
  'accountCode': 'BBB',
  'parentAccount': 'SPY',
  'holdings': holding_df.to_dict(orient='records')
}
account_holding

{'accountCode': 'BBB',
 'parentAccount': 'SPY',
 'holdings': [{'name': 'MICROSOFT CORP',
   'symbol': 'MSFT',
   'sodQty': 90771106.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'APPLE INC',
   'symbol': 'AAPL',
   'sodQty': 177321492.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'NVIDIA CORP',
   'symbol': 'NVDA',
   'sodQty': 30173549.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'AMAZON.COM INC',
   'symbol': 'AMZN',
   'sodQty': 111666519.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'META PLATFORMS INC CLASS A',
   'symbol': 'META',
   'sodQty': 26876014.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'ALPHABET INC CL A',
   'symbol': 'GOOGL',
   'sodQty': 71989916.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'BERKSHIRE HATHAWAY INC CL B',
   'symbol': 'BRK.B',
   'sodQty': 22210790.0,
   'currency': 'USD',
   'orderQty': 0},
  {'name': 'ALPHABET INC CL C',
   'symbol': 'GOOG',
   'sodQty': 60271657.0,
   'currency': 'USD',
   '

## Publish holding data (SPY)

In [40]:
try:
  producer.produce(holding_topic_name, key=account_holding['accountCode'],
                   value=json.dumps(account_holding))
except Exception as e:
  print(f'Failed to publish {account_holding["accountCode"]}')

producer.flush()
print(f'{account_holding["accountCode"]} {len(holding_df)} items produced.')

BBB 504 items produced.


In [16]:
consumer = Consumer({
  **conf,
  'group.id': 'test-client-1',
  'auto.offset.reset': 'earliest'
})

consumer.subscribe([price_topic_name])
msg = consumer.poll(1.0)
if msg is None:
  print("No messages")
elif msg.error():
  raise KafkaException(msg.error())
else:
  print(f'Received message: {msg.key().decode("utf-8")} {msg.value().decode("utf-8")}')

consumer.close

Received message: A {"symbol": "A", "name": "Agilent Technologies Inc", "exchange": "NYSE", "assetType": "Stock", "status": "Active", "price": 147.39}


<function Consumer.close>

In [19]:
consumer = Consumer({
  **conf,
  'group.id': 'test-client-1',
  'auto.offset.reset': 'earliest'
})

consumer.subscribe([holding_topic_name])
msg = consumer.poll(1.0)
if msg is None:
  print("No messages")
elif msg.error():
  raise KafkaException(msg.error())
else:
  print(f'Received message: {msg.key().decode("utf-8")} {msg.value().decode("utf-8")}')

consumer.close

No messages


<function Consumer.close>