In [14]:
#pip install pandas

In [15]:
#pip install ksql

In [16]:
import logging
from ksql import KSQLAPI
import pandas as pd
import datetime
import time
import random

In [17]:
# Connect to ksqlDB, which can insert into and query the data stored in Kafka.
# The resulting API client is used to add rows to, and read rows from,
# Kafka topics (tables).
KSQL_SERVER_URL = 'http://localhost:8089'
client = KSQLAPI(KSQL_SERVER_URL, timeout=None)

In [18]:
# Run the `show topics;` statement through the ksqlDB client and keep the response.
result = client.ksql('show topics;')

In [19]:
# Display the response returned by the `show topics;` statement.
result

[{'@type': 'kafka_topics',
  'statementText': 'show topics;',
  'topics': [{'name': 'bus_arrive_topic', 'replicaInfo': [1]},
   {'name': 'bus_location_topic', 'replicaInfo': [1]},
   {'name': 'car_arrive_topic', 'replicaInfo': [1]},
   {'name': 'car_location_topic', 'replicaInfo': [1]},
   {'name': 'default_ksql_processing_log', 'replicaInfo': [1]}],

In [20]:
# Read location.csv, which holds the bus location records

location_df = pd.read_csv("location.csv")

In [21]:
# Order the records by date, then by arrival hour; rebinding the name keeps the
# cell free of in-place mutation.
location_df = location_df.sort_values(by=["date", "now_arrive_time"])

In [22]:
# Renumber the rows 0..n-1 after sorting, discarding the previous index.
location_df = location_df.reset_index(drop=True)

In [23]:
# Preview the first rows of the location frame.
location_df.head()

Unnamed: 0,id,date,route_id,vh_id,route_nm,now_latitude,now_longitude,now_station,now_arrive_time,distance,next_station,next_latitude,next_longitude
0,210985,2019-10-29,405136001,7997040,360-1,33.49956,126.529771,제주시청(아라방면),05시,454.0,고산동산(아라방면),33.496007,126.532287
1,210986,2019-10-29,405136001,7997040,360-1,33.496007,126.532287,고산동산(아라방면),05시,414.0,제주지방법원(아라방면),33.493109,126.535091
2,210987,2019-10-29,405136001,7997040,360-1,33.493109,126.535091,제주지방법원(아라방면),05시,525.0,제주중앙여자고등학교(아라방면),33.489414,126.538642
3,210988,2019-10-29,405136001,7997040,360-1,33.479705,126.543811,남국원(아라방면),05시,355.0,아라초등학교,33.476533,126.544283
4,210989,2019-10-29,405136001,7997040,360-1,33.456267,126.55175,제주대학교입구,05시,266.0,제대마을,33.457724,126.554014


In [24]:
# Inspect the record at index label 0 as a single Series.
location_df.loc[0]

id                     210985
date               2019-10-29
route_id            405136001
vh_id                 7997040
route_nm                360-1
now_latitude         33.49956
now_longitude      126.529771
now_station        제주시청(아라방면)
now_arrive_time           05시
distance                454.0
next_station       고산동산(아라방면)
next_latitude       33.496007
next_longitude     126.532287
Name: 0, dtype: object

In [25]:
# Number of records to send to Kafka (one per row of the location frame)
count = location_df.shape[0]
count

91774

## 각 버스들의 위치를 `카프카(Kafka)`로 전송

In [26]:
# Replay the recorded bus locations into ksqlDB forever: one row per iteration,
# stamped with the current wall-clock time, wrapping back to the first row
# after the last one. Stop it by interrupting the kernel.

# Bounds (in seconds) of the random pause between two inserts. The upper bound
# is exclusive because it is passed to random.randrange.
MIN_SLEEP_SECONDS = 120
MAX_SLEEP_SECONDS = 600


def _sql_text(value):
    """Render ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a value containing ``'``
    (for example a station name) cannot end the literal early and break the
    INSERT statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


index = 0

while True:
    # Wrap around once every row has been sent.
    if index >= count:
        index = 0

    now = datetime.datetime.now()
    print(f"현재 시각 : {now}")

    date = now.strftime("%Y-%m-%d")
    print(f"현재 날짜 리턴 : {date}")

    # The recorded date/arrival hour are not replayed; the current hour and
    # weekday (Monday == 0) are sent instead.
    now_arrive_time = now.hour
    weekday = now.weekday()
    print(f"현재 요일 : {weekday}")

    # Fetch the row once and read every field from it.
    location_row = location_df.loc[index]

    # Text columns: quoted and escaped.
    route_id = _sql_text(location_row["route_id"])
    vh_id = _sql_text(location_row["vh_id"])
    route_nm = _sql_text(location_row["route_nm"])
    now_station = _sql_text(location_row["now_station"])
    next_station = _sql_text(location_row["next_station"])
    date_literal = _sql_text(date)

    # Numeric columns: coordinates are rounded to 6 decimal places.
    now_latitude = round(location_row["now_latitude"], 6)
    now_longitude = round(location_row["now_longitude"], 6)
    next_latitude = round(location_row["next_latitude"], 6)
    next_longitude = round(location_row["next_longitude"], 6)
    distance = location_row["distance"]

    # The current time as whole epoch seconds is used as the record id
    # (named record_id so the builtin id() is not shadowed).
    record_id = int(now.timestamp())

    insert_query = f"""INSERT INTO bus_location_topic (
                    id
                    ,route_id
                    ,vh_id
                    ,route_nm
                    ,now_latitude
                    ,now_longitude
                    ,now_station
                    ,now_arrive_time
                    ,distance
                    ,next_station
                    ,next_latitude
                    ,next_longitude
                    ,weekday
                    ,date
                    ) VALUES ({record_id},{route_id},{vh_id}
                            ,{route_nm},{now_latitude}
                            ,{now_longitude},{now_station}
                            ,{now_arrive_time},{distance}
                            ,{next_station},{next_latitude}
                            ,{next_longitude},{weekday}
                            ,{date_literal}
                            );"""
    print(f"insert query :\n{insert_query}")

    # Best effort: a failed insert is reported and the loop moves on to the
    # next row, so the failed row is not retried.
    # NOTE(review): the saved output of this cell shows "list index out of range"
    # raised here — confirm whether the rows actually reach the topic.
    try:
        client.ksql(insert_query)  # add the current location to ksqlDB
    except Exception as e:
        print(f"Exception : \n{e}")

    # The pause used to be a fixed 30 seconds. It was changed to a random
    # value because running Kafka, the Spark model and Spring Boot together
    # at a 30-second period put too much load on the machine.
    sleep_time = random.randrange(MIN_SLEEP_SECONDS, MAX_SLEEP_SECONDS)
    print(f"대기 시간 : {sleep_time}")
    time.sleep(sleep_time)

    index += 1
    print("=" * 100)

현재 시각 : 2023-10-17 14:09:02.419917
현재 날짜 리턴 : 2023-10-17
현재 요일 : 1
insert query :
INSERT INTO bus_location_topic (
                    id
                    ,route_id
                    ,vh_id
                    ,route_nm
                    ,now_latitude
                    ,now_longitude
                    ,now_station
                    ,now_arrive_time
                    ,distance
                    ,next_station
                    ,next_latitude
                    ,next_longitude
                    ,weekday
                    ,date
                    ) VALUES (1697519342,'405136001','7997040'
                            ,'360-1',33.49956
                            ,126.529771,'제주시청(아라방면)'
                            ,14,454.0
                            ,'고산동산(아라방면)',33.496007
                            ,126.532287,1
                            ,'2023-10-17'
                            );
Exception : 
list index out of range
대기 시간 : 575
현재 시각 : 2023-10-17 14:18:37.72