In [1]:
from pyflink.table import (
    EnvironmentSettings,
    TableEnvironment,
    TableDescriptor,
    Schema,
    DataTypes,
    FormatDescriptor,
    EnvironmentSettings
)
from pyflink.table.window import Tumble,Slide, Over
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import expressions as expr
from pyflink.table.expressions import col,call, lit
from pyflink.table.udf import udf

env_settings = EnvironmentSettings.in_streaming_mode()
t_env = TableEnvironment.create(env_settings)    # Point to the SQL Kafka connector JAR
kafka_connector_jar = "file:///Users/gerardomartinez/DataEng/alpaca/flink-connectors/flink-sql-connector-kafka-3.3.0.jar"

t_env.get_config().set("pipeline.jars", kafka_connector_jar)

dockerC = "localhost:9092"

In [2]:
# # Define the Kafka source table
t_env.create_temporary_table(
    'bitcoin_table4',
    TableDescriptor.for_connector('kafka')
        .schema(Schema.new_builder()
                .column('name_coin', DataTypes.STRING())
                .column('moment_of_day', DataTypes.TIMESTAMP(3).not_null())
                .column('open_market', DataTypes.FLOAT())
                .column_by_expression("proc_time", "PROCTIME()")
        .watermark("moment_of_day", "moment_of_day - INTERVAL '1' SECOND")
                .build())
        .option('topic', 'bitcoin_price_2')
        .option('properties.bootstrap.servers', dockerC)
        .option('properties.group.id', 'transaction_group')
        .option('scan.startup.mode', 'earliest-offset')
        .format(FormatDescriptor.for_format('json')
                .option('fail-on-missing-field', 'false')
                .option('ignore-parse-errors', 'true')
                .build())
        .build())

In [3]:
t_env.from_path("bitcoin_table4").print_schema()

(
  `name_coin` STRING,
  `moment_of_day` TIMESTAMP(3) NOT NULL *ROWTIME*,
  `open_market` FLOAT,
  `proc_time` TIMESTAMP_LTZ(3) NOT NULL *PROCTIME* AS PROCTIME(),
  WATERMARK FOR `moment_of_day`: TIMESTAMP(3) NOT NULL AS moment_of_day - INTERVAL '1' SECOND
)


In [7]:
t_env.execute_sql(
    """
     SELECT 
    name_coin,    
    TUMBLE_START(proc_time, INTERVAL '1' MINUTE) AS rounded_minute,
    AVG(open_market) AS avg_open,
    LAG(AVG(open_market), 1) OVER  (
        PARTITION BY name_coin 
        ORDER BY TUMBLE_START(proc_time, INTERVAL '1' MINUTE)
    )  AS prev_avg_open
FROM bitcoin_table4
GROUP BY 
    name_coin,
    TUMBLE(proc_time, INTERVAL '1' MINUTE)
    """
).print()

TableException: org.apache.flink.table.api.TableException: OVER windows' ordering in stream mode must be defined on a time attribute.
	at org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecOverAggregate.translateToPlanInternal(StreamExecOverAggregate.java:175)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase.translateToPlan(ExecNodeBase.java:168)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecEdge.translateToPlan(ExecEdge.java:259)
	at org.apache.flink.table.planner.plan.nodes.exec.common.CommonExecCalc.translateToPlanInternal(CommonExecCalc.java:94)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase.translateToPlan(ExecNodeBase.java:168)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecEdge.translateToPlan(ExecEdge.java:259)
	at org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecSink.translateToPlanInternal(StreamExecSink.java:177)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase.translateToPlan(ExecNodeBase.java:168)
	at org.apache.flink.table.planner.delegation.StreamPlanner.$anonfun$translateToPlan$1(StreamPlanner.scala:85)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:233)
	at scala.collection.Iterator.foreach(Iterator.scala:937)
	at scala.collection.Iterator.foreach$(Iterator.scala:937)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
	at scala.collection.IterableLike.foreach(IterableLike.scala:70)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:69)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
	at scala.collection.TraversableLike.map(TraversableLike.scala:233)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:226)
	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
	at org.apache.flink.table.planner.delegation.StreamPlanner.translateToPlan(StreamPlanner.scala:84)
	at org.apache.flink.table.planner.delegation.PlannerBase.translate(PlannerBase.scala:180)
	at org.apache.flink.table.api.internal.TableEnvironmentImpl.translate(TableEnvironmentImpl.java:1308)
	at org.apache.flink.table.api.internal.TableEnvironmentImpl.executeInternal(TableEnvironmentImpl.java:1133)
	at org.apache.flink.table.api.internal.TableEnvironmentImpl.executeSql(TableEnvironmentImpl.java:735)
	at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
	at java.base/java.lang.reflect.Method.invoke(Method.java:580)
	at org.apache.flink.api.python.shaded.py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at org.apache.flink.api.python.shaded.py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at org.apache.flink.api.python.shaded.py4j.Gateway.invoke(Gateway.java:282)
	at org.apache.flink.api.python.shaded.py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at org.apache.flink.api.python.shaded.py4j.commands.CallCommand.execute(CallCommand.java:79)
	at org.apache.flink.api.python.shaded.py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.base/java.lang.Thread.run(Thread.java:1575)


In [3]:
t_env.execute_sql(
    """
      SELECT 
    name_coin,
    TUMBLE_START(proc_time, INTERVAL '1' MINUTE) AS rounded_minute,
    AVG(open_market) AS avg_open,
    LAG(AVG(open_market), 1) OVER (
        PARTITION BY name_coin 
        ORDER BY TUMBLE_START(proc_time, INTERVAL '1' MINUTE)
    ) AS prev_avg_open
FROM bitcoin_table4
GROUP BY 
    name_coin,
    TUMBLE(proc_time, INTERVAL '1' MINUTE)
    """
).print()

TableException: org.apache.flink.table.api.TableException: OVER windows' ordering in stream mode must be defined on a time attribute.
	at org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecOverAggregate.translateToPlanInternal(StreamExecOverAggregate.java:175)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase.translateToPlan(ExecNodeBase.java:168)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecEdge.translateToPlan(ExecEdge.java:259)
	at org.apache.flink.table.planner.plan.nodes.exec.common.CommonExecCalc.translateToPlanInternal(CommonExecCalc.java:94)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase.translateToPlan(ExecNodeBase.java:168)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecEdge.translateToPlan(ExecEdge.java:259)
	at org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecSink.translateToPlanInternal(StreamExecSink.java:177)
	at org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase.translateToPlan(ExecNodeBase.java:168)
	at org.apache.flink.table.planner.delegation.StreamPlanner.$anonfun$translateToPlan$1(StreamPlanner.scala:85)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:233)
	at scala.collection.Iterator.foreach(Iterator.scala:937)
	at scala.collection.Iterator.foreach$(Iterator.scala:937)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
	at scala.collection.IterableLike.foreach(IterableLike.scala:70)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:69)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
	at scala.collection.TraversableLike.map(TraversableLike.scala:233)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:226)
	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
	at org.apache.flink.table.planner.delegation.StreamPlanner.translateToPlan(StreamPlanner.scala:84)
	at org.apache.flink.table.planner.delegation.PlannerBase.translate(PlannerBase.scala:180)
	at org.apache.flink.table.api.internal.TableEnvironmentImpl.translate(TableEnvironmentImpl.java:1308)
	at org.apache.flink.table.api.internal.TableEnvironmentImpl.executeInternal(TableEnvironmentImpl.java:1133)
	at org.apache.flink.table.api.internal.TableEnvironmentImpl.executeSql(TableEnvironmentImpl.java:735)
	at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
	at java.base/java.lang.reflect.Method.invoke(Method.java:580)
	at org.apache.flink.api.python.shaded.py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at org.apache.flink.api.python.shaded.py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at org.apache.flink.api.python.shaded.py4j.Gateway.invoke(Gateway.java:282)
	at org.apache.flink.api.python.shaded.py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at org.apache.flink.api.python.shaded.py4j.commands.CallCommand.execute(CallCommand.java:79)
	at org.apache.flink.api.python.shaded.py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.base/java.lang.Thread.run(Thread.java:1575)


In [3]:
t_env.execute_sql(
    """
      SELECT 
        name_coin,
        TUMBLE_START(proc_time, INTERVAL '1' MINUTE) AS rounded_minute,
        AVG(open_market) AS avg_open
    FROM bitcoin_table4
    GROUP BY 
        name_coin,
        TUMBLE(proc_time, INTERVAL '1' MINUTE)
    """
).print()

+----+--------------------------------+-------------------------+--------------------------------+
| op |                      name_coin |          rounded_minute |                       avg_open |
+----+--------------------------------+-------------------------+--------------------------------+
| +I |                        BTC/USD | 2025-02-04 10:42:00.000 |                       99217.33 |
| +I |                        BTC/USD | 2025-02-04 10:43:00.000 |                       99352.21 |
| +I |                        BTC/USD | 2025-02-04 10:44:00.000 |                      99242.875 |
| +I |                        BTC/USD | 2025-02-04 10:45:00.000 |                       99131.72 |
| +I |                        BTC/USD | 2025-02-04 10:46:00.000 |                        99145.2 |


ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/Users/gerardomartinez/DataEng/.venv/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/Users/gerardomartinez/DataEng/.venv/lib/python3.10/site-packages/py4j/java_gateway.py", line 1217, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/Users/gerardomartinez/.pyenv/versions/3.10.4/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: 

| +I |                        BTC/USD | 2025-02-04 10:47:00.000 |                       99173.12 |
| +I |                        BTC/USD | 2025-02-04 10:48:00.000 |                       99266.83 |
| +I |                        BTC/USD | 2025-02-04 10:49:00.000 |                      99396.086 |


In [6]:
t_env.execute_sql(
    """
      SELECT 
        name_coin,
        moment_of_day,
        TUMBLE_START(proc_time, INTERVAL '1' MINUTE) AS rounded_minute,
        AVG(open_market) AS avg_open
    FROM bitcoin_table4
    GROUP BY 
        name_coin,
        moment_of_day,
        TUMBLE(proc_time, INTERVAL '1' MINUTE)
    """
).print()

+----+--------------------------------+-------------------------+-------------------------+--------------------------------+
| op |                      name_coin |           moment_of_day |          rounded_minute |                       avg_open |
+----+--------------------------------+-------------------------+-------------------------+--------------------------------+
| +I |                        BTC/USD | 2025-02-04 10:25:00.000 | 2025-02-04 10:40:00.000 |                        99027.0 |
| +I |                        BTC/USD | 2025-02-04 10:36:00.000 | 2025-02-04 10:40:00.000 |                       99371.53 |
| +I |                        BTC/USD | 2025-02-04 10:34:00.000 | 2025-02-04 10:40:00.000 |                       99106.01 |
| +I |                        BTC/USD | 2025-02-04 10:39:00.000 | 2025-02-04 10:40:00.000 |                       99418.23 |
| +I |                        BTC/USD | 2025-02-04 10:30:00.000 | 2025-02-04 10:40:00.000 |                      99068.086 |


ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/Users/gerardomartinez/DataEng/.venv/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/Users/gerardomartinez/DataEng/.venv/lib/python3.10/site-packages/py4j/java_gateway.py", line 1217, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/Users/gerardomartinez/.pyenv/versions/3.10.4/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: 