2323import com .google .cloud .pubsublite .AdminClient ;
2424import com .google .cloud .pubsublite .SubscriptionPath ;
2525import com .google .cloud .pubsublite .TopicPath ;
26+ import com .google .cloud .pubsublite .spark .internal .CachedPartitionCountReader ;
27+ import com .google .cloud .pubsublite .spark .internal .LimitingHeadOffsetReader ;
28+ import com .google .cloud .pubsublite .spark .internal .PartitionCountReader ;
2629import java .util .Objects ;
2730import java .util .Optional ;
2831import org .apache .spark .sql .sources .DataSourceRegister ;
2932import org .apache .spark .sql .sources .v2 .ContinuousReadSupport ;
3033import org .apache .spark .sql .sources .v2 .DataSourceOptions ;
3134import org .apache .spark .sql .sources .v2 .DataSourceV2 ;
3235import org .apache .spark .sql .sources .v2 .MicroBatchReadSupport ;
36+ import org .apache .spark .sql .sources .v2 .StreamWriteSupport ;
3337import org .apache .spark .sql .sources .v2 .reader .streaming .ContinuousReader ;
3438import org .apache .spark .sql .sources .v2 .reader .streaming .MicroBatchReader ;
39+ import org .apache .spark .sql .sources .v2 .writer .streaming .StreamWriter ;
40+ import org .apache .spark .sql .streaming .OutputMode ;
3541import org .apache .spark .sql .types .StructType ;
3642
3743@ AutoService (DataSourceRegister .class )
3844public final class PslDataSource
39- implements DataSourceV2 , ContinuousReadSupport , MicroBatchReadSupport , DataSourceRegister {
45+ implements DataSourceV2 ,
46+ ContinuousReadSupport ,
47+ MicroBatchReadSupport ,
48+ StreamWriteSupport ,
49+ DataSourceRegister {
4050
4151 @ Override
4252 public String shortName () {
@@ -51,23 +61,24 @@ public ContinuousReader createContinuousReader(
5161 "PubSub Lite uses fixed schema and custom schema is not allowed" );
5262 }
5363
54- PslDataSourceOptions pslDataSourceOptions =
55- PslDataSourceOptions .fromSparkDataSourceOptions (options );
56- SubscriptionPath subscriptionPath = pslDataSourceOptions .subscriptionPath ();
64+ PslReadDataSourceOptions pslReadDataSourceOptions =
65+ PslReadDataSourceOptions .fromSparkDataSourceOptions (options );
66+ SubscriptionPath subscriptionPath = pslReadDataSourceOptions .subscriptionPath ();
5767 TopicPath topicPath ;
58- try (AdminClient adminClient = pslDataSourceOptions .newAdminClient ()) {
68+ try (AdminClient adminClient = pslReadDataSourceOptions .newAdminClient ()) {
5969 topicPath = TopicPath .parse (adminClient .getSubscription (subscriptionPath ).get ().getTopic ());
6070 } catch (Throwable t ) {
6171 throw toCanonical (t ).underlying ;
6272 }
6373 PartitionCountReader partitionCountReader =
64- new CachedPartitionCountReader (pslDataSourceOptions .newAdminClient (), topicPath );
74+ new CachedPartitionCountReader (pslReadDataSourceOptions .newAdminClient (), topicPath );
6575 return new PslContinuousReader (
66- pslDataSourceOptions .newCursorClient (),
67- pslDataSourceOptions .newMultiPartitionCommitter (partitionCountReader .getPartitionCount ()),
68- pslDataSourceOptions .getSubscriberFactory (),
76+ pslReadDataSourceOptions .newCursorClient (),
77+ pslReadDataSourceOptions .newMultiPartitionCommitter (
78+ partitionCountReader .getPartitionCount ()),
79+ pslReadDataSourceOptions .getSubscriberFactory (),
6980 subscriptionPath ,
70- Objects .requireNonNull (pslDataSourceOptions .flowControlSettings ()),
81+ Objects .requireNonNull (pslReadDataSourceOptions .flowControlSettings ()),
7182 partitionCountReader );
7283 }
7384
@@ -79,28 +90,38 @@ public MicroBatchReader createMicroBatchReader(
7990 "PubSub Lite uses fixed schema and custom schema is not allowed" );
8091 }
8192
82- PslDataSourceOptions pslDataSourceOptions =
83- PslDataSourceOptions .fromSparkDataSourceOptions (options );
84- SubscriptionPath subscriptionPath = pslDataSourceOptions .subscriptionPath ();
93+ PslReadDataSourceOptions pslReadDataSourceOptions =
94+ PslReadDataSourceOptions .fromSparkDataSourceOptions (options );
95+ SubscriptionPath subscriptionPath = pslReadDataSourceOptions .subscriptionPath ();
8596 TopicPath topicPath ;
86- try (AdminClient adminClient = pslDataSourceOptions .newAdminClient ()) {
97+ try (AdminClient adminClient = pslReadDataSourceOptions .newAdminClient ()) {
8798 topicPath = TopicPath .parse (adminClient .getSubscription (subscriptionPath ).get ().getTopic ());
8899 } catch (Throwable t ) {
89100 throw toCanonical (t ).underlying ;
90101 }
91102 PartitionCountReader partitionCountReader =
92- new CachedPartitionCountReader (pslDataSourceOptions .newAdminClient (), topicPath );
103+ new CachedPartitionCountReader (pslReadDataSourceOptions .newAdminClient (), topicPath );
93104 return new PslMicroBatchReader (
94- pslDataSourceOptions .newCursorClient (),
95- pslDataSourceOptions .newMultiPartitionCommitter (partitionCountReader .getPartitionCount ()),
96- pslDataSourceOptions .getSubscriberFactory (),
105+ pslReadDataSourceOptions .newCursorClient (),
106+ pslReadDataSourceOptions .newMultiPartitionCommitter (
107+ partitionCountReader .getPartitionCount ()),
108+ pslReadDataSourceOptions .getSubscriberFactory (),
97109 new LimitingHeadOffsetReader (
98- pslDataSourceOptions .newTopicStatsClient (),
110+ pslReadDataSourceOptions .newTopicStatsClient (),
99111 topicPath ,
100112 partitionCountReader ,
101113 Ticker .systemTicker ()),
102114 subscriptionPath ,
103- Objects .requireNonNull (pslDataSourceOptions .flowControlSettings ()),
104- pslDataSourceOptions .maxMessagesPerBatch ());
115+ Objects .requireNonNull (pslReadDataSourceOptions .flowControlSettings ()),
116+ pslReadDataSourceOptions .maxMessagesPerBatch ());
117+ }
118+
119+ @ Override
120+ public StreamWriter createStreamWriter (
121+ String queryId , StructType schema , OutputMode mode , DataSourceOptions options ) {
122+ PslSparkUtils .verifyWriteInputSchema (schema );
123+ PslWriteDataSourceOptions pslWriteDataSourceOptions =
124+ PslWriteDataSourceOptions .fromSparkDataSourceOptions (options );
125+ return new PslStreamWriter (schema , pslWriteDataSourceOptions );
105126 }
106127}
0 commit comments