# Spark Streaming: Reading Data from a Text File (txt) Using the Structured Streaming API

In [1]:
# Locate the local Spark installation and make `pyspark` importable.
# This runs before the pyspark imports in the next cell.
import findspark
findspark.init()

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pyspark.sql.functions as f

In [3]:
# Create (or reuse) the SparkSession for this notebook: a local master with
# 4 worker threads, named "ReadFromFile", with explicit driver/executor
# memory settings.
spark = (
    SparkSession.builder
    .master("local[4]")
    .appName("ReadFromFile")
    .config("spark.driver.memory", "2g")
    .config("spark.executor.memory", "4g")
    .getOrCreate()
)

In [4]:
# Streaming DataFrame over the text source at the relative path "data".
# The text format exposes each input line in a string column named "value",
# which the next step reads.
lines = (
    spark.readStream
    .format("text")
    .load("data")
)

### Splitting each line of the text file into words

In [5]:
# Tokenize every line into words, producing one output row per word.
# The split pattern is a regex: "\s+" treats tabs and runs of spaces as a
# single separator (a lone " " would emit an empty token between consecutive
# spaces). The filter then drops the empty tokens that blank lines or
# leading/trailing whitespace still produce, so "" is never counted as a word.
words = (
    lines
    .select(f.explode(f.split(f.col("value"), r"\s+")).alias("word"))
    .filter(f.col("word") != "")
)

### Counting the words and sorting by frequency

In [6]:
# Running tally of occurrences per distinct word, most frequent first.
# Sorting a streaming aggregation requires the "complete" output mode, which
# is what the writeStream cell below selects.
word_counts = (
    words
    .groupBy("word")
    .count()
    .orderBy(f.col("count").desc())
)

In [7]:
# Start the streaming query: every trigger prints the complete, re-sorted
# word-count table to the console sink.
query = (
    word_counts.writeStream
    .format("console")
    .outputMode("complete")
    .start()
)

In [None]:
# Block this cell until the streaming query terminates (stopped or failed).
# While it is blocked no other cell can run; interrupt the kernel to regain
# control before executing the stop cell below.
query.awaitTermination()

We use `console` as the `writeStream` output format and keep the query alive with `awaitTermination()`. While the query is running, the updated word counts are printed to the console each time new data arrives in the input directory.

In [None]:
# Stop the streaming query that was started with writeStream...start().
query.stop()