In [2]:
from pyspark.sql import SparkSession, Row
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Start (or reuse) a Spark session on the local master for this notebook.
spark = (
    SparkSession.builder
    .master("local")
    .appName("create-dataframe")
    .getOrCreate()
)

# One tuple per stock; fields are in the same order as the names in `schema`.
stocks = [
    ('Google', 'GOOGL', 'USA', 2984, 'USD'),
    ('Netflix', 'NFLX', 'USA', 645, 'USD'),
    ('Tesla', 'TSLA', 'USA', 1222, 'USD'),
    ('Samsung', '005930', 'Korea', 125000, 'KRW'),
    # NOTE(review): this Korea row is tagged 'USD' while the Samsung row with
    # the same price is 'KRW' -- looks like a typo; confirm before relying on
    # the per-currency aggregates computed later in the notebook.
    ('Kakao', '035720', 'Korea', 125000, 'USD'),
]

# Only column names are supplied here; no column types are declared.
schema = ["name", "ticker", "country", "price", "currency"]

# Build the DataFrame that every later cell queries.
df = spark.createDataFrame(stocks, schema)

In [3]:
# Print the full stocks DataFrame as a text table to check it loaded as expected.
df.show()

                                                                                

+-------+------+-------+------+--------+
|   name|ticker|country| price|currency|
+-------+------+-------+------+--------+
| Google| GOOGL|    USA|  2984|     USD|
|Netflix|  NFLX|    USA|   645|     USD|
|  Tesla|  TSLA|    USA|  1222|     USD|
|Samsung|005930|  Korea|125000|     KRW|
|  Kakao|035720|  Korea|125000|     USD|
+-------+------+-------+------+--------+



In [5]:
# Name, country and price of the USA-listed stocks, cheapest first.
usaStocksDF = (
    df.select("name", "country", "price")
    .filter("country == 'USA'")  # SQL-style predicate string; filter() is the alias of where()
    .orderBy("price")            # ascending by default
)
usaStocksDF.show()

+-------+-------+-----+
|   name|country|price|
+-------+-------+-----+
|Netflix|    USA|  645|
|  Tesla|    USA| 1222|
| Google|    USA| 2984|
+-------+-------+-----+



In [6]:
# Highest price within each country.
(
    df.groupBy("country")
    .max("price")
    .show()
)

[Stage 2:>                                                          (0 + 1) / 1]

+-------+----------+
|country|max(price)|
+-------+----------+
|    USA|      2984|
|  Korea|    125000|
+-------+----------+



                                                                                

In [8]:
from pyspark.sql.functions import avg, count

# Mean price per currency tag.
# NOTE(review): the 'USD' group includes a Korea row priced at 125000, which
# dominates this average -- confirm that row's currency tag in the input data.
(
    df.groupBy("currency")
    .agg(avg("price"))
    .show()
)

+--------+----------+
|currency|avg(price)|
+--------+----------+
|     KRW|  125000.0|
|     USD|  32462.75|
+--------+----------+



In [10]:
# Number of price entries recorded under each currency tag.
# `count` comes from the pyspark.sql.functions import in an earlier cell.
(
    df.groupBy("currency")
    .agg(count("price"))
    .show()
)

+--------+------------+
|currency|count(price)|
+--------+------------+
|     KRW|           1|
|     USD|           4|
+--------+------------+

