In [1]:
# Sample employee records, one tuple per row: (employee_name, department, salary).
# Note: the ("James", "Sales", 3000) record appears twice in this data set.
simpleData = (
    ("James", "Sales", 3000),
    ("Michael", "Sales", 4600),
    ("Robert", "Sales", 4100),
    ("Maria", "Finance", 3000),
    ("James", "Sales", 3000),
    ("Scott", "Finance", 3300),
    ("Jen", "Finance", 3900),
    ("Jeff", "Marketing", 3000),
    ("Kumar", "Marketing", 2000),
    ("Saif", "Sales", 4100),
)

In [2]:
# Column names, listed in the same order as the fields of each simpleData tuple.
columns = ["employee_name", "department", "salary"]

# NOTE(review): `spark` is not created in the visible cells; this assumes the
# kernel/environment already provides a SparkSession under that name.
# Only column names are passed as the schema, so Spark infers the column types.
df = spark.createDataFrame(simpleData, schema=columns)

# Print the inferred schema, then the rows without truncating cell values.
df.printSchema()
df.show(truncate=False)

root
 |-- employee_name: string (nullable = true)
 |-- department: string (nullable = true)
 |-- salary: long (nullable = true)

+-------------+----------+------+
|employee_name|department|salary|
+-------------+----------+------+
|James        |Sales     |3000  |
|Michael      |Sales     |4600  |
|Robert       |Sales     |4100  |
|Maria        |Finance   |3000  |
|James        |Sales     |3000  |
|Scott        |Finance   |3300  |
|Jen          |Finance   |3900  |
|Jeff         |Marketing |3000  |
|Kumar        |Marketing |2000  |
|Saif         |Sales     |4100  |
+-------------+----------+------+



In [12]:
from pyspark.sql.functions import count, sum, col

In [14]:
# Whole-table aggregation (no grouping): row count and salary total.
# `count` and `sum` are the pyspark.sql.functions versions imported above.
row_count = count('*').alias('number_of_rows')
salary_total = sum('salary').alias('total_salary')
df.agg(row_count, salary_total).show()

+--------------+------------+
|number_of_rows|total_salary|
+--------------+------------+
|            10|       34000|
+--------------+------------+



In [13]:
# Row count and salary total computed per department.
# `count` and `sum` are the pyspark.sql.functions versions imported above.
rows_by_department = df.groupBy('department')
rows_by_department.agg(
    count('*').alias('number_of_rows'),
    sum('salary').alias('total_salary'),
).show()

+----------+--------------+------------+
|department|number_of_rows|total_salary|
+----------+--------------+------------+
|     Sales|             5|       18800|
|   Finance|             3|       10200|
| Marketing|             2|        5000|
+----------+--------------+------------+



In [8]:
from pyspark.sql.window import Window

In [9]:
# Window specification that partitions rows by their department value
# (no ordering or frame is specified here).
windowDepartment = Window.partitionBy(col('department'))

In [11]:
# Salary total evaluated over the department window: every input row is kept
# and gains a column holding the total for its own department.
# `sum` is pyspark.sql.functions.sum (imported above), not the Python builtin.
department_salary_total = sum('salary').over(windowDepartment)
df.withColumn('totalSalary_withinPartition', department_salary_total).show()

+-------------+----------+------+---------------------------+
|employee_name|department|salary|totalSalary_withinPartition|
+-------------+----------+------+---------------------------+
|         Saif|     Sales|  4100|                      18800|
|        James|     Sales|  3000|                      18800|
|        James|     Sales|  3000|                      18800|
|      Michael|     Sales|  4600|                      18800|
|       Robert|     Sales|  4100|                      18800|
|          Jen|   Finance|  3900|                      10200|
|        Maria|   Finance|  3000|                      10200|
|        Scott|   Finance|  3300|                      10200|
|         Jeff| Marketing|  3000|                       5000|
|        Kumar| Marketing|  2000|                       5000|
+-------------+----------+------+---------------------------+

