In [0]:
# List the contents of the DBFS upload folder to confirm the input CSV is present.
dbutils.fs.ls("FileStore/tables/")

Out[60]: [FileInfo(path='dbfs:/FileStore/tables/pollutiondata/', name='pollutiondata/', size=0, modificationTime=0),
 FileInfo(path='dbfs:/FileStore/tables/world_internet_user.csv', name='world_internet_user.csv', size=9797, modificationTime=1672274870000)]

### Read CSV file from folder

In [0]:
# Load the uploaded CSV: the first row is the header, column types are
# inferred from the data, and fields are comma-delimited.
df = spark.read.csv(
    "/FileStore/tables/world_internet_user.csv",
    header=True,
    inferSchema=True,
    sep=",",
)

### Display the dataframe and count of records

In [0]:
# Preview the first 10 rows. limit(10) makes the row cap explicit rather than
# relying on a positional argument to display(), which is not a documented
# row-count parameter.
df.limit(10).display()

Country,Region,Population,Internet Users,% of Population
_World,,7920539977,5424080321,68.48
Afganistan,Asia,40403518,9237489,22.86
Albania,Europe,2872758,2191467,76.28
Algeria,Africa,45150879,37836425,83.8
American Samoa,Oceania,54995,34800,63.28
Andorra,Europe,77465,73101,94.37
Angola,Africa,34592611,8980670,25.96
Anguilla,America,15222,13665,89.77
Antarctica,Oceania,2700,2700,100.0
Antigua & Barbuda,America,99251,79731,80.33


In [0]:
# Spark action: returns the total number of rows in df (every CSV row,
# including the `_World` aggregate row shown in the preview).
df.count()


Out[63]: 243

### Add column, rename, drop

In [0]:
from pyspark.sql.functions import lit

In [0]:
# Attach a constant string column to every row; df itself is left untouched.
sample_value = lit("sample data")
df1 = df.withColumn("new_column", sample_value)

In [0]:
# Print the first 4 rows to verify the new constant column was appended.
df1.show(4)

+----------+------+----------+--------------+---------------+-----------+
|   Country|Region|Population|Internet Users|% of Population| new_column|
+----------+------+----------+--------------+---------------+-----------+
|    _World|  null|7920539977|    5424080321|          68.48|sample data|
|Afganistan|  Asia|  40403518|       9237489|          22.86|sample data|
|   Albania|Europe|   2872758|       2191467|          76.28|sample data|
|   Algeria|Africa|  45150879|      37836425|           83.8|sample data|
+----------+------+----------+--------------+---------------+-----------+
only showing top 4 rows



#### Add a derived column (computed from an existing column)

In [0]:
# Derived column: NewPopulation holds twice the value of Population for each row.
doubled_population = df1["Population"] * 2
df1 = df1.withColumn("NewPopulation", doubled_population)

In [0]:
# Print the first 4 rows to verify NewPopulation equals Population * 2.
df1.show(4)

+----------+------+----------+--------------+---------------+-----------+-------------+
|   Country|Region|Population|Internet Users|% of Population| new_column|NewPopulation|
+----------+------+----------+--------------+---------------+-----------+-------------+
|    _World|  null|7920539977|    5424080321|          68.48|sample data|  15841079954|
|Afganistan|  Asia|  40403518|       9237489|          22.86|sample data|     80807036|
|   Albania|Europe|   2872758|       2191467|          76.28|sample data|      5745516|
|   Algeria|Africa|  45150879|      37836425|           83.8|sample data|     90301758|
+----------+------+----------+--------------+---------------+-----------+-------------+
only showing top 4 rows



#### Rename column

In [0]:
# Replace the awkward "% of Population" header (symbol and spaces) with an
# identifier-friendly name.
df1 = df1.withColumnRenamed(
    existing="% of Population",
    new="PercentOfPopulation",
)

In [0]:
# Print the first 3 rows to verify the column header was renamed.
df1.show(3)

+----------+------+----------+--------------+-------------------+-----------+-------------+
|   Country|Region|Population|Internet Users|PercentOfPopulation| new_column|NewPopulation|
+----------+------+----------+--------------+-------------------+-----------+-------------+
|    _World|  null|7920539977|    5424080321|              68.48|sample data|  15841079954|
|Afganistan|  Asia|  40403518|       9237489|              22.86|sample data|     80807036|
|   Albania|Europe|   2872758|       2191467|              76.28|sample data|      5745516|
+----------+------+----------+--------------+-------------------+-----------+-------------+
only showing top 3 rows



#### Drop a Column

In [0]:
# Remove the demo column added earlier. drop() given a column name is a no-op
# when that column is absent, so this cell is safe to re-run.
df1 = df1.drop("new_column")

In [0]:
# show() with no argument prints up to the first 20 rows (PySpark default).
df1.show()

+-----------------+-----------+----------+--------------+-------------------+-------------+
|          Country|     Region|Population|Internet Users|PercentOfPopulation|NewPopulation|
+-----------------+-----------+----------+--------------+-------------------+-------------+
|           _World|       null|7920539977|    5424080321|              68.48|  15841079954|
|       Afganistan|       Asia|  40403518|       9237489|              22.86|     80807036|
|          Albania|     Europe|   2872758|       2191467|              76.28|      5745516|
|          Algeria|     Africa|  45150879|      37836425|               83.8|     90301758|
|   American Samoa|    Oceania|     54995|         34800|              63.28|       109990|
|          Andorra|     Europe|     77465|         73101|              94.37|       154930|
|           Angola|     Africa|  34592611|       8980670|              25.96|     69185222|
|         Anguilla|    America|     15222|         13665|              89.77|   