In [1]:
from pyspark.sql import SparkSession

In [2]:
spark = SparkSession.builder.appName('Basic').getOrCreate()

In [3]:
df = spark.read.csv('/home/aravind/Downloads/table1.csv')

In [4]:
df.show()

+---+------+
|_c0|   _c1|
+---+------+
|age|  name|
|  1|   raj|
|  1|   ram|
|  2|ganesh|
|  2|   raj|
|  6|   ram|
|  4|ganesh|
| 99|   raj|
|122|   ram|
+---+------+



In [5]:
df.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)



In [6]:
from pyspark.sql.types import (StructField,StringType,IntegerType
                               ,StructType)

In [7]:
data_schema = [StructField('_c0',IntegerType(),True),
               StructField('_c1',StringType(),True)]

In [8]:
final_struc = StructType(fields=data_schema)

In [9]:
df = spark.read.csv('/home/aravind/Downloads/table1.csv',schema = final_struc)

In [10]:
df.printSchema()

root
 |-- _c0: integer (nullable = true)
 |-- _c1: string (nullable = true)



In [12]:
type(df["_c0"])

pyspark.sql.column.Column

In [13]:
df.select("_c0")

DataFrame[_c0: int]

In [14]:
df.select("_c0").show()

+----+
| _c0|
+----+
|null|
|   1|
|   1|
|   2|
|   2|
|   6|
|   4|
|  99|
| 122|
+----+



In [15]:
df.select("_c0","_c1").show()

+----+------+
| _c0|   _c1|
+----+------+
|null|  null|
|   1|   raj|
|   1|   ram|
|   2|ganesh|
|   2|   raj|
|   6|   ram|
|   4|ganesh|
|  99|   raj|
| 122|   ram|
+----+------+



In [16]:
type(df.select("_c0","_c1"))

pyspark.sql.dataframe.DataFrame

In [17]:
df.head(4)

[Row(_c0=None, _c1=None),
 Row(_c0=1, _c1='raj'),
 Row(_c0=1, _c1='ram'),
 Row(_c0=2, _c1='ganesh')]

In [18]:
type(df.head(4)[0])

pyspark.sql.types.Row

In [23]:
df.head(2)[1]

Row(_c0=1, _c1='raj')

In [24]:
df.select(['_c0','_c1']).show()

+----+------+
| _c0|   _c1|
+----+------+
|null|  null|
|   1|   raj|
|   1|   ram|
|   2|ganesh|
|   2|   raj|
|   6|   ram|
|   4|ganesh|
|  99|   raj|
| 122|   ram|
+----+------+



In [26]:
df.withColumn("new_c0",df['_c0']).show()

+----+------+------+
| _c0|   _c1|new_c0|
+----+------+------+
|null|  null|  null|
|   1|   raj|     1|
|   1|   ram|     1|
|   2|ganesh|     2|
|   2|   raj|     2|
|   6|   ram|     6|
|   4|ganesh|     4|
|  99|   raj|    99|
| 122|   ram|   122|
+----+------+------+



In [27]:
df.withColumn("new_c0 in double",df['_c0']*2).show()

+----+------+----------------+
| _c0|   _c1|new_c0 in double|
+----+------+----------------+
|null|  null|            null|
|   1|   raj|               2|
|   1|   ram|               2|
|   2|ganesh|               4|
|   2|   raj|               4|
|   6|   ram|              12|
|   4|ganesh|               8|
|  99|   raj|             198|
| 122|   ram|             244|
+----+------+----------------+



In [28]:
df.show()

+----+------+
| _c0|   _c1|
+----+------+
|null|  null|
|   1|   raj|
|   1|   ram|
|   2|ganesh|
|   2|   raj|
|   6|   ram|
|   4|ganesh|
|  99|   raj|
| 122|   ram|
+----+------+



In [31]:
df.withColumnRenamed('_c0','age').show()

+----+------+
| age|   _c1|
+----+------+
|null|  null|
|   1|   raj|
|   1|   ram|
|   2|ganesh|
|   2|   raj|
|   6|   ram|
|   4|ganesh|
|  99|   raj|
| 122|   ram|
+----+------+



In [59]:
df.createOrReplaceTempView('table1')

In [60]:
results = spark.sql("SELECT * FROM table1")


In [61]:
results.show() 

+----+------+
| _c0|   _c1|
+----+------+
|null|  null|
|   1|   raj|
|   1|   ram|
|   2|ganesh|
|   2|   raj|
|   6|   ram|
|   4|ganesh|
|  99|   raj|
| 122|   ram|
+----+------+



In [70]:
df.createOrReplaceTempView('table1')

In [71]:
new = spark.sql("SELECT * FROM table1 WHERE _c0=99")

In [72]:
new.show()

+---+---+
|_c0|_c1|
+---+---+
| 99|raj|
+---+---+

