In [28]:
# Load IPython's autoreload extension. NOTE(review): re-running this cell in a
# live kernel prints the "already loaded" notice recorded below; that notice
# itself recommends `%reload_ext autoreload` for re-runs.
%load_ext autoreload
# Mode 2: picks up edits to imported modules without restarting the kernel
# (presumably reloading every module before each execution — confirm in the
# IPython autoreload docs).
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
# NOTE(review): the star import hides which names come from optimus. The cells
# visible in this notebook only use `Optimus` from it — confirm nothing else
# relies on it before narrowing the import.
from optimus import *

from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, BooleanType, IntegerType, ArrayType

# Reuse the active SparkSession or create one if none exists.
# Despite the name `sc`, this is a SparkSession, not a SparkContext.
sc = SparkSession.builder.getOrCreate()

In [30]:
# Create the Optimus entry point (`op`) from the session held in `sc`;
# later cells build dataframes through `op.create.df`.
op = Optimus(sc)

Using a created Spark Session...
Done.


## Create dataframe
### Spark

This is ugly:

```
val someData = Seq(
  Row(8, "bat"),
  Row(64, "mouse"),
  Row(-27, "horse")
)

val someSchema = List(
  StructField("number", IntegerType, true),
  StructField("word", StringType, true)
)

val someDF = spark.createDataFrame(
  spark.sparkContext.parallelize(someData),
  StructType(someSchema)
)
```

In [22]:
# Thanks Mr Powers
# Demo dataframe built through Optimus. The first list declares the columns as
# (name, type, flag) tuples — the flag is presumably "nullable", TODO confirm —
# and the second list holds one tuple of values per row.
df = op.create.df(
    [
        ("words", "str", True),
        ("num", "int", True),
        ("animals", "str", True),
        ("thing", StringType(), True),
        ("second", "int", True),
        ("filter", StringType(), True),
    ],
    [
        ("  I like     fish  ", 1, "dog dog", "housé", 5, "a"),
        ("    zombies", 2, "cat", "tv", 6, "b"),
        ("simpsons   cat lady", 2, "frog", "table", 7, "1"),
        (None, 3, "eagle", "glass", 8, "c"),
    ],
)

df.show()

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  1|dog dog|housé|     5|     a|
|            zombies|  2|    cat|   tv|     6|     b|
|simpsons   cat lady|  2|   frog|table|     7|     1|
|               null|  3|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+



## Append row
### Spark
Not available in Spark. You need to create a new dataframe and then union it with the original to append a row.

In [27]:
# Append a single row (one value per column, in column order) and display the
# resulting dataframe.
(
    df.rows()
    .append(["this is a word", 2, "this is an animal", "this is a thing", 64, "this is a filter"])
    .show()
)

+-------------------+---+-----------------+---------------+------+----------------+
|              words|num|          animals|          thing|second|          filter|
+-------------------+---+-----------------+---------------+------+----------------+
|  I like     fish  |  1|          dog dog|          housé|     5|               a|
|            zombies|  2|              cat|             tv|     6|               b|
|simpsons   cat lady|  2|             frog|          table|     7|               1|
|               null|  3|            eagle|          glass|     8|               c|
|     this is a word|  2|this is an animal|this is a thing|    64|this is a filter|
+-------------------+---+-----------------+---------------+------+----------------+



## Drop Row
### Spark 

### Pandas

In [43]:
# NOTE(review): in the output recorded below all four rows are still present
# and an extra `id_optimus` column (plus a stray "id_optimus" line) appears —
# confirm what `drop(1)` is expected to remove.
(
    df.rows()
    .drop(1)
    .show()
)

id_optimus
+-------------------+---+-------+-----+------+------+-----------+
|              words|num|animals|thing|second|filter| id_optimus|
+-------------------+---+-------+-----+------+------+-----------+
|  I like     fish  |  1|dog dog|housé|     5|     a| 8589934592|
|            zombies|  2|    cat|   tv|     6|     b|25769803776|
|simpsons   cat lady|  2|   frog|table|     7|     1|42949672960|
|               null|  3|  eagle|glass|     8|     c|60129542144|
+-------------------+---+-------+-----+------+------+-----------+



## Filter by type
### Spark 
Not available in vanilla Spark.

### Pandas


In [87]:
# Filter rows by the detected type of the values in the "filter" column.
# In the recorded output the row whose value is "1" is the one missing, so
# matching rows appear to be removed rather than kept — confirm against the
# Optimus documentation.
(
    df.rows()
    .filter_by_type("filter", type="integer")
    .show()
)

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  1|dog dog|housé|     5|     a|
|            zombies|  2|    cat|   tv|     6|     b|
|               null|  3|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+



## Lookup
### Spark
Not available in vanilla Spark.

### Pandas

In [88]:
# Replace the listed values in "animals" with "just animals". The recorded
# output shows "cat" and "eagle" replaced while "dog dog" is left untouched,
# so matching looks exact rather than substring-based.
(
    df.rows()
    .lookup("animals", ["dog", "cat", "eagle"], "just animals")
    .show()
)

+-------------------+---+------------+-----+------+------+
|              words|num|     animals|thing|second|filter|
+-------------------+---+------------+-----+------+------+
|  I like     fish  |  1|     dog dog|housé|     5|     a|
|            zombies|  2|just animals|   tv|     6|     b|
|simpsons   cat lady|  2|        frog|table|     7|     1|
|               null|  3|just animals|glass|     8|     c|
+-------------------+---+------------+-----+------+------+



## Apply by type
### Spark
Not available in vanilla Spark.

### Pandas


In [120]:
def func(value):
    """Return ``value`` converted to an int, incremented by one, as a string."""
    incremented = int(value) + 1
    return str(incremented)

# Apply `func` to the values of "num" that are detected as integers and show
# the result; each spec is a (column, type, function) tuple.
(
    df.rows()
    .apply_by_type([("num", "integer", func)])
    .show()
)

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  2|dog dog|housé|     5|     a|
|            zombies|  3|    cat|   tv|     6|     b|
|simpsons   cat lady|  3|   frog|table|     7|     1|
|               null|  4|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+

