In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed, so edits to library code are picked up live.
%load_ext autoreload
%autoreload 2

In [12]:
from optimus import *

from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, BooleanType, IntegerType, ArrayType

# Reuse the active SparkSession, or create one if none exists yet.
# NOTE(review): despite the name `sc`, this is a SparkSession, not a SparkContext.
sc = SparkSession.builder.getOrCreate()

In [13]:
# Create the Optimus entry point. As the output below shows, construction
# checks the required environment variables, starts (or reuses) the
# SparkSession/SparkContext and sets up a local checkpoint folder.
op = Optimus()


             ____        __  _                     
            / __ \____  / /_(_)___ ___  __  _______
           / / / / __ \/ __/ / __ `__ \/ / / / ___/
          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
              /_/                                  
              
Just checking that all necessary environments vars are present...
-----
PYSPARK_PYTHON=python
SPARK_HOME=C:\opt\spark\spark-2.3.1-bin-hadoop2.7
JAVA_HOME=C:\java8
-----
Starting or getting SparkSession and SparkContext...
Setting checkpoint folder (local). If you are in a cluster change it with set_check_point_ folder(path,'hadoop').
Deleting previous folder if exists...
Creating the checkpoint directory...
Done.
Optimus successfully imported. Have fun :).


## Create dataframe
### Spark

Creating a DataFrame by hand in vanilla Spark (Scala API) is verbose:

```scala
val someData = Seq(
  Row(8, "bat"),
  Row(64, "mouse"),
  Row(-27, "horse")
)

val someSchema = List(
  StructField("number", IntegerType, true),
  StructField("word", StringType, true)
)

val someDF = spark.createDataFrame(
  spark.sparkContext.parallelize(someData),
  StructType(someSchema)
)
```

In [14]:
# Build a small demo DataFrame with Optimus (thanks, Mr Powers).
# Each column spec presumably mirrors Spark's StructField: (name, type, nullable).
# The type is given either as a short string alias ("str", "int") or as a
# Spark type instance (StringType()).
column_specs = [
    ("words", "str", True),
    ("num", "int", True),
    ("animals", "str", True),
    ("thing", StringType(), True),
    ("second", "int", True),
    ("filter", StringType(), True),
]

# One tuple per row, values in the same order as `column_specs`.
sample_rows = [
    ("  I like     fish  ", 1, "dog dog", "housé", 5, "a"),
    ("    zombies", 2, "cat", "tv", 6, "b"),
    ("simpsons   cat lady", 2, "frog", "table", 7, "1"),
    (None, 3, "eagle", "glass", 8, "c"),
]

df = op.create.df(column_specs, sample_rows)

df.show()

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  1|dog dog|housé|     5|     a|
|            zombies|  2|    cat|   tv|     6|     b|
|simpsons   cat lady|  2|   frog|table|     7|     1|
|               null|  3|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+



In [8]:
# Inspect the (column name, type) pairs of the demo DataFrame.
# NOTE(review): the NameError recorded below is stale output; this cell was
# presumably executed before the cell that creates `df` (its execution count
# is lower). Re-run it after the DataFrame-creation cell.
df.dtypes

NameError: name 'df' is not defined

## Append row
### Spark
Not available in vanilla Spark. To append a row you need to create a new single-row DataFrame and then `union` it with the original one.

In [4]:
# Append a single row; the values are listed in the column order of `df`
# (words, num, animals, thing, second, filter).
new_row = ["this is a word", 2, "this is an animal", "this is a thing", 64, "this is a filter"]
df.rows().append(new_row).show()

+-------------------+---+-----------------+---------------+------+----------------+
|              words|num|          animals|          thing|second|          filter|
+-------------------+---+-----------------+---------------+------+----------------+
|  I like     fish  |  1|          dog dog|          housé|     5|               a|
|            zombies|  2|              cat|             tv|     6|               b|
|simpsons   cat lady|  2|             frog|          table|     7|               1|
|               null|  3|            eagle|          glass|     8|               c|
|     this is a word|  2|this is an animal|this is a thing|    64|this is a filter|
+-------------------+---+-----------------+---------------+------+----------------+



## Filter by type
### Spark 
Not available in vanilla Spark.

### Pandas


In [6]:
# Filter on the "filter" column by type "integer"; in the output below the
# row whose value is "1" is no longer present.
rows_filtered = df.rows().filter_by_type("filter", type="integer")
rows_filtered.show()

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  1|dog dog|housé|     5|     a|
|            zombies|  2|    cat|   tv|     6|     b|
|               null|  3|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+



## Lookup
### Spark
Not available in vanilla Spark.

### Pandas

In [11]:
# Replace values of "animals" found in the lookup list with "just animals".
# In the output below only exact matches ("cat", "eagle") are replaced;
# "dog dog" is left unchanged.
lookup_values = ["dog", "cat", "eagle"]
df.rows().lookup("animals", lookup_values, "just animals").show()

+-------------------+---+------------+-----+------+------+
|              words|num|     animals|thing|second|filter|
+-------------------+---+------------+-----+------+------+
|  I like     fish  |  1|     dog dog|housé|     5|     a|
|            zombies|  2|just animals|   tv|     6|     b|
|simpsons   cat lady|  2|        frog|table|     7|     1|
|               null|  3|just animals|glass|     8|     c|
+-------------------+---+------------+-----+------+------+



## Apply

In [10]:
def func(value):
    """Return ``value`` incremented by one, rendered as a string.

    Args:
        value: An integer, or anything ``int()`` accepts (e.g. ``"3"``).
            ``None`` is passed through untouched, because every column of the
            demo DataFrame is declared nullable and ``int(None)`` would raise
            ``TypeError``.

    Returns:
        ``str(int(value) + 1)``, or ``None`` when ``value`` is ``None``.
    """
    if value is None:
        return None
    return str(int(value) + 1)

# Run `func` over every value of the 'num' column and display the result.
num_incremented = df.rows().apply('num', func)
num_incremented.show()

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  2|dog dog|housé|     5|     a|
|            zombies|  3|    cat|   tv|     6|     b|
|simpsons   cat lady|  3|   frog|table|     7|     1|
|               null|  4|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+



## Apply by type
### Spark
Not available in vanilla Spark.

### Pandas


In [15]:
def func(value):
    """Return ``value`` incremented by one, rendered as a string.

    NOTE(review): a function named ``func`` is also defined in the "Apply"
    section; this cell redefines it so that it can be run on its own.

    Args:
        value: An integer, or anything ``int()`` accepts (e.g. ``"3"``).
            ``None`` is passed through untouched, because every column of the
            demo DataFrame is declared nullable and ``int(None)`` would raise
            ``TypeError``.

    Returns:
        ``str(int(value) + 1)``, or ``None`` when ``value`` is ``None``.
    """
    if value is None:
        return None
    return str(int(value) + 1)

# Each rule is (column, type, action); here `func` is the action for the
# 'num' column with type 'integer'.
typed_rules = [('num', 'integer', func)]
df.rows().apply_by_type(typed_rules).show()

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  2|dog dog|housé|     5|     a|
|            zombies|  3|    cat|   tv|     6|     b|
|simpsons   cat lady|  3|   frog|table|     7|     1|
|               null|  4|  eagle|glass|     8|     c|
+-------------------+---+-------+-----+------+------+



In [18]:
# Rules may also carry a constant instead of a function. In the output below
# the integer-like "filter" value becomes "d" and every string "thing" value
# becomes "aaa".
replacement_rules = [
    ('filter', 'integer', "d"),
    ('thing', 'string', "aaa"),
]
df.rows().apply_by_type(replacement_rules).show()

+-------------------+---+-------+-----+------+------+
|              words|num|animals|thing|second|filter|
+-------------------+---+-------+-----+------+------+
|  I like     fish  |  1|dog dog|  aaa|     5|     a|
|            zombies|  2|    cat|  aaa|     6|     b|
|simpsons   cat lady|  2|   frog|  aaa|     7|     d|
|               null|  3|  eagle|  aaa|     8|     c|
+-------------------+---+-------+-----+------+------+

