### Install Layer

In [3]:
pip install layer-sdk -qqq

In [4]:
pip install -U ipython -qqq

In [5]:
# Authenticate this session with Layer. login() prints an authorization link
# and prompts for the code obtained after signing in through the browser.
import layer
layer.login()


Please open the following link in your web browser. Once logged in, copy the code and paste it here.
https://auth.beta.layer.co/authorize?response_type=code&code_challenge=Yg35QU-zIIO8xLdz29Uja7_vl7_-fyu6ddEiZehkeCU&code_challenge_method=S256&client_id=tvp1Wo8acTt4z46c7MbUB3Rg5MqfmqkN&redirect_uri=https://beta.layer.co/oauth/code&scope=offline_access&audience=https://beta.layer.co
Code: KC-uxzBqz9cLzOoF
Successfully logged into https://beta.layer.co


### Fetch the dataset
The project must already have been run before the dataset can be fetched with the `get_dataset` function.

In [None]:
# Fetch the "titanic_dataset" dataset from Layer and convert it to a pandas DataFrame.
dataset = layer.get_dataset("titanic_dataset")
titanic_dataset = dataset.to_pandas()
# Preview the first rows as a quick sanity check of the columns and values.
titanic_dataset.head()

Unnamed: 0,SURVIVED,PASSENGERID,EMBARKED,NAME,AGE,SEX,FARE,PARCH,CABIN,TICKET,PCLASS,SIBSP
0,0,1,S,"Braund, Mr. Owen Harris",22.0,male,7.25,0,,A/5 21171,3,1
1,1,2,C,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,female,71.2833,0,C85,PC 17599,1,1
2,1,3,S,"Heikkinen, Miss. Laina",26.0,female,7.925,0,,STON/O2. 3101282,3,0
3,1,4,S,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,female,53.1,0,C123,113803,1,1
4,0,5,S,"Allen, Mr. William Henry",35.0,male,8.05,0,,373450,3,0


### Fetch the calculated features as a Spark DataFrame
Once features have been calculated, they can be fetched using the `get_featureset` function. 

In [None]:
# Fetch the "passenger_features_spark" featureset from Layer as a Spark DataFrame.
passenger_features_spark = layer.get_featureset("passenger_features_spark").to_spark()
# Print the leading rows of the featureset as a text table.
passenger_features_spark.show()

+-----------+--------+-------------+---------+--------+---+--------+-----+
|PASSENGERID|AGE_BAND|EMBARK_STATUS|FARE_BAND|IS_ALONE|SEX|SURVIVED|TITLE|
+-----------+--------+-------------+---------+--------+---+--------+-----+
|          1|       1|            0|        0|       0|  1|       0|    1|
|          2|       2|            1|        3|       0|  0|       1|    3|
|          3|       1|            0|        1|       1|  0|       1|    2|
|          4|       2|            0|        3|       0|  0|       1|    3|
|          5|       2|            0|        1|       1|  1|       0|    1|
|          6|       1|            2|        1|       1|  1|       0|    1|
|          7|       3|            0|        3|       1|  1|       0|    1|
|          8|       0|            0|        2|       0|  1|       0|    4|
|          9|       1|            0|        1|       0|  0|       1|    3|
|         10|       0|            1|        2|       0|  0|       1|    3|
|         11|       0|   

### Explore the model catalog

In [None]:
# Look up the "survival_model_spark" entry in the Layer model catalog.
survival_model = layer.get_model("survival_model_spark")
# Inspect the parameters and the evaluation metrics recorded for this model.
print(survival_model.parameters)
print(survival_model.metrics)

{'seed': '42', 'test_size': '0.2'}
{'BinaryClassificationEvaluator': [(1635250815161, 0.876865671641791)]}


In [None]:
# Retrieve the trained model object from the catalog entry; it is used below
# to score new rows via transform(). The bare name on the last line displays it.
trained_classifier = survival_model.get_train()
trained_classifier

PipelineModel_d0c9db1acdf3

#### Make a prediction

In [None]:
# Install a headless Java 8 JDK (stdout discarded), then download the
# Spark 3.2.0 / Hadoop 3.2 binary distribution from the Apache archive.
!apt-get install openjdk-8-jdk-headless -qq > /dev/null 
!wget -q https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz


In [None]:
# Unpack the downloaded Spark archive into the current working directory.
!tar xzf spark-3.2.0-bin-hadoop3.2.tgz

In [None]:
import os

# Export the Java and Spark install locations for the Spark tooling used below.
# NOTE(review): the /content prefix looks Colab-specific — adjust when the
# archive is extracted somewhere else.
os.environ.update(
    {
        "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
        "SPARK_HOME": "/content/spark-3.2.0-bin-hadoop3.2",
    }
)

In [6]:
pip install findspark -qqq

In [None]:
# Initialise findspark so that `import pyspark` works in this kernel.
# NOTE(review): presumably locates Spark through the SPARK_HOME variable set earlier — confirm.
import findspark 
findspark.init()

In [None]:
import pyspark

# Reuse the active SparkContext when one exists; otherwise start a new one.
sc = pyspark.SparkContext.getOrCreate()

In [None]:
from pyspark.sql import SQLContext


In [None]:
from pyspark.sql import SparkSession

# SQLContext has been deprecated since Spark 3.0; SparkSession is the supported
# entry point for creating DataFrames. getOrCreate() returns the active session
# (or builds one on top of the existing SparkContext).
spark = SparkSession.builder.getOrCreate()

# One hand-written passenger row to score. Values are listed in the same order
# as the column names below.
test = spark.createDataFrame(
    [
        (0, 0, 1, 1, 0, 1),
    ],
    ["EMBARK_STATUS", "FARE_BAND", "SEX", "AGE_BAND", "IS_ALONE", "TITLE"],
)

In [None]:
# Print the one-row input frame to confirm each value landed in the intended column.
test.show()

+-------------+---------+---+--------+--------+-----+
|EMBARK_STATUS|FARE_BAND|SEX|AGE_BAND|IS_ALONE|TITLE|
+-------------+---------+---+--------+--------+-----+
|            0|        0|  1|       1|       0|    1|
+-------------+---------+---+--------+--------+-----+



In [None]:
from pyspark.ml.feature import VectorAssembler


In [None]:
# Columns to pack into the feature vector; the vector's element order follows this list.
feat_cols = ['AGE_BAND', 'EMBARK_STATUS', 'FARE_BAND', 'IS_ALONE', 'SEX', 'TITLE']
# VectorAssembler combines the listed columns into a single vector column named 'features'.
vec_assembler = VectorAssembler(inputCols=feat_cols, outputCol='features')
# Append the 'features' column to the test row.
test_data = vec_assembler.transform(test)

In [None]:
# Print the assembled feature vector for the single row, without truncating the cell.
test_data.select("features").show(n=1, truncate=False)

+-------------------------+
|features                 |
+-------------------------+
|[1.0,0.0,0.0,0.0,1.0,1.0]|
+-------------------------+



In [None]:
 # Bare expression: displays the DataFrame summary (column names and types).
 test_data

DataFrame[EMBARK_STATUS: bigint, FARE_BAND: bigint, SEX: bigint, AGE_BAND: bigint, IS_ALONE: bigint, TITLE: bigint, features: vector]

In [None]:
# Score the assembled row with the trained classifier; the result keeps the
# input columns and adds the model's prediction columns.
predictions = trained_classifier.transform(test_data)


In [None]:
# Bare expression: displays the column names and types of the scored DataFrame.
predictions

DataFrame[EMBARK_STATUS: bigint, FARE_BAND: bigint, SEX: bigint, AGE_BAND: bigint, IS_ALONE: bigint, TITLE: bigint, features: vector, rawPrediction: vector, probability: vector, prediction: double]

In [None]:
# Print the class probabilities and predicted label next to the input features.
predictions.select(
    'probability',
    'AGE_BAND',
    'prediction',
    'SEX',
    'FARE_BAND',
    'EMBARK_STATUS',
    'IS_ALONE',
    'TITLE',
).show()


+--------------------+--------+----------+---+---------+-------------+--------+-----+
|         probability|AGE_BAND|prediction|SEX|FARE_BAND|EMBARK_STATUS|IS_ALONE|TITLE|
+--------------------+--------+----------+---+---------+-------------+--------+-----+
|[0.94872612797609...|       1|       0.0|  1|        0|            0|       0|    1|
+--------------------+--------+----------+---+---------+-------------+--------+-----+

