# Using Spark from Livy end point
This sample demonstrates using the Spark service via the Livy endpoint. It uses the Python `requests` library to access the Livy REST interface.
Livy API details are available at https://livy.incubator.apache.org/docs/latest/rest-api.html
**Note:** The image here may not be visible due to a markdown bug. Please change the path here to the full path to view the image.
<img src = "./spark_from_livy.jpg" style="float: center;" alt="drawing" width="900">



## Using python requests library to access restful APIs

In [1]:
import json, pprint, requests, textwrap
from requests.auth import HTTPBasicAuth

# Basic test for the requests library: fetch a list of users from a public
# REST endpoint and print each user's username and email.
print("Get username and email ")
print("---------------------- ")

# A timeout keeps the cell from hanging indefinitely if the host is unreachable.
r = requests.get('https://jsonplaceholder.typicode.com/users', timeout=30)
# Surface HTTP errors (4xx/5xx) directly instead of failing later while
# decoding or indexing an unexpected response body.
r.raise_for_status()
r.encoding = 'utf-8'
ret_json = r.json()

for user in ret_json:
    print(user['username'] , user['email'])



Get username and email 
---------------------- 


Bret Sincere@april.biz
Antonette Shanna@melissa.tv
Samantha Nathan@yesenia.net
Karianne Julianne.OConner@kory.org
Kamren Lucio_Hettinger@annie.ca
Leopoldo_Corkery Karley_Dach@jasper.info
Elwyn.Skiles Telly.Hoeger@billy.biz
Maxime_Nienow Sherwood@rosamond.me
Delphine Chaim_McDermott@dana.io
Moriah.Stanton Rey.Padberg@karina.biz


## Livy APIs to fetch running sessions and session state

In [2]:
import json, pprint, requests, textwrap
from requests.auth import HTTPBasicAuth

# Disable the InsecureRequestWarning that is emitted because the requests
# below are sent with verify=False (HTTPS certificate verification is off).
import requests.packages.urllib3 as urllib3
import urllib3.exceptions as urllib3_exceptions
urllib3.disable_warnings(urllib3_exceptions.InsecureRequestWarning)


# Change host per your configuration
host = "https://<ip>:<port>/gateway/default/livy/v1"

# Livy interface as per https://livy.incubator.apache.org/docs/latest/rest-api.html#session

# Construct Request - Get a list of current spark sessions.
sessions_url = host + "/sessions"

# Common settings for all requests (reused by the cells that follow).
# Auth header
auth = HTTPBasicAuth("***", "***")
# Content Type
headers = {'Content-Type': 'application/json'}

# Paging options for the listing: start index and number of sessions.
data = {
        'from': 0,
        'size': 10
       }
# Livy documents 'from' and 'size' as request parameters of GET /sessions, so
# they are sent in the query string (params=) rather than as a request body.
r = requests.get(sessions_url, params=data, headers=headers, auth=auth, verify=False)
# Fail early with a clear HTTP error (e.g. bad credentials) instead of a
# confusing JSON/KeyError further down.
r.raise_for_status()

response_body = r.json()

print("Sessions fetched starting ", response_body['from'])
print("Number of session fetched", response_body['total'])
session_list = response_body['sessions']

# Report the id and current state of every session returned.
for session in session_list:
    print("The session {0} has a state {1} ".format(session['id'],session['state']))



Sessions fetched starting  0
Number of session fetched 1
The session 10 has a state idle 


## Create a new Spark session and query state

In [3]:
# Ask Livy to start a new interactive session of kind 'pyspark' by POSTing
# a JSON body to the sessions endpoint.
data = {'kind': 'pyspark'}
r = requests.post(
    sessions_url,
    data=json.dumps(data),
    headers=headers,
    auth=auth,
    verify=False,
)

# The JSON response carries the new session's id and its current state.
response_body = r.json()
session_id, session_state = response_body['id'], response_body['state']
print("Spark session {0} created. Current state is {1}".format(session_id, session_state))

# Keep the Location response header; a later cell appends it to `host` to
# build the URL of this session.
created_session_url = r.headers['location']





Spark session 13 created. Current state is starting


In [11]:
# Look up the session that was just created: its URL is the Livy host
# followed by the path returned in the Location header at creation time.
this_session_url = host + created_session_url
print("this_session_url", this_session_url)

r = requests.get(
    url=this_session_url,
    headers=headers,
    auth=auth,
    verify=False,
)
# Pretty-print the decoded JSON description of the session.
session_details = r.json()
pprint.pprint(session_details)

this_session_url https://13.91.32.53:30443/gateway/default/livy/v1/sessions/13


{'appId': 'application_1560270944894_0014',
 'appInfo': {'driverLogUrl': 'http://storage-0-0.storage-0-svc.newaks.svc.cluster.local:8042/node/containerlogs/container_1560270944894_0014_01_000001/root',
             'sparkUiUrl': 'http://master-0.master-svc:8088/proxy/application_1560270944894_0014/'},
 'id': 13,
 'kind': 'pyspark',
 'log': ['\t ApplicationMaster RPC port: -1',
         '\t queue: default',
         '\t start time: 1560880472284',
         '\t final status: UNDEFINED',
         '\t tracking URL: '
         'http://master-0.master-svc:8088/proxy/application_1560270944894_0014/',
         '\t user: root',
         '19/06/18 17:54:32 INFO util.ShutdownHookManager: Shutdown hook '
         'called',
         '19/06/18 17:54:32 INFO util.ShutdownHookManager: Deleting directory '
         '/tmp/spark-de7b25a4-1605-4d7f-bef5-c921175c8ae9',
         '19/06/18 17:54:32 INFO util.ShutdownHookManager: Deleting directory '
         '/tmp/spark-00164108-299d-4313-b800-04bbfb8c1e5a',

## Execute code interactively in a Spark session and check results

In [12]:

# Run a code snippet interactively in this session by POSTing it to the
# session/<ID>/statements interface.
statements_url = this_session_url + "/statements"
pprint.pprint(statements_url)

# The snippet to execute is sent as the 'code' field of a JSON body.
data = {'code': "11+11"}
r = requests.post(
    statements_url,
    data=json.dumps(data),
    headers=headers,
    auth=auth,
    verify=False,
)

print("Respone status code", r.status_code)
print("Response received is ", r.json())
# The Location header is the path to poll for this statement's result.
print("Poll URI is ", r.headers['location'])





'https://13.91.32.53:30443/gateway/default/livy/v1/sessions/13/statements'


Respone status code 201
Response received is  {'id': 0, 'code': '11+11', 'state': 'waiting', 'output': None, 'progress': 0.0}
Poll URI is  /sessions/13/statements/0


In [13]:
# Wait a while before executing this cell. Spark session start up takes time to run your code.
# The statement URL is the Livy host plus the Location header returned when
# the statement was submitted in the previous cell.
specific_statement = host + r.headers['location']
print("monitoring url is ", specific_statement)

# Store the poll response under its own name so that `r` still refers to the
# statement-submission response; this keeps the cell re-runnable, since the
# line above reads the Location header from `r` on every run.
statement_response = requests.get(specific_statement, headers=headers, auth=auth, verify=False)
print("Response status is ", statement_response.status_code)
# pprint.pprint() writes to stdout and returns None, so it must not be nested
# inside print() (that printed "Response received is  None").
print("Response received is ")
pprint.pprint(statement_response.json())

monitoring url is  https://13.91.32.53:30443/gateway/default/livy/v1/sessions/13/statements/0


Response status is  200
{'code': '11+11',
 'id': 0,
 'output': {'data': {'text/plain': '22'}, 'execution_count': 0, 'status': 'ok'},
 'progress': 1.0,
 'state': 'available'}
Response received is  None


## Execute code in batch

In [15]:
#Execute code in batch
#The following uses pi.py from spark source. Please get that file and transfer it to the HDFS /jar folder.
batch_url = host + "/batches"
print("batch_url", batch_url)

# Path of the file to run as a batch job.
data = {
    'file' : '/jar/pi.py'
}

r = requests.post(batch_url, data=json.dumps(data), headers=headers, auth=auth, verify=False)
# Keep the Location header; the next cell appends it to `host` to poll the batch.
returned_batch_url = r.headers['location']
print("Respone status code" , r.status_code)
print("Poll URI is ",returned_batch_url )
# pprint.pprint() writes to stdout and returns None, so it must not be nested
# inside print() (that printed "Response is  None").
print("Response is ")
pprint.pprint(r.json())

batch_url https://13.91.32.53:30443/gateway/default/livy/v1/batches


Respone status code 201
Poll URI is  /batches/1
{'appId': None,
 'appInfo': {'driverLogUrl': None, 'sparkUiUrl': None},
 'id': 1,
 'log': ['stdout: ', '\nstderr: ', '\nYARN Diagnostics: '],
 'name': None,
 'state': 'starting'}
Response is  None


In [16]:
#Check results of executed code
# The batch URL is the Livy host plus the Location header saved when the
# batch was submitted.
specific_batch = host + returned_batch_url
print("specific batch request ",specific_batch)

r = requests.get(specific_batch,headers=headers, auth=auth, verify = False)
print("Response status is ",r.status_code)
# pprint.pprint() writes to stdout and returns None, so it must not be nested
# inside print() (that printed "Response received is  None").
print("Response received is ")
pprint.pprint(r.json())

specific batch request  https://13.91.32.53:30443/gateway/default/livy/v1/batches/1


Response status is  200
{'appId': None,
 'appInfo': {'driverLogUrl': None, 'sparkUiUrl': None},
 'id': 1,
 'log': ['stdout: ',
         '19/06/18 17:57:32 WARN util.NativeCodeLoader: Unable to load '
         'native-hadoop library for your platform... using builtin-java '
         'classes where applicable',
         '\nstderr: ',
         '\nYARN Diagnostics: '],
 'name': None,
 'state': 'starting'}
Response received is  None
