In [1]:
import requests

In [2]:
import requests_kerberos

In [9]:
from collections import namedtuple

In [14]:
# Record type holding the fields of one batch session.
Batch = namedtuple(
    'Batch',
    ('batch_id', 'appId', 'appInfo', 'log', 'state'),
)

In [15]:
# Sample field values, one entry per Batch field.
kwargs = dict(
    batch_id=1,
    appId='application_100_10002',
    appInfo='foo',
    log=['a', 'b', 'c'],
    state='RUNNING',
)

In [18]:
# Build a Batch by expanding the sample dict into keyword arguments; the cell displays the resulting record.
Batch(**kwargs)

Batch(batch_id=1, appId='application_100_10002', appInfo='foo', log=['a', 'b', 'c'], state='RUNNING')

In [29]:
class LivyAPI:
    """Small client for the batch endpoints of a Livy server's REST API.

    Parameters
    ----------
    server_url : str
        Host name (or address) of the Livy server, without scheme or port;
        it is interpolated into ``<protocol>://<server_url>:<port>``.
    port : int, optional
        Port used in the base URL. Defaults to 8998.
    use_tls : bool, optional
        When True the base URL uses ``https``, otherwise ``http``.
    headers : dict, optional
        HTTP headers sent with every request. Defaults to
        ``{'Content-Type': 'application/json'}``.
    auth : object, optional
        Passed through as ``auth=`` to ``requests``. When omitted, a
        ``requests_kerberos.HTTPKerberosAuth`` is created with mutual
        authentication required and preemptive negotiation enabled.
    """

    def __init__(self, server_url, port=8998, use_tls=False, headers=None, auth=None):
        if auth is None:
            auth = requests_kerberos.HTTPKerberosAuth(
                mutual_authentication=requests_kerberos.REQUIRED,
                force_preemptive=True)
        self._auth = auth

        protocol = 'https' if use_tls else 'http'
        self._base_url = '%s://%s:%s' % (protocol, server_url, port)

        if headers is None:
            headers = {'Content-Type': 'application/json'}
        self._headers = headers

    def all_info(self, from_index=None, size=None):
        """Returns all the active batch sessions.

        Handles the following endpoint:
        GET /batches

        Parameters
        ----------
        from_index : int, optional
            The start index to fetch sessions. Sent as the ``from`` request
            parameter; omitted from the request when None.
        size : int, optional
            Number of sessions to fetch. Sent as the ``size`` request
            parameter; omitted from the request when None.

        Returns
        -------
        dict
            The decoded JSON response, returned unmodified.
            NOTE(review): per the Livy REST docs this is expected to carry
            ``from``, ``total`` and ``sessions`` keys -- confirm against the
            server version in use.
        """
        url = '%s/batches' % (self._base_url)
        data = {}
        # Compare against None (not truthiness) so an explicit 0 is forwarded.
        if from_index is not None:
            data['from'] = from_index
        if size is not None:
            data['size'] = size

        return self._request('get', url, data=data)

    def info(self, batch_id):
        """Returns the batch session information.

        Handles the following endpoint:
        GET /batches/{batchId}

        Parameters
        ----------
        batch_id : int
            Id of the batch session to look up.

        Returns
        -------
        dict
            The decoded JSON response, returned unmodified.
        """
        url = '%s/batches/%s' % (self._base_url, batch_id)
        return self._request('get', url)

    def state(self, batch_id):
        """Returns the state of a batch session.

        Handles the following endpoint:
        GET /batches/{batchId}/state

        Parameters
        ----------
        batch_id : int
            Id of the batch session to look up.

        Returns
        -------
        dict
            The decoded JSON response, returned unmodified.
            NOTE(review): per the Livy REST docs this is expected to carry
            the session ``id`` and its ``state`` -- confirm against the
            server version in use.
        """
        url = '%s/batches/%s/state' % (self._base_url, batch_id)
        return self._request('get', url)

    def _request(self, rest_action, url, data=None):
        """
        Helper function to handle the boilerplate of a request to the Livy server

        Parameters
        ----------
        rest_action : {'get', 'post', 'delete'}
            The REST API action to perform; looked up by name on ``requests``.
        url : str
            The url to hit
        data : dict, optional
            The parameters that should be sent to the Livy REST endpoint.
            For 'get' they are sent as URL query parameters; for any other
            action they are serialized to JSON and sent as the request body.

        Returns
        -------
        dict
            The decoded JSON body of the response (``resp.json()``)

        Raises
        ------
        requests.HTTPError
            Propagated from ``raise_for_status()`` on an error status code.
        """
        # Imported locally so the class does not rely on an `import json`
        # that lives in a different notebook cell.
        import json

        if data is None:
            data = {}
        # Find the right function to call, 'get', 'post', or 'delete'
        func = getattr(requests, rest_action)
        if rest_action == 'get':
            # GET endpoints take their arguments as query parameters, not as a body.
            resp = func(url, auth=self._auth, params=data, headers=self._headers)
        else:
            # Convert the REST payload into JSON for the request body.
            resp = func(url, auth=self._auth, data=json.dumps(data), headers=self._headers)
        # Make sure that our request was successful
        resp.raise_for_status()
        return resp.json()


In [30]:
# Create a client for this host; port, protocol, headers and auth all fall back to the constructor defaults.
api = LivyAPI(server_url='ec2-34-203-229-157.compute-1.amazonaws.com')

In [33]:
# Ask the server for its batch sessions.
# NOTE(review): the recorded output of this cell is a KerberosExchangeError, so the saved run did not get a response.
api.all_info()

KerberosExchangeError: authGSSClientStep() failed: (('Unspecified GSS failure.  Minor code may provide more information', 851968), ('Server krbtgt/COMPUTE-1.AMAZONAWS.COM@EMR.CONTIUUM.IO not found in Kerberos database', -1765328377))

In [3]:
import json

In [4]:
# Example values for a job payload: file location, display name, Spark settings.
# NOTE(review): in 'hdfs://tmp/foo' the 'tmp' sits in the URI authority position -- confirm 'hdfs:///tmp/foo' was not intended.
file_on_hdfs = 'hdfs://tmp/foo'
job_name = 'A Livy Job'
spark_config = {
    'spark.pyspark.python': '/opt/anaconda/bin/python',
}

In [5]:
# Assemble the payload from the values defined above and serialize it to a JSON string.
data = dict(file=file_on_hdfs, name=job_name, conf=spark_config)
json_data = json.dumps(data)


In [6]:
# Display the JSON string produced by json.dumps.
json_data

'{"file": "hdfs://tmp/foo", "name": "A Livy Job", "conf": {"spark.pyspark.python": "/opt/anaconda/bin/python"}}'

In [8]:
# Encode the already-serialized string again: the result is a JSON string literal with the inner quotes escaped.
# NOTE(review): this rebinds json_data to its own re-encoding, so every re-run adds another escaping layer;
# the recorded output below shows more than one extra layer, i.e. the cell was executed more than once.
json_data = json.dumps(json_data)
json_data

'"\\"{\\\\\\"file\\\\\\": \\\\\\"hdfs://tmp/foo\\\\\\", \\\\\\"name\\\\\\": \\\\\\"A Livy Job\\\\\\", \\\\\\"conf\\\\\\": {\\\\\\"spark.pyspark.python\\\\\\": \\\\\\"/opt/anaconda/bin/python\\\\\\"}}\\""'