## Start a Spark session

In [None]:
from pyspark.sql import SparkSession
from pyspark import SparkContext
import psutil
import numpy
import pandas as pd
import random
import os

# Size the Spark memory settings from the resources currently free on this machine.
# psutil.cpu_count() can return None when the count cannot be determined, so
# fall back to a single thread instead of failing on int(None).
logical_threads = int(psutil.cpu_count(logical=True) or 1)
available_memory = int(psutil.virtual_memory().available / pow(10, 9))  # truncated to whole GB
driver_memory = 2  # GB set aside for the driver

# Split what is left after the driver evenly across the threads (value in MB,
# passed to Spark with the 'm' suffix below). On a busy machine, or one with
# many cores, that share can come out tiny, zero or even negative, which Spark
# refuses at start-up, so never go below a fixed floor.
min_memory_per_executor = 512
memory_per_executor = max(
    int((available_memory - driver_memory) / logical_threads * 1000),
    min_memory_per_executor,
)

# Alternative that builds a local SparkSession directly (kept for reference):
# spark = SparkSession.builder.master('local[' + str(logical_threads) + ']')\
#         .config('spark.driver.memory', str(driver_memory) + 'g')\
#         .config('spark.executor.memory', str(memory_per_executor) + 'm')\
#         .config('spark.ui.proxyBase', os.environ['JUPYTERHUB_SERVICE_PREFIX'] + 'proxy/4040')\
#         .getOrCreate()

# NOTE(review): swan_spark_conf is not defined anywhere in this notebook; it is
# presumably injected into the kernel by the hosting environment - confirm.
# spark.ui.proxyBase makes the Spark UI generate links that work behind the
# JupyterHub proxy on port 4040.
swan_spark_conf = swan_spark_conf.setAll([('spark.driver.memory', str(driver_memory) + 'g'),
                                          ('spark.executor.memory', str(memory_per_executor) + 'm'),
                                          ('spark.ui.proxyBase', os.environ['JUPYTERHUB_SERVICE_PREFIX'] + 'proxy/4040')])

# Reuse a running SparkContext if there is one, otherwise start it with this configuration.
sc = SparkContext.getOrCreate(conf=swan_spark_conf)

## Test Spark

In [None]:
# Number of random points thrown for the Monte Carlo estimate.
num_samples = 10 ** 6


def inside(p):
    """Draw one random point in the unit square and test it against the unit circle.

    The argument is ignored; it exists only because the function is used as a
    filter predicate and is therefore called once per element.

    Returns True when the point lies strictly inside the quarter circle of
    radius 1 centred on the origin.
    """
    x = random.random()
    y = random.random()
    return 1 > x * x + y * y
# Distribute the sample indices over the Spark context, keep the ones whose
# random point landed inside the circle, and count them.
samples_rdd = sc.parallelize(range(num_samples))
count = samples_rdd.filter(inside).count()

# The fraction of hits approximates pi / 4.
pi = count * 4 / num_samples
print(pi)

## Create link to Spark UI (absolute path)

In [None]:
%%js

var base_url=window.location.href.split("user")[0]
IPython.notebook.kernel.execute('base_url="' + base_url + '";')

In [None]:
#from pyspark.context import SparkContext

#def uiWebUrl(self):
#    from urllib.parse import urlparse
#    web_url = self._jsc.sc().uiWebUrl().get()
#    port = urlparse(web_url).port
#    return "/proxy/{}/jobs/".format(port)

#SparkContext.uiWebUrl = property(uiWebUrl)

from IPython.display import Markdown, display

# base_url is assigned in the kernel by the preceding %%js cell, so that cell
# has to have run before this one.
sparkUI_link = ''.join([
    base_url,
    'user/',
    os.environ['JUPYTERHUB_USER'],
    '/proxy/4040/jobs/',
])

# Render the address as a clickable Markdown link.
link_markdown = '[Spark UI](' + sparkUI_link + ')'
display(Markdown(link_markdown))

### Or have a link to Spark UI with relative path

Click this link [Spark UI]().

## Spark UI in notebook

In [None]:
# Import IFrame by name. `from IPython import display` would rebind the name
# `display` to the module and shadow the display() function that the link cell
# imports, so any later display(...) call would fail with "module is not callable".
from IPython.display import IFrame

# Embed the Spark UI page in the notebook; as the last expression of the cell
# the frame is rendered as the cell output.
IFrame(src=sparkUI_link, width=990, height=900)

## Stop the Spark context

In [None]:
# Shut down the SparkContext created in the first code cell.
sc.stop()

## Read from dataset

In [None]:
# Load the CSV into a pandas DataFrame; the bare name on the last line makes
# the notebook render it as the cell output.
dataset_path = '/home/jovyan/datasets/dataset_1.csv'
df = pd.read_csv(dataset_path)
df

### Misc

In [None]:
# import socket

# print(spark.sparkContext.uiWebUrl) 
# print(socket.gethostbyname(socket.gethostname()))
# print(socket.getfqdn())
# print(socket.gethostbyname_ex(socket.gethostname())[-1]) #ipv4 addresses
# !hostname -i

# from notebook import notebookapp
# servers = list(notebookapp.list_running_servers())
# print(servers)