- sudo add-apt-repository ppa:webupd8team/java
- sudo apt-get update
- sudo apt-get install oracle-java8-installer
- wget https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh
- bash Anaconda3-4.2.0-Linux-x86_64.sh
- source ~/.bashrc
- pip install spark-sklearn
- sudo apt-get install mysql-server
- wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.39.tar.gz
- tar -xvf mysql-connector-java-5.1.39.tar.gz
- wget https://d3kbcqa49mib13.cloudfront.net/spark-2.2.0-bin-hadoop2.7.tgz
- tar -xvf spark-2.2.0-bin-hadoop2.7.tgz
- ln -s spark-2.2.0-bin-hadoop2.7 spark
- SSH with source Anywhere
- HTTPS with source Anywhere
- Custom TCP Rule with Port 8888 and source Anywhere
- mkdir certificates
- cd certificates
- openssl genrsa -out server.key 1024
- openssl req -new -key server.key -out server.csr
- openssl x509 -req -days 366 -in server.csr -signkey server.key -out server.crt
- cat server.crt server.key > server.pem
- jupyter notebook --generate-config
- cd ~/.jupyter
- vi jupyter_notebook_config.py
- c = get_config()
- c.IPKernelApp.pylab = 'inline'
- c.NotebookApp.certfile = '/home/ubuntu/certificates/server.pem'
- c.NotebookApp.ip = '*'
- c.NotebookApp.open_browser = False
- c.NotebookApp.port = 8888
- cd /home/ubuntu/spark/conf
- cp spark-defaults.conf.template spark-defaults.conf
- vi spark-defaults.conf
- spark.jars.packages com.databricks:spark-xml_2.11:0.4.0
- spark.jars /home/ubuntu/mysql-connector-java-5.1.39/mysql-connector-java-5.1.39-bin.jar
2. Environment Variables - you have to add these variables so that you can easily run PySpark as a Jupyter Notebook
- vi ~/.bashrc
- export JAVA_HOME="/usr/lib/jvm/java-8-oracle"
- export SPARK_HOME="/home/ubuntu/spark"
- export PATH="$SPARK_HOME/bin:$SPARK_HOME:$PATH"
- source ~/.bashrc
- pip install pyspark
- cd DSR-Spark-Class
- pyspark OR nohup pyspark
- SSH with source Anywhere
- HTTPS with source Anywhere
- Custom TCP Rule with Port 8888 and source Anywhere
- ssh -i mykeypairfile.pem ubuntu@ec2-XX-XX-XX-XX.us-west-2.compute.amazonaws.com
- sudo apt-get update
- sudo apt-get install git