In [97]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.conf import * 

In [648]:
# Relative path of the sensor-log CSV that the "Read the dataset" cell loads.
# Change it to wherever your copy of the data lives.
file_name = "Data/161195286.csv"

In [649]:
# Echo `sc` to check that Spark is available. `sc` is not defined in any cell
# visible here — presumably it is the SparkContext injected by the PySpark
# kernel/shell startup (TODO confirm).
sc

### 1. Read the dataset

In [650]:
# SparkSession is used below but is not imported by name in any visible cell
# (the wildcard imports at the top only bring in functions, types and
# SparkConf). Import it explicitly so this cell also runs on a kernel that does
# not pre-load the PySpark shell namespace.
from pyspark.sql import SparkSession

# Explicit schema for the sensor CSV. No "header" option is set on the reader,
# so every line of the file is parsed as data and mapped onto these six
# columns in order. All columns are nullable.
schema = StructType([
            StructField("time_stamp", DoubleType(), True),
            StructField("sensor_type", IntegerType(), True),
            StructField("sensor_name", StringType(), True),
            StructField("x_val", DoubleType(), True),
            StructField("y_val", DoubleType(), True),
            StructField("z_val", DoubleType(), True)])

# For info on SparkSession, refer: https://jaceklaskowski.gitbooks.io/mastering-apache-spark/spark-sql-SparkSession.html
# getOrCreate() returns an already-running session if there is one; otherwise
# it starts a local session named "MediWatch" with 4 worker threads.
spark = SparkSession.builder \
        .appName("MediWatch") \
        .master("local[4]") \
        .config(conf=SparkConf()) \
        .getOrCreate()

# Load the comma-separated file with the schema above (no type inference).
df_motion = spark.read \
                 .format("csv") \
                 .option("delimiter", ",") \
                 .load(file_name, schema=schema)

In [651]:
# Preview the first 20 rows to sanity-check that the CSV parsed into the expected columns.
df_motion.show(20)

+-------------+-----------+--------------------+----------+----------+---------+
|   time_stamp|sensor_type|         sensor_name|     x_val|     y_val|    z_val|
+-------------+-----------+--------------------+----------+----------+---------+
|1.61185157E14|          1|BMI160 accelerometer|-2.3854396|-3.9134622|  8.50711|
|1.61185173E14|          1|BMI160 accelerometer| -2.339934|-3.9517825| 8.411309|
| 1.6118519E14|          1|BMI160 accelerometer|-2.3566992|-4.3709106| 8.432864|
| 1.6118519E14|          1|BMI160 accelerometer|-2.5866213|-4.8355446|  8.54543|
|1.61185207E14|          1|BMI160 accelerometer|-2.8309133|-5.1756372| 8.519085|
|1.61185207E14|          1|BMI160 accelerometer|-2.8979738| -4.782854| 8.468789|
|1.61185224E14|          1|BMI160 accelerometer|-3.1374757| -4.519402| 8.368198|
| 1.6118524E14|          1|BMI160 accelerometer| -3.278782|-4.7780643| 8.286768|
| 1.6118524E14|          1|BMI160 accelerometer|-3.2572267|-4.9457154| 8.293953|
|1.61185257E14|          1|B

In [652]:
# Print each column's name, type and nullability for the loaded DataFrame.
df_motion.printSchema()

root
 |-- time_stamp: double (nullable = true)
 |-- sensor_type: integer (nullable = true)
 |-- sensor_name: string (nullable = true)
 |-- x_val: double (nullable = true)
 |-- y_val: double (nullable = true)
 |-- z_val: double (nullable = true)



### 3. Plot the readings from the accelerometer and gyroscope

In [653]:
# install plotly: pip install plotly
# need to create an account on https://plot.ly/, then go to https://plot.ly/settings/api and follow the instructions
import os

import plotly.plotly as py
import plotly.graph_objs as go
import plotly

# Never commit the Plotly API key to the notebook: read the credentials from
# the environment instead. Export PLOTLY_USERNAME and PLOTLY_API_KEY before
# starting the kernel. Any key that was previously hardcoded here should be
# regenerated on the Plotly settings page, since it has been exposed.
_plotly_username = os.environ.get("PLOTLY_USERNAME")
_plotly_api_key = os.environ.get("PLOTLY_API_KEY")
if not _plotly_username or not _plotly_api_key:
    raise RuntimeError(
        "Set the PLOTLY_USERNAME and PLOTLY_API_KEY environment variables "
        "before running this cell.")

plotly.tools.set_credentials_file(username=_plotly_username, api_key=_plotly_api_key)

In [654]:
# List every distinct sensor_type code present in the data.
df_motion.select("sensor_type").distinct().show()

+-----------+
|sensor_type|
+-----------+
|          1|
|          4|
+-----------+



In [655]:
def plot_sensor_data(dataframe, sensor_name):
    """Build one Plotly scatter trace per axis for a single sensor.

    Args:
        dataframe: Spark DataFrame that has ``time_stamp``, ``sensor_name``,
            ``x_val``, ``y_val`` and ``z_val`` columns.
        sensor_name: Value of the ``sensor_name`` column whose rows are kept.

    Returns:
        ``go.Data`` wrapping three ``go.Scatter`` traces, in x, y, z order.
        Each trace plots one axis column against ``time_stamp`` and is named
        ``"<column> <sensor_name>"``.
    """
    coor_names = ["x_val", "y_val", "z_val"]
    # Filter and collect to pandas once for all three axes. The filter does
    # not depend on the axis, so doing it inside the loop would run the same
    # Spark job three times.
    readings = (
        dataframe
        .where(dataframe['sensor_name'] == sensor_name)
        .select(["time_stamp"] + coor_names)
        .toPandas()
    )
    plot = [
        go.Scatter(
            x=readings["time_stamp"],
            y=readings[coor],
            name=coor + ' ' + sensor_name)
        for coor in coor_names
    ]
    return go.Data(plot)

In [656]:
# Build the x/y/z traces for the accelerometer rows and render them inline with py.iplot.
accler_plot = plot_sensor_data(df_motion, "BMI160 accelerometer")
py.iplot(accler_plot)

In [657]:
# Build the x/y/z traces for the gyroscope rows and render them inline with py.iplot.
# NOTE(review): this rebinds `accler_plot` to gyroscope data, so after this cell
# the name no longer holds the accelerometer traces. Consider a separate name
# such as `gyro_plot` once any later cells that use `accler_plot` are checked.
accler_plot = plot_sensor_data(df_motion, "BMI160 gyroscope")
py.iplot(accler_plot)