# Notebook to generate graphs for main

In [2]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from cassandra.cluster import Cluster
from pyspark.sql import SparkSession

# Set PYSPARK_PYTHON so PySpark launches the `python` executable found on PATH
os.environ["PYSPARK_PYTHON"] = "python"

# Spark session configured with the DataStax Cassandra connector,
# pointed at the Cassandra node on localhost:9042
spark = (
    SparkSession.builder
    .appName('SparkCassandraApp')
    .config('spark.jars.packages', 'com.datastax.spark:spark-cassandra-connector_2.12:3.4.1')
    .config('spark.cassandra.connection.host', 'localhost')
    .config('spark.sql.extensions', 'com.datastax.spark.connector.CassandraSparkExtensions')
    .config('spark.sql.catalog.mycatalog', 'com.datastax.spark.connector.datasource.CassandraCatalog')
    .config('spark.cassandra.connection.port', '9042')
    .getOrCreate()
)


# Direct driver connection to the same node, switched to the 'compulsory' keyspace
cluster = Cluster(['localhost'], port=9042)
session = cluster.connect()
session.set_keyspace('compulsory')

def _get_df(table_name):

    (spark.read.format("org.apache.spark.sql.cassandra")
    .options(table=table_name, keyspace="compulsory")
    .load()
    .createOrReplaceTempView(table_name))

    df = spark.sql(f"select * from {table_name}").toPandas()
    return df


In [3]:
# Load the 'fish_data_full' table into a pandas DataFrame
fish_data = _get_df(table_name='fish_data_full')

In [22]:
# Load the 'lice_data_full' table and order its rows by the 'week' column
lice_data = (
    _get_df(table_name='lice_data_full')
    .sort_values(by='week')
)

In [23]:
# Columns of lice_data to overlay on a single figure
cols = ['avgadultfemalelice', 'avgmobilelice', 'avgstationarylice']

# Draw one line+marker trace per column against the 'week' column.
# `go` (plotly.graph_objects) is imported in the notebook's import cell.
fig = go.Figure()

for col in cols:
    fig.add_trace(
        go.Scatter(
            x=lice_data['week'],
            y=lice_data[col],
            name=col,  # legend entry is the raw column name
            mode='lines+markers',
        )
    )

fig.update_layout(title='Lice data', xaxis_title='week', yaxis_title='Lice count')
fig.show()