### Spark notebook ###

This notebook will only work in a Jupyter session running on `mathmadslinux2p`.

You can start your own Jupyter session on `mathmadslinux2p` and open this notebook in Chrome on the MADS Windows server by

**Steps**

1. Login to the MADS Windows server using https://mathportal.canterbury.ac.nz/.
2. Download or copy this notebook to your home directory.
3. Open powershell and run `ssh mathmadslinux2p`.
4. Run `start_pyspark_notebook` or `/opt/anaconda3/bin/jupyter-notebook --ip 132.181.129.68 --port $((8000 + $((RANDOM % 999))))`.
5. Copy / paste the url provided in the shell window into Chrome on the MADS Windows server.
6. Open the notebook from the Jupyter root directory (which is your home directory).
7. Run `start_spark()` to start a spark session in the notebook.
8. Run `stop_spark()` before closing the notebook or kill your spark application by hand using the link in the Spark UI.

In [27]:
# Run this cell to import pyspark and to define start_spark() and stop_spark()

import findspark

findspark.init()

import getpass
import pandas
import pyspark
import random
import re

from IPython.display import display, HTML
from pyspark import SparkContext
from pyspark.sql import SparkSession


# Functions used below

def username():
    """Get username with any domain information removed.
    """

    return re.sub('@.*', '', getpass.getuser())


def dict_to_html(d):
    """Convert a Python dictionary into a two column table for display.
    """

    html = []

    html.append(f'<table width="100%" style="width:100%; font-family: monospace;">')
    for k, v in d.items():
        html.append(f'<tr><td style="text-align:left;">{k}</td><td>{v}</td></tr>')
    html.append(f'</table>')

    return ''.join(html)


def show_as_html(df, n=20):
    """Leverage existing pandas jupyter integration to show a spark dataframe as html.
    
    Args:
        n (int): number of rows to show (default: 20)
    """

    display(df.limit(n).toPandas())

    
def display_spark():
    """Display the status of the active Spark session if one is currently running.
    """
    
    if 'spark' in globals() and 'sc' in globals():

        name = sc.getConf().get("spark.app.name")
        
        html = [
            f'<p><b>Spark</b></p>',
            f'<p>The spark session is <b><span style="color:green">active</span></b>, look for <code>{name}</code> under the running applications section in the Spark UI.</p>',
            f'<ul>',
            f'<li><a href="http://mathmadslinux2p.canterbury.ac.nz:8080/" target="_blank">Spark UI</a></li>',
            f'<li><a href="{sc.uiWebUrl}" target="_blank">Spark Application UI</a></li>',
            f'</ul>',
            f'<p><b>Config</b></p>',
            dict_to_html(dict(sc.getConf().getAll())),
            f'<p><b>Notes</b></p>',
            f'<ul>',
            f'<li>The spark session <code>spark</code> and spark context <code>sc</code> global variables have been defined by <code>start_spark()</code>.</li>',
            f'<li>Please run <code>stop_spark()</code> before closing the notebook or restarting the kernel or kill <code>{name}</code> by hand using the link in the Spark UI.</li>',
            f'</ul>',
        ]
        display(HTML(''.join(html)))
        
    else:
        
        html = [
            f'<p><b>Spark</b></p>',
            f'<p>The spark session is <b><span style="color:red">stopped</span></b>, confirm that <code>{username() + " (jupyter)"}</code> is under the completed applications section in the Spark UI.</p>',
            f'<ul>',
            f'<li><a href="http://mathmadslinux2p.canterbury.ac.nz:8080/" target="_blank">Spark UI</a></li>',
            f'</ul>',
        ]
        display(HTML(''.join(html)))


# Functions to start and stop spark

def start_spark(executor_instances=2, executor_cores=1, worker_memory=1, master_memory=1):
    """Start a new Spark session and define globals for SparkSession (spark) and SparkContext (sc).
    
    Args:
        executor_instances (int): number of executors (default: 2)
        executor_cores (int): number of cores per executor (default: 1)
        worker_memory (float): worker memory (default: 1)
        master_memory (float): master memory (default: 1)
    """

    global spark
    global sc

    user = username()
    
    cores = executor_instances * executor_cores
    partitions = cores * 4
    port = 4000 + random.randint(1, 999)

    spark = (
        SparkSession.builder
        .master("spark://masternode2:7077")
        .config("spark.driver.extraJavaOptions", f"-Dderby.system.home=/tmp/{user}/spark/")
        .config("spark.dynamicAllocation.enabled", "false")
        .config("spark.executor.instances", str(executor_instances))
        .config("spark.executor.cores", str(executor_cores))
        .config("spark.cores.max", str(cores))
        .config("spark.executor.memory", f"{worker_memory}g")
        .config("spark.driver.memory", f"{master_memory}g")
        .config("spark.driver.maxResultSize", "0")
        .config("spark.sql.shuffle.partitions", str(partitions))
        .config("spark.ui.port", str(port))
        .appName(user + " (jupyter)")
        .getOrCreate()
    )
    sc = SparkContext.getOrCreate()
    
    display_spark()

    
def stop_spark():
    """Stop the active Spark session and delete globals for SparkSession (spark) and SparkContext (sc).
    """

    global spark
    global sc

    if 'spark' in globals() and 'sc' in globals():

        spark.stop()

        del spark
        del sc

    display_spark()


# Make css changes to improve spark output readability

html = [
    '<style>',
    'pre { white-space: pre !important; }',
    'table.dataframe td { white-space: nowrap !important; }',
    'table.dataframe thead th:first-child, table.dataframe tbody th { display: none; }',
    '</style>',
]
display(HTML(''.join(html)))

### Example notebook ###

The code below provides a template for how you would use a notebook to start spark, run some code, and then stop spark.

**Steps**

- Run `start_spark()` to start a spark session in the notebook (only change the default resources when advised to do so for an exercise or assignment)
- Write and run code interactively, creating additional cells as needed.
- Run `stop_spark()` before closing the notebook or kill your spark application by hand using the link in the [Spark UI](http://mathmadslinux2p.canterbury.ac.nz:8080/).

In [28]:
# Run this cell to start a spark session in this notebook

#start_spark(executor_instances=4, executor_cores=2, worker_memory=4, master_memory=4)

start_spark(executor_instances=8, executor_cores=4, worker_memory=8, master_memory=8)

0,1
spark.dynamicAllocation.enabled,false
spark.ui.port,4456
spark.sql.warehouse.dir,file:/users/home/dcp31/assignment_1/spark-warehouse
spark.master,spark://masternode2:7077
spark.app.name,dcp31 (jupyter)
spark.executor.id,driver
spark.executor.instances,8
spark.driver.port,33175
spark.driver.host,mathmadslinux2p.canterbury.ac.nz
spark.app.startTime,1714302082113


In [29]:
# Write your imports here or insert cells below

from pyspark.sql import functions as F
from pyspark.sql.types import *

## Analysis Question 4
### Q4 (a)

In [30]:
# load all of daily
schema_daily = StructType([
    StructField("station_id", StringType(), True),
    StructField("date", DateType(), True), # load as date type this time as using date in later query
    StructField("element", StringType(), True),
    StructField("value", IntegerType(), True),
    StructField("m_flag", StringType(), True),
    StructField("q_flag", StringType(), True),
    StructField("s_flag", StringType(), True),
    StructField("time", StringType(), True)
])

daily_all = (
    spark.read.format("com.databricks.spark.csv")
    .option("header", "false")
    .option("inferSchema", "false")
    .option("dateFormat", "yyyyMMdd")
    .schema(schema_daily)
    .load("hdfs:///data/ghcnd/daily/*.csv.gz") # all .csv.gz files in the folder
)
   
show_as_html(daily_all, 10)

Unnamed: 0,station_id,date,element,value,m_flag,q_flag,s_flag,time
0,AE000041196,2010-01-01,TMAX,259,,,S,
1,AE000041196,2010-01-01,TMIN,120,,,S,
2,AE000041196,2010-01-01,TAVG,181,H,,S,
3,AEM00041194,2010-01-01,TMAX,250,,,S,
4,AEM00041194,2010-01-01,TMIN,168,,,S,
5,AEM00041194,2010-01-01,PRCP,0,,,S,
6,AEM00041194,2010-01-01,TAVG,194,H,,S,
7,AEM00041217,2010-01-01,TMAX,250,,,S,
8,AEM00041217,2010-01-01,TMIN,146,,,S,
9,AEM00041217,2010-01-01,TAVG,199,H,,S,


In [5]:
# count all of daily
daily_all.count()

3103954141

### Q4 (b)

In [6]:
# find observations of the 5 core elements

# create list of core elements
core_elements = ['PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN']

# filter by element being in core_elements list
daily_core = daily_all.filter(F.col("element").isin(core_elements))

show_as_html(daily_core, 10)

Unnamed: 0,station_id,date,element,value,m_flag,q_flag,s_flag,time
0,AE000041196,2010-01-01,TMAX,259,,,S,
1,AE000041196,2010-01-01,TMIN,120,,,S,
2,AEM00041194,2010-01-01,TMAX,250,,,S,
3,AEM00041194,2010-01-01,TMIN,168,,,S,
4,AEM00041194,2010-01-01,PRCP,0,,,S,
5,AEM00041217,2010-01-01,TMAX,250,,,S,
6,AEM00041217,2010-01-01,TMIN,146,,,S,
7,AEM00041218,2010-01-01,TMAX,265,,,S,
8,AG000060390,2010-01-01,TMAX,180,,,S,
9,AG000060390,2010-01-01,PRCP,0,,,S,


In [7]:
# how many observations of the core elements? And which has the most observations?
daily_core_count = daily_core.groupBy("element").count()

show_as_html(daily_core_count)

Unnamed: 0,element,count
0,PRCP,1069105193
1,TMAX,455946095
2,SNWD,297846434
3,TMIN,454759421
4,SNOW,353904309


### Q4 (c)

In [8]:
# create unique_elements table that combines all the elements collected by each station on each day through collect_set
daily_element_set = daily_all.groupBy("station_id", "date").agg(F.collect_set(F.col("element")).alias("unique_elements"))

show_as_html(daily_element_set, 10)

Unnamed: 0,station_id,date,unique_elements
0,ACW00011604,1949-04-06,"[TMAX, TMIN, PRCP, SNWD, SNOW, WT16]"
1,ACW00011604,1949-08-03,"[PRCP, SNWD, SNOW]"
2,ACW00011647,1957-11-10,"[PRCP, SNWD, SNOW]"
3,ACW00011647,1957-11-17,"[PRCP, SNWD, SNOW]"
4,ACW00011647,1957-12-05,"[PRCP, SNWD, SNOW]"
5,ACW00011647,1958-10-15,"[PRCP, SNWD, SNOW]"
6,ACW00011647,1959-12-23,"[PRCP, SNWD, SNOW]"
7,ACW00011647,1960-01-26,"[PRCP, SNWD, SNOW]"
8,ACW00011647,1960-09-09,"[PRCP, SNWD, SNOW]"
9,ACW00011647,1960-10-17,"[PRCP, SNWD, SNOW]"


In [10]:
# filter for rows where TMIN in collected but not TMAX
tmin_not_max = (
    daily_element_set.filter(F.array_contains(F.col("unique_elements"), "TMIN") 
                             & ~F.array_contains(F.col("unique_elements"), "TMAX"))
)

show_as_html(tmin_not_max, 10)

Unnamed: 0,station_id,date,unique_elements
0,AE000041196,1963-03-10,"[TMIN, TAVG]"
1,AE000041196,1963-06-15,"[TMIN, TAVG]"
2,AE000041196,1963-07-17,"[TMIN, TAVG]"
3,AE000041196,1964-04-18,"[TMIN, TAVG]"
4,AE000041196,1964-04-19,"[TMIN, TAVG]"
5,AE000041196,1965-06-08,"[TMIN, TAVG]"
6,AE000041196,1968-03-13,"[TMIN, TAVG]"
7,AE000041196,1968-10-05,"[TMIN, TAVG]"
8,AE000041196,1982-03-18,"[TMIN, TAVG]"
9,AE000041196,1997-06-07,"[TMIN, TAVG]"


In [11]:
# count observations
tmin_not_max.count()

9322723

In [12]:
# count unique stations
tmin_not_max.select("station_id").distinct().count()

27927

### Q4 (d)

In [31]:
# load enhanced stations table
stations = (
    spark
    .read
    .option("header", "true")
    .csv("hdfs:///user/dcp31/assignment_1/stations1")
)

show_as_html(stations, 5)


Unnamed: 0,station_id,state_code,country_code,latitude,longitude,elevation,name,gsn_flag,hcn_crn_flag,wmo_id,country_name,state_name,first_year_active,last_year_active,no_unique_elements,unique_elements,core_element_count,other_element_count,PRCP_only
0,AE000041196,,AE,25.333,55.517,34.0,SHARJAH INTER. AIRP,GSN,,41196.0,United Arab Emirates,,1944,2024,4,"TMAX,TMIN,PRCP,TAVG",3,1,
1,AEM00041218,,AE,24.262,55.609,264.9,AL AIN INTL,,,41218.0,United Arab Emirates,,1994,2024,4,"TMAX,TMIN,PRCP,TAVG",3,1,
2,AGE00147715,,AG,35.42,8.1197,863.0,TEBESSA,,,,Algeria,,1879,1938,3,"TMAX,TMIN,PRCP",3,0,
3,AGE00147794,,AG,36.78,5.1,225.0,BEJAIA-CAP CARBON,,,,Algeria,,1926,1938,2,"TMAX,TMIN",2,0,
4,AGM00060402,,AG,36.712,5.07,6.1,SOUMMAM,,,60402.0,Algeria,,1973,2024,5,"TMAX,TMIN,����,PRCP,SNWD",4,1,


In [7]:
# select only the required columns before joining therefore making dataset smaller
# filter for observations of TMIN and TMAX
filtered_daily = daily_all.select("station_id", "date", "element", "value").filter(
    (F.col("element") == "TMIN") | (F.col("element") == "TMAX")
)

filtered_daily.show(10)

+-----------+----------+-------+-----+
| station_id|      date|element|value|
+-----------+----------+-------+-----+
|AE000041196|2010-01-01|   TMAX|  259|
|AE000041196|2010-01-01|   TMIN|  120|
|AEM00041194|2010-01-01|   TMAX|  250|
|AEM00041194|2010-01-01|   TMIN|  168|
|AEM00041217|2010-01-01|   TMAX|  250|
|AEM00041217|2010-01-01|   TMIN|  146|
|AEM00041218|2010-01-01|   TMAX|  265|
|AG000060390|2010-01-01|   TMAX|  180|
|AG000060590|2010-01-01|   TMAX|  240|
|AG000060590|2010-01-01|   TMIN|   40|
+-----------+----------+-------+-----+
only showing top 10 rows



In [8]:
# filter stations for 'New Zealand' 
stations_NZ = stations.select("station_id", "name").filter(F.col("country_name") == "New Zealand")

stations_NZ.show(15)

+-----------+-------------------+
| station_id|               name|
+-----------+-------------------+
|NZ000093417|    PARAPARAUMU AWS|
|NZM00093781|  CHRISTCHURCH INTL|
|NZ000939450|CAMPBELL ISLAND AWS|
|NZM00093929| ENDERBY ISLAND AWS|
|NZ000933090|   NEW PLYMOUTH AWS|
|NZ000093844|INVERCARGILL AIRPOR|
|NZ000093994| RAOUL ISL/KERMADEC|
|NZ000937470|         TARA HILLS|
|NZ000939870|CHATHAM ISLANDS AWS|
|NZ000093292| GISBORNE AERODROME|
|NZM00093439|WELLINGTON AERO AWS|
|NZ000936150| HOKITIKA AERODROME|
|NZM00093678|           KAIKOURA|
|NZM00093110|  AUCKLAND AERO AWS|
|NZ000093012|            KAITAIA|
+-----------+-------------------+



In [9]:
# use broadcast join to join daily obs and NZ obs, with NZ obs being boradcast as it's the smaller table
daily_NZ = filtered_daily.join(F.broadcast(stations_NZ), "station_id", "inner")

daily_NZ.show(10)

+-----------+----------+-------+-----+-------------------+
| station_id|      date|element|value|               name|
+-----------+----------+-------+-----+-------------------+
|NZ000093292|2010-01-01|   TMAX|  297| GISBORNE AERODROME|
|NZ000093292|2010-01-01|   TMIN|   74| GISBORNE AERODROME|
|NZ000093417|2010-01-01|   TMAX|  180|    PARAPARAUMU AWS|
|NZ000093417|2010-01-01|   TMIN|  125|    PARAPARAUMU AWS|
|NZ000093844|2010-01-01|   TMAX|  232|INVERCARGILL AIRPOR|
|NZ000093844|2010-01-01|   TMIN|   96|INVERCARGILL AIRPOR|
|NZ000933090|2010-01-01|   TMAX|  197|   NEW PLYMOUTH AWS|
|NZ000933090|2010-01-01|   TMIN|   82|   NEW PLYMOUTH AWS|
|NZ000936150|2010-01-01|   TMAX|  324| HOKITIKA AERODROME|
|NZM00093110|2010-01-01|   TMAX|  215|  AUCKLAND AERO AWS|
+-----------+----------+-------+-----+-------------------+
only showing top 10 rows



In [17]:
# number of observations
daily_NZ.count()

485520

In [11]:
# count number of years covered
# create new column 'year' which is extracted from column 'date'
daily_NZ = daily_NZ.withColumn("year", F.year(F.col("date")))

# count years
daily_NZ.select("year").distinct().count()

85

In [12]:
# find min and max years
min_year = daily_NZ.select(F.min(F.col("year")))
max_year = daily_NZ.select(F.max(F.col("year")))

min_year.show()
max_year.show()

+---------+
|min(year)|
+---------+
|     1940|
+---------+

None
+---------+
|max(year)|
+---------+
|     2024|
+---------+

None


In [None]:
# had difficulty getting data to save as 1 file. Tried a few options - found this on the internet

# (
#     daily_NZ.repartition(1)
#     .write
#     .format("com.databricks.spark.csv")
#     .option("header", "true")
#     .option("compression", "gzip")
#     .save("/user/dcp31/assignment_1a/daily_NZ.csv")
# )

In [30]:
#!hdfs dfs -get /user/dcp31/assignment_1a/daily_NZ.csv /users/home/dcp31/assignment_1/

### Q4 (e)

In [32]:
# filter daily for observations of PRCP. Group by station_id, date and value. Create new column 'year' extracted from date. 
daily_precip = (
    daily_all.select("station_id", "date", "value")
    .filter(F.col("element") == "PRCP")
    .withColumn("year", F.year(F.col("date")))
)

daily_precip.show(10)

+-----------+----------+-----+----+
| station_id|      date|value|year|
+-----------+----------+-----+----+
|AEM00041194|2010-01-01|    0|2010|
|AG000060390|2010-01-01|    0|2010|
|AG000060590|2010-01-01|    0|2010|
|AG000060611|2010-01-01|    0|2010|
|AGE00147708|2010-01-01|    5|2010|
|AGE00147716|2010-01-01|    5|2010|
|AGE00147718|2010-01-01|    0|2010|
|AGM00060351|2010-01-01|    0|2010|
|AGM00060355|2010-01-01|    0|2010|
|AGM00060360|2010-01-01|    0|2010|
+-----------+----------+-----+----+
only showing top 10 rows



In [33]:
#filter stations to keep only columns necessary
filtered_stations = stations.select("station_id", "country_name", "country_code")

filtered_stations.show()

+-----------+--------------------+------------+
| station_id|        country_name|country_code|
+-----------+--------------------+------------+
|AE000041196|United Arab Emirates|          AE|
|AEM00041218|United Arab Emirates|          AE|
|AGE00147715|             Algeria|          AG|
|AGE00147794|             Algeria|          AG|
|AGM00060402|             Algeria|          AG|
|AGM00060430|             Algeria|          AG|
|AGM00060461|             Algeria|          AG|
|AGM00060514|             Algeria|          AG|
|AGM00060515|             Algeria|          AG|
|AGM00060550|             Algeria|          AG|
|AGM00060563|             Algeria|          AG|
|AJ000037656|          Azerbaijan|          AJ|
|AJ000037674|          Azerbaijan|          AJ|
|AJ000037734|          Azerbaijan|          AJ|
|AJ000037740|          Azerbaijan|          AJ|
|AJ000037742|          Azerbaijan|          AJ|
|AJ000037744|          Azerbaijan|          AJ|
|AJ000037759|          Azerbaijan|      

In [34]:
# use broadcast join, broadcasting smaller stations table for joining to filtered daily obs. 
df_precip = daily_precip.join(F.broadcast(filtered_stations), "station_id", "inner")

df_precip.show(10)

+-----------+----------+-----+----+--------------------+------------+
| station_id|      date|value|year|        country_name|country_code|
+-----------+----------+-----+----+--------------------+------------+
|AEM00041194|2010-01-01|    0|2010|United Arab Emirates|          AE|
|AG000060390|2010-01-01|    0|2010|             Algeria|          AG|
|AG000060590|2010-01-01|    0|2010|             Algeria|          AG|
|AG000060611|2010-01-01|    0|2010|             Algeria|          AG|
|AGE00147708|2010-01-01|    5|2010|             Algeria|          AG|
|AGE00147716|2010-01-01|    5|2010|             Algeria|          AG|
|AGE00147718|2010-01-01|    0|2010|             Algeria|          AG|
|AGM00060351|2010-01-01|    0|2010|             Algeria|          AG|
|AGM00060355|2010-01-01|    0|2010|             Algeria|          AG|
|AGM00060360|2010-01-01|    0|2010|             Algeria|          AG|
+-----------+----------+-----+----+--------------------+------------+
only showing top 10 

In [35]:
#create table for average PRCP, grouping year, country and country code(only to keep country code in table), using function 
# average to calculated rainfall average. Ordering by highest rainfall.
avg_precip = (
    df_precip.groupBy("year", "country_name", "country_code")
    .agg(F.avg("value").alias("average_rainfall"))
    .orderBy(F.desc("average_rainfall"))
)

avg_precip.show(30)

+----+--------------------+------------+------------------+
|year|        country_name|country_code|  average_rainfall|
+----+--------------------+------------+------------------+
|2000|   Equatorial Guinea|          EK|            4361.0|
|1975|  Dominican Republic|          DR|            3414.0|
|1974|                Laos|          LA|            2480.5|
|1978|              Belize|          BH| 2244.714285714286|
|1979|        Sint Maarten|          NN|            1967.0|
|1974|          Costa Rica|          CS|            1820.0|
|1979|              Belize|          BH|1755.5454545454545|
|1973|            Suriname|          NS|            1710.0|
|1978|             Curacao|          UC|1675.0384615384614|
|1977|              Belize|          BH|1541.7142857142858|
|1978|            Honduras|          HO|1469.6122448979593|
|1977|             Curacao|          UC|1442.5384615384614|
|1978|        Sint Maarten|          NN|1292.8695652173913|
|1977|            Honduras|          HO|

In [24]:
# how many observations went into Equatorial Guineas high rainfall average?
# filter by country name and year
EG_count = df_precip.filter(F.col("country_name") == "Equatorial Guinea").filter(F.col("year") == "2000")

EG_count.show()

+-----------+----------+-----+----+-----------------+------------+
| station_id|      date|value|year|     country_name|country_code|
+-----------+----------+-----+----+-----------------+------------+
|EKM00064810|2000-06-22| 4361|2000|Equatorial Guinea|          EK|
+-----------+----------+-----+----+-----------------+------------+



In [25]:
df_precip.filter(F.col("country_name") == "Dominican Republic").filter(F.col("year") == "1975").show()

+-----------+----------+-----+----+------------------+------------+
| station_id|      date|value|year|      country_name|country_code|
+-----------+----------+-----+----+------------------+------------+
|DRM00078486|1975-12-10| 3414|1975|Dominican Republic|          DR|
+-----------+----------+-----+----+------------------+------------+



In [36]:
df_precip.filter(F.col("country_name") == "Nicaragua").filter(F.col("year") == "2023").show()

+-----------+----------+-----+----+------------+------------+
| station_id|      date|value|year|country_name|country_code|
+-----------+----------+-----+----+------------+------------+
|NUM00078741|2023-06-23|    0|2023|   Nicaragua|          NU|
+-----------+----------+-----+----+------------+------------+



In [42]:
# commented out so won't re-save when running notebook again

# (
#     avg_precip.repartition(1)
#     .write
#     .format("com.databricks.spark.csv")
#     .option("header", "true")
#     .option("compression", "gzip")
#     .mode("overwrite")
#     .save("/user/dcp31/assignment_1a/precip.csv")
# )

In [43]:
# !hdfs dfs -get /user/dcp31/assignment_1a/precip.csv /users/home/dcp31/assignment_1c/

In [37]:
# Run this cell before closing the notebook or kill your spark application by hand using the link in the Spark UI

stop_spark()