# Glue Job Local Dev Notebook

This example shows how to use the ``run_jupyter_lab_in_container.py`` script to run Glue Jupyter Lab in a container, so that you can develop Glue jobs locally. This is very useful for testing the Spark / Glue API.

In [9]:
# standard library
import typing as T
import sys
import os
import dataclasses
from pprint import pprint

# third party library
from boto_session_manager import BotoSesManager

# pyspark and glue stuff
from pyspark.context import SparkContext
from pyspark.sql.functions import udf
from pyspark.sql.types import IntegerType

from awsglue.dynamicframe import DynamicFrame
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.utils import getResolvedOptions

# custom library
from simple_glue.glue_libs.pyspark_utils import double_a_column

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [2]:
# Reuse the SparkContext that is already running, or start one if none exists.
spark_ctx = SparkContext.getOrCreate()
# Build the Glue context on top of that SparkContext.
glue_ctx = GlueContext(spark_ctx)
# Spark session exposed by the Glue context.
spark_ses = glue_ctx.spark_session

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…



In [3]:
# Build a small in-memory DataFrame (three rows, integer columns) to experiment with.
# Use the session obtained from the GlueContext (``spark_ses``) rather than an
# implicit ``spark`` global: this notebook never defines ``spark``, so the cell
# would only work when the kernel happens to inject that name.
df = spark_ses.createDataFrame(
    data=[
        (1, 1, 1),
        (2, 2, 2),
        (3, 3, 3),
    ],
    schema=["id", "v1", "v2"],
)
df.show()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+---+---+---+
| id| v1| v2|
+---+---+---+
|  1|  1|  1|
|  2|  2|  2|
|  3|  3|  3|
+---+---+---+

In [4]:
# Wrap a plain Python callable as a Spark UDF whose declared result type is integer.
# ref: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.functions.udf.html#pyspark.sql.functions.udf
udf_x_100 = udf(lambda x: x * 100, IntegerType())

# Keep ``id`` as is and project both value columns through the UDF,
# giving each result an explicit column name.
df_v1_v2_x_100 = df.select(
    df["id"],
    udf_x_100(df["v1"]).alias("v1_x_100"),
    udf_x_100(df["v2"]).alias("v2_x_100"),
)

df_v1_v2_x_100.show()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+---+--------+--------+
| id|v1_x_100|v2_x_100|
+---+--------+--------+
|  1|     100|     100|
|  2|     200|     200|
|  3|     300|     300|
+---+--------+--------+

In [11]:
# use third party library locally
# NOTE(review): with no arguments, BotoSesManager presumably resolves credentials
# from the environment's default AWS credential chain -- confirm for the container setup.
bsm = BotoSesManager()
# Prints the identity behind the active credentials (user id, account id,
# principal ARN, account alias, region). The output exposes account details,
# so redact or clear it before sharing the notebook.
bsm.print_who_am_i()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

User Id = AIDA***KA6K
AWS Account Id = 87********59
Principal Arn = arn:aws:iam::87********59:user/sanhe
AWS Account Alias = bmt-app-dev
AWS Region = us-east-1

In [12]:
# Call a helper from the project's own Glue Python library (``simple_glue``)
# to show that custom code is importable in the local environment.
double_a_column(
    df,
    col=df["v1"],
    col_name="v1_x_2",
).show()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+---+---+---+------+
| id| v1| v2|v1_x_2|
+---+---+---+------+
|  1|  1|  1|     2|
|  2|  2|  2|     4|
|  3|  3|  3|     6|
+---+---+---+------+