# Find names that start with 'A' or end with 'a'

In [1]:
import os
# Point the kernel at the project folder, then show the resulting working
# directory as the cell output.
# The path is written as a raw string: "\p" is not a recognised escape
# sequence, so the plain literal only worked by accident and raises an
# invalid-escape warning (a SyntaxWarning on Python 3.12+).
# NOTE(review): this absolute path is machine-specific, and nothing visible in
# this notebook reads files from it - confirm whether the chdir is still needed.
os.chdir(r"H:\pyspark_advanced-coding_interview")
os.getcwd()

'H:\\pyspark_advanced-coding_interview'

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType

# Obtain (or reuse) the Spark session used by the DataFrame and SQL cells
spark = (
    SparkSession.builder
    .appName("NamePatternMatching")
    .getOrCreate()
)

# Two nullable string columns: a row identifier and a name
schema = StructType(
    [StructField(column, StringType(), True) for column in ("ID", "Name")]
)

# Sample rows as (ID, Name) pairs; IDs are strings to match the schema
data = list(zip(
    ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
    (
        "Anna", "Amelia", "Brian", "Sandra", "Angela",
        "Diana", "Alexa", "Aaron", "Amanda", "Sofia",
    ),
))

# Build the DataFrame from the sample rows and print it without truncation
df = spark.createDataFrame(data, schema)
df.show(truncate=False)


+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|3  |Brian |
|4  |Sandra|
|5  |Angela|
|6  |Diana |
|7  |Alexa |
|8  |Aaron |
|9  |Amanda|
|10 |Sofia |
+---+------+



# PySpark

In [2]:
# Prefix match: keep the rows whose Name begins with an uppercase 'A'
names_start_with_a = df.where(df.Name.startswith("A"))
names_start_with_a.show(truncate=False)

# Suffix match: keep the rows whose Name finishes with a lowercase 'a'
names_end_with_a = df.where(df.Name.endswith("a"))
names_end_with_a.show(truncate=False)


+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|5  |Angela|
|7  |Alexa |
|8  |Aaron |
|9  |Amanda|
+---+------+

+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|4  |Sandra|
|5  |Angela|
|6  |Diana |
|7  |Alexa |
|9  |Amanda|
|10 |Sofia |
+---+------+



In [3]:
# The same two filters written as regular expressions:
# '^A' anchors the 'A' at the start of Name, 'a$' anchors the 'a' at its end.
names_start_with_a_regex = df.where(df.Name.rlike("^A"))
names_end_with_a_regex = df.where(df.Name.rlike("a$"))

# Print the start-anchored result first, then the end-anchored one
names_start_with_a_regex.show(truncate=False)
names_end_with_a_regex.show(truncate=False)


+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|5  |Angela|
|7  |Alexa |
|8  |Aaron |
|9  |Amanda|
+---+------+

+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|4  |Sandra|
|5  |Angela|
|6  |Diana |
|7  |Alexa |
|9  |Amanda|
|10 |Sofia |
+---+------+



# Spark SQL

In [4]:
# Expose df to Spark SQL as the temporary view "NamesTable"
# (an existing temporary view with that name is replaced)
df.createOrReplaceTempView(name="NamesTable")


In [5]:
# LIKE patterns: 'A%' keeps names that begin with 'A',
# '%a' keeps names that finish with 'a'.
sql_query_start = """
SELECT * FROM NamesTable WHERE Name LIKE 'A%'
"""
sql_query_end = """
SELECT * FROM NamesTable WHERE Name LIKE '%a'
"""

# Run both queries against the registered view
result_start_sql = spark.sql(sql_query_start)
result_end_sql = spark.sql(sql_query_end)

# Print the "starts with" rows first, then the "ends with" rows
result_start_sql.show(truncate=False)
result_end_sql.show(truncate=False)


+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|5  |Angela|
|7  |Alexa |
|8  |Aaron |
|9  |Amanda|
+---+------+

+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|4  |Sandra|
|5  |Angela|
|6  |Diana |
|7  |Alexa |
|9  |Amanda|
|10 |Sofia |
+---+------+



In [6]:
# RLIKE takes a regular expression: '^A' anchors at the start of Name,
# 'a$' anchors at its end.
sql_query_rlike_start = """
SELECT * FROM NamesTable WHERE Name RLIKE '^A'
"""
sql_query_rlike_end = """
SELECT * FROM NamesTable WHERE Name RLIKE 'a$'
"""

# Run both regex queries against the registered view
result_rlike_start_sql = spark.sql(sql_query_rlike_start)
result_rlike_end_sql = spark.sql(sql_query_rlike_end)

# Print the start-anchored rows first, then the end-anchored rows
result_rlike_start_sql.show(truncate=False)
result_rlike_end_sql.show(truncate=False)


+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|5  |Angela|
|7  |Alexa |
|8  |Aaron |
|9  |Amanda|
+---+------+

+---+------+
|ID |Name  |
+---+------+
|1  |Anna  |
|2  |Amelia|
|4  |Sandra|
|5  |Angela|
|6  |Diana |
|7  |Alexa |
|9  |Amanda|
|10 |Sofia |
+---+------+



# Python

In [7]:
# The same ten sample names as a plain Python list
names = [
    "Anna", "Amelia", "Brian", "Sandra", "Angela",
    "Diana", "Alexa", "Aaron", "Amanda", "Sofia",
]

# Keep the names that begin with an uppercase 'A', in their original order
names_start_with_a = list(filter(lambda name: name.startswith("A"), names))
print(names_start_with_a)

# Keep the names that finish with a lowercase 'a', in their original order
names_end_with_a = list(filter(lambda name: name.endswith("a"), names))
print(names_end_with_a)


['Anna', 'Amelia', 'Angela', 'Alexa', 'Aaron', 'Amanda']
['Anna', 'Amelia', 'Sandra', 'Angela', 'Diana', 'Alexa', 'Amanda', 'Sofia']


In [None]:
import re

# Regex version of the prefix check: re.match returns a match object when the
# pattern matches at the start of the string and None otherwise.
names_start_with_a_regex = [
    candidate for candidate in names
    if re.match("^A", candidate) is not None
]
print(names_start_with_a_regex)

# Regex version of the suffix check: re.search scans the string and '$'
# anchors the 'a' at its end.
names_end_with_a_regex = [
    candidate for candidate in names
    if re.search("a$", candidate) is not None
]
print(names_end_with_a_regex)
