In [62]:
import numpy as np
import pandas as pd
from pydataset import data
from math import sqrt

import pyspark
from pyspark.sql.functions import col, expr 
from pyspark.sql.functions import round, concat, sum, min, max, count, avg, mean, lit
from pyspark.sql.functions import regexp_extract, regexp_replace

# Note: The pyspark avg and mean functions are aliases of eachother

# It is very common to see something like:
# from pyspark.sql.functions import *
# which will import all of the functions from the pyspark.sql.functions module.

# It is also very common to see something like:
# import pyspark.sql.functions as F
# which will import all of the functions from the pyspark.sql.functions module.

## Spark Dataframes
- Look like pandas dataframes
- Share some of the same methods and syntax
- But they are 2 seperate types of objects

### Create Spark Session

In [2]:
spark = pyspark.sql.SparkSession.builder.getOrCreate()

### Create Spark Dataframes

In [3]:
# Create pandas dataframe by columns using dictionary-like object

pd_df = pd.DataFrame({'col1': ['r1c1', 'r2c1', 'r3c1'],
                      'col2': ['r1c2', 'r2c2', 'r3c2'],
                      'col3': ['r1c3', 'r3c3', 'r3c3']
                        }, 
                     index = [1, 2, 3])
pd_df

Unnamed: 0,col1,col2,col3
1,r1c1,r1c2,r1c3
2,r2c1,r2c2,r3c3
3,r3c1,r3c2,r3c3


In [4]:
# create pandas dataframe by rows

pd_df = pd.DataFrame([['r1c1', 'r1c2', 'r1c3'], 
                      ['r2c1', 'r2c2', 'r2c3'], 
                      ['r3c1', 'r3c2', 'r3c3']
                      ], 
                     index = [1, 2, 3], 
                     columns = ['col1', 'col2', 'col3'])

pd_df

Unnamed: 0,col1,col2,col3
1,r1c1,r1c2,r1c3
2,r2c1,r2c2,r2c3
3,r3c1,r3c2,r3c3


In [5]:
# Create the Spark dataframe from Pandas dataframe using spark.createDataFrame()

sp_df = spark.createDataFrame(pd_df)
sp_df

DataFrame[col1: string, col2: string, col3: string]

**Takeaways**<br>
Notice that, while we do see the column names, we don't see the data in the dataframe like we would with a pandas dataframe. This is because **spark is lazy**, in that it won't show us values until it has to. For the purposes of looking at the first few rows of our data, we can use the `.show` method (defaults of 20).

### Show data in the pyspark dataframe by `.show()`

In [6]:
sp_df.show()

+----+----+----+
|col1|col2|col3|
+----+----+----+
|r1c1|r1c2|r1c3|
|r2c1|r2c2|r2c3|
|r3c1|r3c2|r3c3|
+----+----+----+



In [7]:
# Like pandas dataframes, spark dataframes have a .describe() method:

sp_df.describe()

DataFrame[summary: string, col1: string, col2: string, col3: string]

**Takeaways**<br>
Also like pandas, returns another dataframe. However, since this is a spark dataframe, we have to explicitly show it.

In [8]:
sp_df.describe().show()

+-------+----+----+----+
|summary|col1|col2|col3|
+-------+----+----+----+
|  count|   3|   3|   3|
|   mean|null|null|null|
| stddev|null|null|null|
|    min|r1c1|r1c2|r1c3|
|    max|r3c1|r3c2|r3c3|
+-------+----+----+----+



By default spark will show the first 20 rows, but we can specify how many we want by passing a number to `.show`. Let's use some different data so that we have a more robust dataset:

In [9]:
# Load the mpg data in spark dataframe format

mpg = spark.createDataFrame(data('mpg'))
mpg.show(5)

+------------+-----+-----+----+---+----------+---+---+---+---+-------+
|manufacturer|model|displ|year|cyl|     trans|drv|cty|hwy| fl|  class|
+------------+-----+-----+----+---+----------+---+---+---+---+-------+
|        audi|   a4|  1.8|1999|  4|  auto(l5)|  f| 18| 29|  p|compact|
|        audi|   a4|  1.8|1999|  4|manual(m5)|  f| 21| 29|  p|compact|
|        audi|   a4|  2.0|2008|  4|manual(m6)|  f| 20| 31|  p|compact|
|        audi|   a4|  2.0|2008|  4|  auto(av)|  f| 21| 30|  p|compact|
|        audi|   a4|  2.8|1999|  6|  auto(l5)|  f| 16| 26|  p|compact|
+------------+-----+-----+----+---+----------+---+---+---+---+-------+
only showing top 5 rows



In [10]:
# sp_df.head() method

mpg.head(5) # Return a list of pyspark.sql.types.Row

[Row(manufacturer='audi', model='a4', displ=1.8, year=1999, cyl=4, trans='auto(l5)', drv='f', cty=18, hwy=29, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=1.8, year=1999, cyl=4, trans='manual(m5)', drv='f', cty=21, hwy=29, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=2.0, year=2008, cyl=4, trans='manual(m6)', drv='f', cty=20, hwy=31, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=2.0, year=2008, cyl=4, trans='auto(av)', drv='f', cty=21, hwy=30, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=2.8, year=1999, cyl=6, trans='auto(l5)', drv='f', cty=16, hwy=26, fl='p', class='compact')]

In [11]:
# sp_df.take() method

mpg.take(5) # Return a list of pyspark.sql.types.Row

[Row(manufacturer='audi', model='a4', displ=1.8, year=1999, cyl=4, trans='auto(l5)', drv='f', cty=18, hwy=29, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=1.8, year=1999, cyl=4, trans='manual(m5)', drv='f', cty=21, hwy=29, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=2.0, year=2008, cyl=4, trans='manual(m6)', drv='f', cty=20, hwy=31, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=2.0, year=2008, cyl=4, trans='auto(av)', drv='f', cty=21, hwy=30, fl='p', class='compact'),
 Row(manufacturer='audi', model='a4', displ=2.8, year=1999, cyl=6, trans='auto(l5)', drv='f', cty=16, hwy=26, fl='p', class='compact')]

In [12]:
# sp_df.describe()

mpg.describe().show() # Not quite readable

+-------+------------+-----------------+------------------+-----------------+-----------------+----------+---+------------------+-----------------+----+-------+
|summary|manufacturer|            model|             displ|             year|              cyl|     trans|drv|               cty|              hwy|  fl|  class|
+-------+------------+-----------------+------------------+-----------------+-----------------+----------+---+------------------+-----------------+----+-------+
|  count|         234|              234|               234|              234|              234|       234|234|               234|              234| 234|    234|
|   mean|        null|             null| 3.471794871794872|           2003.5|5.888888888888889|      null|4.0|16.858974358974358|23.44017094017094|null|   null|
| stddev|        null|             null|1.2919590310839348|4.509646313320436|1.611534484684289|      null|0.0| 4.255945678889394|5.954643441166448|null|   null|
|    min|        audi|      4runne

In [13]:
# sp_df.columns

mpg.columns # returns the list of column names

['manufacturer',
 'model',
 'displ',
 'year',
 'cyl',
 'trans',
 'drv',
 'cty',
 'hwy',
 'fl',
 'class']

In [14]:
# sp_df.count()

mpg.count() # returns the number of rows

234

In [15]:
# sp_df.distinct().count()

mpg.distinct().count() # returns the number of distinct rows

225

In [16]:
# sp_df.printSchema()

mpg.printSchema()

root
 |-- manufacturer: string (nullable = true)
 |-- model: string (nullable = true)
 |-- displ: double (nullable = true)
 |-- year: long (nullable = true)
 |-- cyl: long (nullable = true)
 |-- trans: string (nullable = true)
 |-- drv: string (nullable = true)
 |-- cty: long (nullable = true)
 |-- hwy: long (nullable = true)
 |-- fl: string (nullable = true)
 |-- class: string (nullable = true)



In [17]:
# Load the mpg in pandas df

df = data('mpg')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 234 entries, 1 to 234
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   manufacturer  234 non-null    object 
 1   model         234 non-null    object 
 2   displ         234 non-null    float64
 3   year          234 non-null    int64  
 4   cyl           234 non-null    int64  
 5   trans         234 non-null    object 
 6   drv           234 non-null    object 
 7   cty           234 non-null    int64  
 8   hwy           234 non-null    int64  
 9   fl            234 non-null    object 
 10  class         234 non-null    object 
dtypes: float64(1), int64(4), object(6)
memory usage: 21.9+ KB


### Columns
- Pandas series vs. Spark column objects
- A column object represents a vertical slice of a dataframe, but does not contain the data itself.
- You will use it to perform functions on and reference that column.

### Create a pyspark dataframe column object

In [18]:
# Create a pandas series

df.model.head()

1    a4
2    a4
3    a4
4    a4
5    a4
Name: model, dtype: object

In [19]:
# Create a spark df column object

mpg.model

Column<b'model'>

In [20]:
# Alternative way to create a spark df column object

mpg['hwy']

Column<b'hwy'>

In [21]:
# sp_df column

mpg.model.show() # 'Column' object is not callable

TypeError: 'Column' object is not callable

- While this expression would produce a Series of values from a pandas dataframe, for a spark dataframe this produces a Column object, which is an object that represents a vertical slice of a dataframe, but does not contain the data itself.

- One way to use our column objects is to use them in combination with the `.select` method. `.select` is very powerful, and lets us specify what data we want to see in the resulting dataframe.

### To speficy the data we want by using `select()` method

In [22]:
# Select columns hwy, cty, and model in pandas df

df[['hwy', 'cty', 'model']].head()

Unnamed: 0,hwy,cty,model
1,29,18,a4
2,29,21,a4
3,31,20,a4
4,30,21,a4
5,26,16,a4


In [23]:
# Select columns hwy, cty, and model (Create a spark dataframe object)

mpg.select(mpg.hwy, mpg.cty, mpg.model)

DataFrame[hwy: bigint, cty: bigint, model: string]

Again, notice that we don't see any data, instead we see the new dataframe that is produced. To see the actual data, we'll again need to use `.show`

In [24]:
# Select and show columns

mpg.select(mpg.hwy, mpg.cty, mpg.model).show(5)

+---+---+-----+
|hwy|cty|model|
+---+---+-----+
| 29| 18|   a4|
| 29| 21|   a4|
| 31| 20|   a4|
| 30| 21|   a4|
| 26| 16|   a4|
+---+---+-----+
only showing top 5 rows



In [25]:
# Alternative way to select and show spark df columns

mpg.select('hwy', 'cty', 'model').show(5)

+---+---+-----+
|hwy|cty|model|
+---+---+-----+
| 29| 18|   a4|
| 29| 21|   a4|
| 31| 20|   a4|
| 30| 21|   a4|
| 26| 16|   a4|
+---+---+-----+
only showing top 5 rows



### Column objects support operations such as arithmetic operations

In [26]:
# Pandas Series support arithmetic operations

(df.hwy + 1).head()

1    30
2    30
3    32
4    31
5    27
Name: hwy, dtype: int64

In [27]:
# pyspark df columns object also support arithmetic operations

mpg.hwy + 1

Column<b'(hwy + 1)'>

Here we get back a column that represents the values from the original hwy column with 1 added to them. To actually see this data, we'd need to select it and show the dataframe.

In [28]:
mpg.select(mpg.hwy, mpg.hwy + 1, mpg.hwy*5).show(5)

+---+---------+---------+
|hwy|(hwy + 1)|(hwy * 5)|
+---+---------+---------+
| 29|       30|      145|
| 29|       30|      145|
| 31|       32|      155|
| 30|       31|      150|
| 26|       27|      130|
+---+---------+---------+
only showing top 5 rows



In [None]:
# Do NOT work
mpg.select('hwy', 'hwy' + 1, 'hwy'*5).show(5) 

# TypeError: can only concatenate str (not "int") to str

In [29]:
# An alternative way

mpg.select(mpg['hwy'], mpg['hwy'] + 1, mpg['hwy']*5).show(5)

+---+---------+---------+
|hwy|(hwy + 1)|(hwy * 5)|
+---+---------+---------+
| 29|       30|      145|
| 29|       30|      145|
| 31|       32|      155|
| 30|       31|      150|
| 26|       27|      130|
+---+---------+---------+
only showing top 5 rows



What if spark column object + "string"? No

In [30]:
mpg.select(mpg.model, mpg.model + "string").show(5)

+-----+----------------+
|model|(model + string)|
+-----+----------------+
|   a4|            null|
|   a4|            null|
|   a4|            null|
|   a4|            null|
|   a4|            null|
+-----+----------------+
only showing top 5 rows



### Rename the column object using the `.alias` method

In [31]:
mpg.select(mpg.hwy.alias('highway_mileage'),
           (mpg.hwy+1).alias('highway_mileage_plus1')).show(5)

+---------------+---------------------+
|highway_mileage|highway_mileage_plus1|
+---------------+---------------------+
|             29|                   30|
|             29|                   30|
|             31|                   32|
|             30|                   31|
|             26|                   27|
+---------------+---------------------+
only showing top 5 rows



### Store column objects and reference them

In [32]:
col1 = mpg.hwy.alias('highway_mileage')
col2 = (mpg.hwy/2).alias('highway_mileage_halved')

mpg.select(col1, col2).show(5)

+---------------+----------------------+
|highway_mileage|highway_mileage_halved|
+---------------+----------------------+
|             29|                  14.5|
|             29|                  14.5|
|             31|                  15.5|
|             30|                  15.0|
|             26|                  13.0|
+---------------+----------------------+
only showing top 5 rows



### Use `col` functions to create columns

In [33]:
# Create the hwy column object

col('hwy')

Column<b'hwy'>

In [34]:
# Create the hwy column object

col("hwy")

Column<b'hwy'>

In [35]:
# Create the hwy column object

mpg.hwy

Column<b'hwy'>

- The column object produced by the col function is the same as the the previous column object we saw.
- We can create avg_mileage using the col function to produce pyspark Column objects and using the arithmetic operators to combine them.

In [36]:
# Create the pyspark column object avg_col 

avg_col = (col("hwy") + col("cty")) / 2
avg_col

Column<b'((hwy + cty) / 2)'>

In [37]:
# Print the first 5 rows of the selected columns

mpg.select(
    col("hwy").alias("hwy_mileage"), 
    mpg.cty.alias("cty_mileage"), 
    avg_col.alias('avg_mileage')
).show(5)

+-----------+-----------+-----------+
|hwy_mileage|cty_mileage|avg_mileage|
+-----------+-----------+-----------+
|         29|         18|       23.5|
|         29|         21|       25.0|
|         31|         20|       25.5|
|         30|         21|       25.5|
|         26|         16|       21.0|
+-----------+-----------+-----------+
only showing top 5 rows



### Use `expr` functions to create columns
- The `expr` function is more powerful than col. 
- It does everything col does and more. 
- `expr` returns the same type of column object, but allows us to express manipulations to the column within the string that defines the column.

In [38]:
mpg.select(
    expr("hwy"), 
    expr("hwy + 1"),
    expr("hwy AS highway_mileage"), 
    expr("hwy + 1 AS highway_mileage")
).show(5)

+---+---------+---------------+---------------+
|hwy|(hwy + 1)|highway_mileage|highway_mileage|
+---+---------+---------------+---------------+
| 29|       30|             29|             30|
| 29|       30|             29|             30|
| 31|       32|             31|             32|
| 30|       31|             30|             31|
| 26|       27|             26|             27|
+---+---------+---------------+---------------+
only showing top 5 rows



Note that all the columns created below are identical, and which syntax to use is merely a style choice.

In [39]:
mpg.select(
    mpg.hwy.alias("highway"),
    col("hwy").alias("highway"),
    expr("hwy").alias("highway"),
    expr("hwy AS highway")
).show(5)

+-------+-------+-------+-------+
|highway|highway|highway|highway|
+-------+-------+-------+-------+
|     29|     29|     29|     29|
|     29|     29|     29|     29|
|     31|     31|     31|     31|
|     30|     30|     30|     30|
|     26|     26|     26|     26|
+-------+-------+-------+-------+
only showing top 5 rows



### Spark SQL
- As we've seen through the column definitions, spark is very flexible and allows us many different ways to express ourselves. 
- Another way that is fairly different than what we've seen above is through spark SQL, which lets us write SQL queries against our spark dataframes.

In [40]:
# In order to start using spark SQL, we'll first "register" the table with spark

mpg.createOrReplaceTempView("mpg_view")

In [41]:
# # What if I try to print the mpg_view? 

# mpg_view

# # NameError: name 'mpg_view' is not defined

Now we can write a sql querry against the mpg table.

In [42]:
spark.sql(
    """
    SELECT hwy, cty, (hwy + cty)/2 AS avg
    from mpg_view
    """
)

DataFrame[hwy: bigint, cty: bigint, avg: double]

Notice that the resulting value is another dataframe. As we know, in order to view the values in a dataframe, we need to use `.show`

In [43]:
spark.sql(
    """
    SELECT hwy, cty, (hwy + cty)/2 AS avg
    from mpg_view
    """
).show(5)

+---+---+----+
|hwy|cty| avg|
+---+---+----+
| 29| 18|23.5|
| 29| 21|25.0|
| 31| 20|25.5|
| 30| 21|25.5|
| 26| 16|21.0|
+---+---+----+
only showing top 5 rows



**Note:** All of these methods for creating / manipulating dataframes are the same in terms of performance. The resulting dataframes get turned into the same spark code that gets executed on the JVM, so it really is just a style choice as to which to use.

### Type Casting
**View column datatypes** using `dtypes` or `printSchema()`

In [44]:
mpg.dtypes

[('manufacturer', 'string'),
 ('model', 'string'),
 ('displ', 'double'),
 ('year', 'bigint'),
 ('cyl', 'bigint'),
 ('trans', 'string'),
 ('drv', 'string'),
 ('cty', 'bigint'),
 ('hwy', 'bigint'),
 ('fl', 'string'),
 ('class', 'string')]

In [45]:
mpg.printSchema()

root
 |-- manufacturer: string (nullable = true)
 |-- model: string (nullable = true)
 |-- displ: double (nullable = true)
 |-- year: long (nullable = true)
 |-- cyl: long (nullable = true)
 |-- trans: string (nullable = true)
 |-- drv: string (nullable = true)
 |-- cty: long (nullable = true)
 |-- hwy: long (nullable = true)
 |-- fl: string (nullable = true)
 |-- class: string (nullable = true)



To **convert** from one type to another, we can use the `.cast` method on a column.

In [46]:
mpg.select(
    mpg.hwy,
    mpg.hwy.cast("string")
).dtypes

[('hwy', 'bigint'), ('hwy', 'string')]

In [47]:
mpg.select(
    mpg.hwy,
    mpg.hwy.cast("string")
).printSchema()

root
 |-- hwy: long (nullable = true)
 |-- hwy: string (nullable = true)



If a value is not able to be converted, it will be replaced with null.

In [48]:
mpg.select(
    mpg.model, 
    col("model").cast("int")
).show(5)

+-----+-----+
|model|model|
+-----+-----+
|   a4| null|
|   a4| null|
|   a4| null|
|   a4| null|
|   a4| null|
+-----+-----+
only showing top 5 rows



### Basic Built-in Functions
There are many other functions beyong `col` and `expr` within the `pyspark.sql.functions` module for operating on pyspark dataframe columns.
- `concat`: to concatenate strings
- `sum`: to sum a group
- `avg`: to take the avereage of a group
- `min`: to find the minimum
- `max`: to find the maximum

**Note that importing the `sum` function directly will override the built-in sum function.** This means you will get an error if you try to sum a list of numbers, because sum will refernce the pyspark sum function, which works with pyspark dataframe columns, while the built-in sum function works with lists of numbers. The same holds true for the built in `min` and `max` functions.

In [50]:
# Try out some functions:

mpg.select(
    (sum(mpg.hwy) / count(mpg.hwy)).alias('avg_1'), 
    round(avg(mpg.hwy), 2).alias('avg_2'), 
    mean(mpg.hwy).alias('avg_3'),
    min(mpg.hwy), 
    max(mpg.hwy)
).show(5)

+-----------------+-----+-----------------+--------+--------+
|            avg_1|avg_2|            avg_3|min(hwy)|max(hwy)|
+-----------------+-----+-----------------+--------+--------+
|23.44017094017094|23.44|23.44017094017094|      12|      44|
+-----------------+-----+-----------------+--------+--------+



In [52]:
mpg.select(
    mpg.manufacturer, 
    mpg.model,
    concat(mpg.manufacturer, mpg.model)
).show(5)

+------------+-----+---------------------------+
|manufacturer|model|concat(manufacturer, model)|
+------------+-----+---------------------------+
|        audi|   a4|                     audia4|
|        audi|   a4|                     audia4|
|        audi|   a4|                     audia4|
|        audi|   a4|                     audia4|
|        audi|   a4|                     audia4|
+------------+-----+---------------------------+
only showing top 5 rows



In order to use a string literal as part of our select, we'll need to use the `lit` function, otherwise spark will try to resolve our string as a column.

In [56]:
# Compare

mpg.select(
    mpg.cyl,
    concat(mpg.cyl, lit(" cylinders"))
).show(5)

+---+-----------------------+
|cyl|concat(cyl,  cylinders)|
+---+-----------------------+
|  4|            4 cylinders|
|  4|            4 cylinders|
|  4|            4 cylinders|
|  4|            4 cylinders|
|  6|            6 cylinders|
+---+-----------------------+
only showing top 5 rows



Here we select the concatenation of the number of cylinders (the value from the cyl column) and the string literal " cylinders".

### More pyspark functions for string manipulation
- `regexp_extract`
- `regexp_replace`

In [58]:
# In order to demonstrate these functions we'll create a dataframe with some text data.

textdf = spark.createDataFrame(
    pd.DataFrame(
        {
            "address": [
                "600 Navarro St ste 600, San Antonio, TX 78205",
                "3130 Broadway St, San Antonio, TX 78209",
                "303 Pearl Pkwy, San Antonio, TX 78215",
                "1255 SW Loop 410, San Antonio, TX 78227",
            ]
        }
    )
)

textdf.show(truncate=False)

+---------------------------------------------+
|address                                      |
+---------------------------------------------+
|600 Navarro St ste 600, San Antonio, TX 78205|
|3130 Broadway St, San Antonio, TX 78209      |
|303 Pearl Pkwy, San Antonio, TX 78215        |
|1255 SW Loop 410, San Antonio, TX 78227      |
+---------------------------------------------+



`regexp_extract`: specify a regular expression with at least one capture group, and create a new column based on the contents of a capture group.
- first argument: the name of the string column to extract from.
- second argument: the regular expression itself.
- last argument: specifies which capture group we want to use. If, for example, our regular expression had 2 capture groups in it and we wanted the contents of the 2nd group, we would specify a 2 here.

In [87]:
textdf.select(
    "address", 
    regexp_extract("address", 
                   r"^(\d+)", 1).alias("street_no"),
    regexp_extract("address", 
                   r"^\d+\s([\w\s]+?),", 1).alias("street")
).show(truncate=False)

+---------------------------------------------+---------+------------------+
|address                                      |street_no|street            |
+---------------------------------------------+---------+------------------+
|600 Navarro St ste 600, San Antonio, TX 78205|600      |Navarro St ste 600|
|3130 Broadway St, San Antonio, TX 78209      |3130     |Broadway St       |
|303 Pearl Pkwy, San Antonio, TX 78215        |303      |Pearl Pkwy        |
|1255 SW Loop 410, San Antonio, TX 78227      |1255     |SW Loop 410       |
+---------------------------------------------+---------+------------------+



`regexp_replace` lets us make substitutions based on a regular expression.

Below, we obtain just the city, state, and zip code of the address by replacing everything up to the first comma with an empty string.

In [90]:
textdf.select(
    "address", 
    regexp_replace("address", r"^.*?,\s*", "").alias("city_state_zip")
).show(truncate=False)

+---------------------------------------------+---------------------+
|address                                      |city_state_zip       |
+---------------------------------------------+---------------------+
|600 Navarro St ste 600, San Antonio, TX 78205|San Antonio, TX 78205|
|3130 Broadway St, San Antonio, TX 78209      |San Antonio, TX 78209|
|303 Pearl Pkwy, San Antonio, TX 78215        |San Antonio, TX 78215|
|1255 SW Loop 410, San Antonio, TX 78227      |San Antonio, TX 78227|
+---------------------------------------------+---------------------+

