<a href="https://colab.research.google.com/github/alvarofernandezmalagon/Basic_Operations_PySpark/blob/master/basic_operations_pyspark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Objective




The objective of this notebook is to review the basic operations of PySpark 

# Create the environment of PySpark

In [0]:
#We have to install Spark and Java in Colab
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://www-us.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz
!tar xf spark-2.4.5-bin-hadoop2.7.tgz
!pip install -q findspark

In [0]:
# Point the kernel's environment at the Java and Spark installs used by PySpark in Colab.
import os

os.environ.update({
    "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
    "SPARK_HOME": "/content/spark-2.4.5-bin-hadoop2.7",
})

In [0]:
# Start a local Spark session to check that the installation works.
import findspark
# findspark.init() is called before pyspark is imported -- presumably so the Spark
# install referenced by SPARK_HOME becomes importable; keep this order.
findspark.init()
from pyspark.sql import SparkSession
# master("local[*]") runs Spark inside this VM; getOrCreate() builds the session
# (or hands back one that already exists) and binds it to `spark`.
spark = SparkSession.builder.master("local[*]").getOrCreate()

In [4]:
# Display the session object to confirm it was created.
spark

# Get data

### Get access to Google Drive


In [0]:
# Colab helper used in the next cell to mount Google Drive.
from google.colab import drive

In [6]:
# Mount Google Drive under /content/drive; force_remount=True requests a fresh mount even if one already exists.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


### Change the directory

In [7]:
# Work from the script folder so that the relative ../00-Data paths used below resolve.
%cd /content/drive/My Drive/02-Colab/01-PySpark/00-Repaso_Pyspark/01-Script

/content/drive/My Drive/02-Colab/01-PySpark/00-Repaso_Pyspark/01-Script


### Unzip the data

In [8]:
# List the data folder to see which files are available before unzipping.
!ls ../00-Data

fire-incidents.csv.zip


In [9]:
!unzip ../00-Data/fire-incidents.csv.zip -d ../00-Data/

Archive:  ../00-Data/fire-incidents.csv.zip
  inflating: ../00-Data/fire-incidents.csv  


In [10]:
# Long listing of the data folder to confirm the CSV is there and check its size.
!ls -l ../00-Data

total 236153
-rw------- 1 root root 207085551 Dec  6 12:15 fire-incidents.csv
-rw------- 1 root root  34734540 May  5 15:36 fire-incidents.csv.zip


# Import libraries

In [0]:
from pyspark.sql import functions as F
from pyspark.sql import types as T

# Create the path

In [12]:
# Absolute location of the extracted CSV on the mounted Drive.
filename = 'fire-incidents.csv'
data_dir = '/content/drive/My Drive/02-Colab/01-PySpark/00-Repaso_Pyspark/00-Data/'
data_path = f"{data_dir}{filename}"
print(data_path)

/content/drive/My Drive/02-Colab/01-PySpark/00-Repaso_Pyspark/00-Data/fire-incidents.csv


# Load the data into a DataFrame

In [0]:
# Read the CSV into a DataFrame: the first line supplies the column names,
# column types are inferred from the data, and fields are comma-separated.
csv_options = dict(header=True, inferSchema=True, sep=",")
fire_df = spark.read.csv(data_path, **csv_options)

In [14]:
# Preview the first 5 rows; truncate=False prints full cell values instead of shortening long strings.
fire_df.show(5,truncate=False)

+---------------+---------------+----------------------+-------------------+-----------+-------------------+-------------------+-------------------+-------------+-------+---------+------------+----+-----------------+---------------------+---------+-------------+-----------+---------------+-------------------+-----------------------+-----------------------+---------------+-------------+-------------------+-----------------+----------------+----------------------------------------------------+----------+--------------------+----------------------+------------------+--------------------------+---------------------------------------------+-------------------+--------------+-----------------------+-------------------------+-----------+------------------+--------------------------------------+--------------+----------------+--------------------+-----------+--------------+------------------------------------+----------------------------------------+----------------------------------+---------

### Get the columns and types

In [15]:
# Print every column with its inferred type and nullability.
fire_df.printSchema()

root
 |-- Incident Number: integer (nullable = true)
 |-- Exposure Number: integer (nullable = true)
 |-- Address: string (nullable = true)
 |-- Incident Date: timestamp (nullable = true)
 |-- Call Number: integer (nullable = true)
 |-- Alarm DtTm: timestamp (nullable = true)
 |-- Arrival DtTm: timestamp (nullable = true)
 |-- Close DtTm: timestamp (nullable = true)
 |-- City: string (nullable = true)
 |-- Zipcode: integer (nullable = true)
 |-- Battalion: string (nullable = true)
 |-- Station Area: string (nullable = true)
 |-- Box: string (nullable = true)
 |-- Suppression Units: integer (nullable = true)
 |-- Suppression Personnel: integer (nullable = true)
 |-- EMS Units: integer (nullable = true)
 |-- EMS Personnel: integer (nullable = true)
 |-- Other Units: integer (nullable = true)
 |-- Other Personnel: integer (nullable = true)
 |-- First Unit On Scene: string (nullable = true)
 |-- Estimated Property Loss: integer (nullable = true)
 |-- Estimated Contents Loss: double (nullab

### Get the structure

#### Get the number of columns and rows

In [16]:
# Number of rows in the DataFrame.
fire_df.count()

513405

In [17]:
# Number of columns: fire_df.columns is a plain Python list of column names.
len(fire_df.columns)

63

# Basic PySpark operations

## Change the name of the columns

In [18]:
# Normalise every column name to snake_case: lower-case, with spaces replaced by underscores.
# toDF(*names) renames all columns in one step instead of stacking one
# withColumnRenamed projection per column. Re-running the cell is safe because
# names that are already normalised map to themselves.
fire_df = fire_df.toDF(*[column.lower().replace(" ", "_") for column in fire_df.columns])
fire_df.columns

['incident_number',
 'exposure_number',
 'address',
 'incident_date',
 'call_number',
 'alarm_dttm',
 'arrival_dttm',
 'close_dttm',
 'city',
 'zipcode',
 'battalion',
 'station_area',
 'box',
 'suppression_units',
 'suppression_personnel',
 'ems_units',
 'ems_personnel',
 'other_units',
 'other_personnel',
 'first_unit_on_scene',
 'estimated_property_loss',
 'estimated_contents_loss',
 'fire_fatalities',
 'fire_injuries',
 'civilian_fatalities',
 'civilian_injuries',
 'number_of_alarms',
 'primary_situation',
 'mutual_aid',
 'action_taken_primary',
 'action_taken_secondary',
 'action_taken_other',
 'detector_alerted_occupants',
 'property_use',
 'area_of_fire_origin',
 'ignition_cause',
 'ignition_factor_primary',
 'ignition_factor_secondary',
 'heat_source',
 'item_first_ignited',
 'human_factors_associated_with_ignition',
 'structure_type',
 'structure_status',
 'floor_of_fire_origin',
 'fire_spread',
 'no_flame_spead',
 'number_of_floors_with_minimum_damage',
 'number_of_floors_wit

## Select the columns

### One way

In [19]:
# Names only (no data) of the first 5 columns, obtained by slicing the column list.
fire_df.columns[:5]

['incident_number',
 'exposure_number',
 'address',
 'incident_date',
 'call_number']

In [20]:
# Select the first 3 columns by passing a slice of the column list, and show their first 5 rows.
fire_df.select(fire_df.columns[:3]).show(5)

+---------------+---------------+--------------------+
|incident_number|exposure_number|             address|
+---------------+---------------+--------------------+
|       19146215|              0|925 Golden Gate A...|
|       19146165|              0| 619 Holloway Avenue|
|       19146202|              0|    1485 Pine Street|
|       19146182|              0|       Church Street|
|       19146193|              0|   1430 Scott Street|
+---------------+---------------+--------------------+
only showing top 5 rows



In [21]:
# Another way: pass the column names directly as separate arguments.
fire_df.select('incident_number','address').show(5)

+---------------+--------------------+
|incident_number|             address|
+---------------+--------------------+
|       19146215|925 Golden Gate A...|
|       19146165| 619 Holloway Avenue|
|       19146202|    1485 Pine Street|
|       19146182|       Church Street|
|       19146193|   1430 Scott Street|
+---------------+--------------------+
only showing top 5 rows



### Using column expressions (`F.col`)

In [22]:
# F.col returns a Column object that refers to a column by its name.
F.col('incident_number')

Column<b'incident_number'>

## Filtering and selecting columns

In [23]:
# Keep the rows whose call_number is greater than 10030109, then show only that column (first 5 rows).
fire_df.filter(F.col("call_number")>10030109).select("call_number").show(5)

+-----------+
|call_number|
+-----------+
|  193390376|
|  193390050|
|  193390291|
|  193390165|
|  193390205|
+-----------+
only showing top 5 rows



## Get a reduced DataFrame to work with

### Select the first five columns

In [24]:
# sub_df keeps only the first five columns of fire_df; the cells that follow operate on it.
sub_df = fire_df.select(fire_df.columns[:5])
sub_df.show(5)

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|  193390376|
|       19146165|              0| 619 Holloway Avenue|2019-12-05 00:00:00|  193390050|
|       19146202|              0|    1485 Pine Street|2019-12-05 00:00:00|  193390291|
|       19146182|              0|       Church Street|2019-12-05 00:00:00|  193390165|
|       19146193|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390205|
+---------------+---------------+--------------------+-------------------+-----------+
only showing top 5 rows



### Filter

In [25]:
# A row is kept only when all three conditions hold; each condition is a Column
# expression and they are combined with & (logical AND).
incident_below_limit = F.col("incident_number") < 50000000
call_above_limit = F.col("call_number") > 80562
address_matches = F.col("address") == "310 Colon Av."

sub_df.filter(incident_below_limit & call_above_limit & address_matches).show()


+---------------+---------------+-------------+-------------------+-----------+
|incident_number|exposure_number|      address|      incident_date|call_number|
+---------------+---------------+-------------+-------------------+-----------+
|        9030109|              0|310 Colon Av.|2009-04-12 00:00:00|   91020273|
+---------------+---------------+-------------+-------------------+-----------+



### Filter using a pattern expression

In [26]:
# Keep every address that contains the text "Av"; in a LIKE pattern % stands for any run of characters.
sub_df.filter(F.col("address").like("%Av%")).show()

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|  193390376|
|       19146165|              0| 619 Holloway Avenue|2019-12-05 00:00:00|  193390050|
|       19146229|              0|    227 Grant Avenue|2019-12-05 00:00:00|  193390501|
|       19146220|              0|915 Golden Gate A...|2019-12-05 00:00:00|  193390407|
|       19146145|              0|  500 Corbett Avenue|2019-12-04 00:00:00|  193384507|
|       19145869|              0|      Potrero Avenue|2019-12-04 00:00:00|  193381856|
|       19145970|              0| 601 Van Ness Avenue|2019-12-04 00:00:00|  193382874|
|       19145819|              0|  750 Pacific Avenue|2019-12-04 00:00:00|  193381358|
|       19145779|              0|    601 Gr

### Filter using a list

In [27]:
# Keep the rows whose address is equal to one of the listed values.
sub_df.filter(F.col('address').isin(["310 Colon Av.","Lansdale Av","Pacific Av"])).show()

+---------------+---------------+-------------+-------------------+-----------+
|incident_number|exposure_number|      address|      incident_date|call_number|
+---------------+---------------+-------------+-------------------+-----------+
|        9030109|              0|310 Colon Av.|2009-04-12 00:00:00|   91020273|
+---------------+---------------+-------------+-------------------+-----------+



In [28]:
# Negation (~): count the rows whose address is NOT one of the listed values.
# NOTE(review): this count plus the rows matched by the isin filter is lower than
# the total row count -- presumably rows with a null address satisfy neither
# condition; confirm before relying on this as a complement.
sub_df.filter(~F.col('address').isin(["310 Colon Av.","Lansdale Av","Pacific Av"])).count()

513166

### Delete specific columns

In [29]:
# To pass the column names as a list, unpack it with * so each name becomes a separate argument.
sub_df.drop(*["address","call_number"]).show()

+---------------+---------------+-------------------+
|incident_number|exposure_number|      incident_date|
+---------------+---------------+-------------------+
|       19146215|              0|2019-12-05 00:00:00|
|       19146165|              0|2019-12-05 00:00:00|
|       19146202|              0|2019-12-05 00:00:00|
|       19146182|              0|2019-12-05 00:00:00|
|       19146193|              0|2019-12-05 00:00:00|
|       19146198|              0|2019-12-05 00:00:00|
|       19146158|              0|2019-12-05 00:00:00|
|       19146229|              0|2019-12-05 00:00:00|
|       19146220|              0|2019-12-05 00:00:00|
|       19146248|              0|2019-12-05 00:00:00|
|       19146188|              0|2019-12-05 00:00:00|
|       19146157|              0|2019-12-05 00:00:00|
|       19146017|              0|2019-12-04 00:00:00|
|       19145960|              0|2019-12-04 00:00:00|
|       19146145|              0|2019-12-04 00:00:00|
|       19145869|           

In [30]:
# The column names can also be passed directly, without building a list.
sub_df.drop("address","call_number").show()

+---------------+---------------+-------------------+
|incident_number|exposure_number|      incident_date|
+---------------+---------------+-------------------+
|       19146215|              0|2019-12-05 00:00:00|
|       19146165|              0|2019-12-05 00:00:00|
|       19146202|              0|2019-12-05 00:00:00|
|       19146182|              0|2019-12-05 00:00:00|
|       19146193|              0|2019-12-05 00:00:00|
|       19146198|              0|2019-12-05 00:00:00|
|       19146158|              0|2019-12-05 00:00:00|
|       19146229|              0|2019-12-05 00:00:00|
|       19146220|              0|2019-12-05 00:00:00|
|       19146248|              0|2019-12-05 00:00:00|
|       19146188|              0|2019-12-05 00:00:00|
|       19146157|              0|2019-12-05 00:00:00|
|       19146017|              0|2019-12-04 00:00:00|
|       19145960|              0|2019-12-04 00:00:00|
|       19146145|              0|2019-12-04 00:00:00|
|       19145869|           

In [31]:
# To drop many columns at once, unpack a slice of the column list: here every column from the fifth onwards.
fire_df.drop(*fire_df.columns[4:]).show()

+---------------+---------------+--------------------+-------------------+
|incident_number|exposure_number|             address|      incident_date|
+---------------+---------------+--------------------+-------------------+
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|
|       19146165|              0| 619 Holloway Avenue|2019-12-05 00:00:00|
|       19146202|              0|    1485 Pine Street|2019-12-05 00:00:00|
|       19146182|              0|       Church Street|2019-12-05 00:00:00|
|       19146193|              0|   1430 Scott Street|2019-12-05 00:00:00|
|       19146198|              0|   1430 Scott Street|2019-12-05 00:00:00|
|       19146158|              0|    3351 23rd Street|2019-12-05 00:00:00|
|       19146229|              0|    227 Grant Avenue|2019-12-05 00:00:00|
|       19146220|              0|915 Golden Gate A...|2019-12-05 00:00:00|
|       19146248|              0|     21 Loyola Trail|2019-12-05 00:00:00|
|       19146188|        

### Drop duplicates

#### Delete duplicates based on entire rows being repeated

In [32]:
# Row count before removing duplicates, for comparison with the next cells.
sub_df.count()

513405

In [33]:
# Row count after dropping rows that repeat another row in every column.
sub_df.dropDuplicates().count()

513049

#### Delete duplicates based on one or more specific columns

In [34]:
# Row count after dropping rows that repeat another row's incident_number (only the subset columns are compared).
sub_df.dropDuplicates(subset=['incident_number']).count()

513049

### Order by column "orderBy"

It's not necessary to pass `ascending=True`, because ascending order is the default.

In [35]:
# One way: column name plus the ascending keyword (False gives descending order).
sub_df.orderBy("incident_number",ascending=False).show()

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|       19146248|              0|     21 Loyola Trail|2019-12-05 00:00:00|  193390662|
|       19146229|              0|    227 Grant Avenue|2019-12-05 00:00:00|  193390501|
|       19146220|              0|915 Golden Gate A...|2019-12-05 00:00:00|  193390407|
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|  193390376|
|       19146202|              0|    1485 Pine Street|2019-12-05 00:00:00|  193390291|
|       19146198|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390273|
|       19146193|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390205|
|       19146188|              0|      190 9th Street|2019-12-05 00:00:00|  193390184|
|       19146182|              0|       Chu

In [36]:
# Another way: a Column expression with .desc() instead of the ascending keyword.
sub_df.orderBy(F.col("incident_number").desc()).show()

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|       19146248|              0|     21 Loyola Trail|2019-12-05 00:00:00|  193390662|
|       19146229|              0|    227 Grant Avenue|2019-12-05 00:00:00|  193390501|
|       19146220|              0|915 Golden Gate A...|2019-12-05 00:00:00|  193390407|
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|  193390376|
|       19146202|              0|    1485 Pine Street|2019-12-05 00:00:00|  193390291|
|       19146198|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390273|
|       19146193|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390205|
|       19146188|              0|      190 9th Street|2019-12-05 00:00:00|  193390184|
|       19146182|              0|       Chu

### Order by column "sort"

In [37]:
# Same descending ordering as the orderBy cells, written with sort and the ascending keyword.
sub_df.sort(F.col("incident_number"),ascending=False).show()

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|       19146248|              0|     21 Loyola Trail|2019-12-05 00:00:00|  193390662|
|       19146229|              0|    227 Grant Avenue|2019-12-05 00:00:00|  193390501|
|       19146220|              0|915 Golden Gate A...|2019-12-05 00:00:00|  193390407|
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|  193390376|
|       19146202|              0|    1485 Pine Street|2019-12-05 00:00:00|  193390291|
|       19146198|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390273|
|       19146193|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390205|
|       19146188|              0|      190 9th Street|2019-12-05 00:00:00|  193390184|
|       19146182|              0|       Chu

In [38]:
# sort with a Column expression and .desc().
sub_df.sort(F.col("incident_number").desc()).show()

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|       19146248|              0|     21 Loyola Trail|2019-12-05 00:00:00|  193390662|
|       19146229|              0|    227 Grant Avenue|2019-12-05 00:00:00|  193390501|
|       19146220|              0|915 Golden Gate A...|2019-12-05 00:00:00|  193390407|
|       19146215|              0|925 Golden Gate A...|2019-12-05 00:00:00|  193390376|
|       19146202|              0|    1485 Pine Street|2019-12-05 00:00:00|  193390291|
|       19146198|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390273|
|       19146193|              0|   1430 Scott Street|2019-12-05 00:00:00|  193390205|
|       19146188|              0|      190 9th Street|2019-12-05 00:00:00|  193390184|
|       19146182|              0|       Chu

### Order by 2 or more columns

In [39]:
# Sort by incident_number ascending, then by address descending: the ascending list pairs up with the column list.
sub_df.sort(["incident_number","address"],ascending=[True,False]).show()

+---------------+---------------+--------------------+-------------------+-----------+
|incident_number|exposure_number|             address|      incident_date|call_number|
+---------------+---------------+--------------------+-------------------+-----------+
|        3000001|              0|       1301 Turk St.|2003-07-08 00:00:00|   31890183|
|        3000003|              0|Broadway St. / Ta...|2003-01-01 00:00:00|   30010002|
|        3000006|              0|Market St. / Spea...|2003-01-01 00:00:00|   30010005|
|        3000007|              0|3rd St. / Harriso...|2003-01-01 00:00:00|   30010007|
|        3000014|              0|33rd Av. / Norieg...|2003-01-01 00:00:00|   30010016|
|        3000016|              0|        291 10th St.|2003-01-01 00:00:00|   30010019|
|        3000018|              0|11th St. / Howard...|2003-01-01 00:00:00|   30010023|
|        3000020|              0|3rd St. / Howard St.|2003-01-01 00:00:00|   30010028|
|        3000021|              0|300 The Em

### GroupBy

The groupBy allows us to obtain different results and statistics

In [40]:
# Number of rows for each address.
sub_df.groupBy("address").count().show()

+--------------------+-----+
|             address|count|
+--------------------+-----+
| 180 Beaumont Avenue|    2|
|  1329 Gilman Avenue|    1|
|   1600 Filbert St 3|    1|
|  895 Pacific Av 432|    1|
|    231 Wilde Avenue|    1|
|         5 Lenox Way|   14|
|    859 Baker Street|    2|
|    250 Clara Street|    5|
|       Larkin Street|   71|
|    125 Cambon Dr 12|    5|
|550 Buena Vista A...|   27|
|  800 Indiana St 375|    1|
|    603 Mason Street|   21|
|       375 10th Av 2|    1|
|   300 Toland Street|    3|
|    1541 12th Avenue|   33|
|    678 Green Street|    5|
|          Bay Bridge|    5|
|      Avenue B  East|    1|
|    605 Jones Street|    8|
+--------------------+-----+
only showing top 20 rows



In [41]:
# Dictionary form of agg: {column name: aggregate function name}; here the mean incident_number per address.
sub_df.groupBy("address").agg({"incident_number":"mean"}).show()

+--------------------+--------------------+
|             address|avg(incident_number)|
+--------------------+--------------------+
| 180 Beaumont Avenue|        1.80801035E7|
|  1329 Gilman Avenue|         1.9144015E7|
|   1600 Filbert St 3|          1.914133E7|
|  895 Pacific Av 432|         1.9140387E7|
|    231 Wilde Avenue|         1.9139957E7|
|         5 Lenox Way| 1.664830657142857E7|
|    859 Baker Street|          1.860605E7|
|    250 Clara Street|        1.75013382E7|
|       Larkin Street| 1.862224366197183E7|
|    125 Cambon Dr 12|         1.8881239E7|
|550 Buena Vista A...|1.8285991814814813E7|
|  800 Indiana St 375|         1.9133007E7|
|    603 Mason Street| 1.739179685714286E7|
|       375 10th Av 2|         1.9125047E7|
|   300 Toland Street|1.7767253333333332E7|
|    1541 12th Avenue|1.6780718272727273E7|
|    678 Green Street|        1.72614326E7|
|          Bay Bridge|        1.70701634E7|
|      Avenue B  East|         1.9110928E7|
|    605 Jones Street|      1.75

#### Group and apply several aggregations

In [42]:
# Several aggregations over the same grouping, computed in a single agg call.
# alias() names the output column; the un-aliased ones keep the generated
# names min(incident_number) / max(incident_number).
incident_stats = [
    F.min("incident_number"),
    F.mean("incident_number").alias("mean_indicent"),
    F.max("incident_number"),
]
sub_df.groupBy("address").agg(*incident_stats).show()

+--------------------+--------------------+--------------------+--------------------+
|             address|min(incident_number)|       mean_indicent|max(incident_number)|
+--------------------+--------------------+--------------------+--------------------+
| 180 Beaumont Avenue|            17015833|        1.80801035E7|            19144374|
|  1329 Gilman Avenue|            19144015|         1.9144015E7|            19144015|
|   1600 Filbert St 3|            19141330|          1.914133E7|            19141330|
|  895 Pacific Av 432|            19140387|         1.9140387E7|            19140387|
|    231 Wilde Avenue|            19139957|         1.9139957E7|            19139957|
|         5 Lenox Way|            14050587| 1.664830657142857E7|            19139933|
|    859 Baker Street|            18073271|          1.860605E7|            19138829|
|    250 Clara Street|            15115183|        1.75013382E7|            19138297|
|       Larkin Street|            18014418| 1.86222436

#### Group and filter

In [43]:
# Count the call_number values per (address, incident_date) pair, then keep only
# the pairs that occur more than once.
calls_per_address_day = (
    sub_df
    .groupBy("address", "incident_date")
    .agg(F.count("call_number").alias("count"))
)
calls_per_address_day.filter(F.col("count") > 1).show()

+--------------------+-------------------+-----+
|             address|      incident_date|count|
+--------------------+-------------------+-----+
|         6 Cargo Way|2019-08-12 00:00:00|    2|
|364 Divisadero St...|2019-08-10 00:00:00|    2|
|     55 Union Street|2019-06-16 00:00:00|    2|
|    125 Cambon Drive|2019-05-07 00:00:00|    2|
|    480 Ellis St 245|2019-02-03 00:00:00|    2|
|       28 2nd Street|2019-02-02 00:00:00|    2|
|  44 Woodland Avenue|2019-01-06 00:00:00|    2|
|          Mission St|2018-02-09 00:00:00|    2|
|        0 3rd Street|2018-01-21 00:00:00|    2|
| 445 Ofarrell Street|2017-12-19 00:00:00|    2|
|    540 Jones Street|2017-11-14 00:00:00|    2|
|   100 Larkin Street|2016-12-21 00:00:00|    2|
|          2nd Street|2019-02-09 00:00:00|    2|
|0 Cesar Chavez St...|2017-10-27 00:00:00|    2|
|        0 3rd Street|2017-06-01 00:00:00|    3|
|    2300 16th Street|2017-05-22 00:00:00|    2|
| 240 Stockton Street|2017-05-03 00:00:00|    2|
| 1446 Jackson Stree

In [44]:
# Shorter form: groupBy(...).count() counts the rows of each group and names the
# result column "count", so it can be filtered directly.
sub_df.groupBy("address",
               "incident_date").count().filter(F.col("count")>1).show()

+--------------------+-------------------+-----+
|             address|      incident_date|count|
+--------------------+-------------------+-----+
|         6 Cargo Way|2019-08-12 00:00:00|    2|
|364 Divisadero St...|2019-08-10 00:00:00|    2|
|     55 Union Street|2019-06-16 00:00:00|    2|
|    125 Cambon Drive|2019-05-07 00:00:00|    2|
|    480 Ellis St 245|2019-02-03 00:00:00|    2|
|       28 2nd Street|2019-02-02 00:00:00|    2|
|  44 Woodland Avenue|2019-01-06 00:00:00|    2|
|          Mission St|2018-02-09 00:00:00|    2|
|        0 3rd Street|2018-01-21 00:00:00|    2|
| 445 Ofarrell Street|2017-12-19 00:00:00|    2|
|    540 Jones Street|2017-11-14 00:00:00|    2|
|   100 Larkin Street|2016-12-21 00:00:00|    2|
|          2nd Street|2019-02-09 00:00:00|    2|
|0 Cesar Chavez St...|2017-10-27 00:00:00|    2|
|        0 3rd Street|2017-06-01 00:00:00|    3|
|    2300 16th Street|2017-05-22 00:00:00|    2|
| 240 Stockton Street|2017-05-03 00:00:00|    2|
| 1446 Jackson Stree