# Garage Saler

A small project to practice web scraping, data aggregation and management, and hopefully some mapping and, eventually, machine-learning (image recognition) tools. The goal of the project is to answer the question: "Are there any estate/garage sales near me, and if so, is there anything there that I want?"

Resources: 

https://googlemaps.github.io/google-maps-services-python/docs/2.2/

https://developers.google.com/maps/documentation/static-maps/intro

http://www.estatesales.net/GA/Decatur/30033

http://chrisholdgraf.com/querying-craigslist-with-python/

http://pythontips.com/2013/08/08/storing-and-loading-data-with-json/


imports and initial data collection:

In [1]:
%matplotlib inline
from bs4 import BeautifulSoup
from collections import OrderedDict
import googlemaps as goo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import requests
from sklearn.cluster import KMeans
import time
import webbrowser

# Fetch the estatesales.net listing page for Decatur, GA 30033.
# The timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of silently parsing an
# error page as if it were the listing.
r = requests.get('http://www.estatesales.net/GA/Decatur/30033', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Every <section class="saleItem"> element found on the page; each one is
# later handed to EstateSale for parsing.
sales = soup.find_all('section', attrs={'class': 'saleItem'})

class and function definitions: 

In [24]:
class Sale(object):
    """Base class for sale objects.

    Each inheriting class needs to define ``sale_info(text)``; it is called
    from ``__init__`` to populate the instance attributes from the raw listing.
    """
    # Class-wide settings shared by every sale; assigned by the calling code.
    homeaddress = None
    homelat = None
    homelng = None
    # Plain class attribute: writing ``Sale.Gmaps_apikey`` here would raise
    # NameError because ``Sale`` is not bound until the class body finishes.
    Gmaps_apikey = None

    def __init__(self, text):
        """Store the raw listing and let ``sale_info`` fill in the details.

        Args:
            text: Raw listing object; stored as ``rawtext`` and passed
                unchanged to ``sale_info``.
        """
        # Init vars
        self.rawtext = text
        self.streetaddress = None
        self.zipcode = None
        self.url = None
        self.happeningnow = None
        self.pictures = None
        self.distancetohome = None
        self.lat = None
        self.lng = None

        # Populate vars
        self.sale_info(text)

    def sale_info(self, text):
        """Populate the instance from ``text``; subclasses must override.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError(
            'subclasses of Sale must define sale_info(text)')

    def formatted_address(self, addy):
        """Return ``addy`` trimmed, with spaces replaced by ``+``.

        Surrounding whitespace is removed first, then any mix of spaces,
        commas and periods left at either end, so that input such as
        ``'1 Main St ,.'`` does not keep stray punctuation.
        """
        return addy.strip().strip(' ,.').replace(' ', '+')

    def getlatlong(self, address):
        """Geocode ``address`` (restricted to the US) with Google Maps.

        Args:
            address: Address string to look up.

        Returns:
            ``(lat, lng)`` of the first geocoding result, or ``(None, None)``
            when the geocoder returns no results.
        """
        g = goo.Client(key=Sale.Gmaps_apikey)
        print(address)
        results = g.geocode(address, components={'country': 'US'})
        if not results:
            # No match: report "unknown location" rather than raising
            # IndexError, which would abort construction of the sale.
            return (None, None)
        location = results[0]['geometry']['location']
        return (location['lat'], location['lng'])
        
    
class EstateSale(Sale):
    """Sale parsed from one listing element of an estatesales.net page."""

    def sale_info(self, es):
        """Fill in zip code, address, coordinates, URL and time message.

        Args:
            es: Parsed listing element supporting ``select`` and ``find``.

        Returns:
            Always ``None``. The method returns early, leaving the instance
            untouched, when the listing has no ``span.timeMessage`` element
            or when the link, street or postal-code element is missing.
        """
        time_msgs = es.select('span.timeMessage')
        if not time_msgs:
            return None

        links = es.select('h3 > a')
        street_span = es.find(
            'span', attrs={'id': re.compile('.*StreetAddressSpan.*')})
        zip_span = es.find(
            'span', attrs={'id': re.compile('.*PostalCodeSpan.*')})
        if not (links and street_span and zip_span):
            print("Whoops")
            return None

        self.zipcode = zip_span.text
        street_text = street_span.text
        # Only geocode when there is an actual street to look up.
        if street_text != '' and self.zipcode is not None:
            self.streetaddress = self.formatted_address(street_text)
            query = self.streetaddress + ',' + self.zipcode
            self.lat, self.lng = self.getlatlong(query)
        self.url = 'http://www.estatesales.net' + links[0]['href']
        self.happeningnow = time_msgs[0].text


class GarageSale(Sale):
    """Placeholder for garage-sale listings; parsing is still to be done."""

    def sale_info(self, gs):
        """Do nothing yet (tbd); ``gs`` is ignored."""
        return None
    
def plotsalemap(saleslist, apikey, home):
    """Open a Google Static Maps image showing home and every sale.

    Args:
        saleslist: Iterable of marker location strings (the caller passes
            ``'lat,lng'`` strings); empty strings are skipped.
        apikey: Google Static Maps API key.
        home: Home location string; drawn as the red ``H`` marker and used
            as the map center.

    Returns:
        The URL that was handed to the web browser.
    """
    # Function-scope import so the block works on both Python 2 and 3
    # without touching the file's import cell.
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    baseurl = 'https://maps.googleapis.com/maps/api/staticmap?'

    markers = ['color:blue|label:S|' + x for x in saleslist if x != '']

    # A list of pairs keeps the parameter order deterministic and allows the
    # repeated 'markers' key. urlencode percent-escapes '|', ',', spaces and
    # any '&' or '#' in the address, which raw concatenation left as-is.
    params = [('key', apikey),
              ('maptype', 'roadmap'),
              ('size', '600x300'),
              ('center', home),
              ('markers', 'color:red|label:H|' + home)]
    params.extend(('markers', m) for m in markers)

    newurl = baseurl + urlencode(params)
    webbrowser.open(newurl)
    return newurl

def plotroute(saleslist, apikey, home):
    """Open a Google Static Maps image showing home and the route stops.

    Args:
        saleslist: Iterable of sequences whose first two items are latitude
            and longitude (the caller passes ``[lat, lng, distance]`` rows);
            entries equal to ``''`` are skipped.
        apikey: Google Static Maps API key.
        home: Home location string; drawn as the red ``H`` marker and used
            as the map center.

    Returns:
        The URL that was handed to the web browser (earlier versions
        returned ``None``; callers that ignore the result are unaffected).
    """
    # Function-scope import so the block works on both Python 2 and 3
    # without touching the file's import cell.
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    baseurl = 'https://maps.googleapis.com/maps/api/staticmap?'

    markers = ['color:blue|label:S|' + str(x[0]) + ',' + str(x[1])
               for x in saleslist if x != '']
    print('%s %s' % (markers, home))

    # A list of pairs keeps the parameter order deterministic and allows the
    # repeated 'markers' key. urlencode percent-escapes '|', ',', spaces and
    # any '&' or '#' in the address, which raw concatenation left as-is.
    params = [('key', apikey),
              ('maptype', 'roadmap'),
              ('size', '600x300'),
              ('center', home),
              ('markers', 'color:red|label:H|' + home)]
    params.extend(('markers', m) for m in markers)

    newurl = baseurl + urlencode(params)
    print(newurl)
    webbrowser.open(newurl)
    return newurl
    
def getwalkingroute(g, home, latlng, triplength, max_waypoints=8, pause=2):
    """Choose the sale stops that fit into a walking round trip from home.

    Args:
        g: Google Maps client exposing ``distance_matrix`` and ``directions``
            (the caller passes a ``googlemaps.Client``).
        home: Start and end point of the loop, in any form the client accepts.
        latlng: Sequence of ``(lat, lng)`` pairs, one per candidate sale.
        triplength: Distance budget in the units of the API's
            ``distance['value']`` field (meters according to the Google
            documentation, not miles). Sales at least this far from home are
            dropped, and the whole loop may total at most ``2 * triplength``.
        max_waypoints: Largest number of stops sent to ``directions``; when
            more qualify, the ones closest to home are kept. The default of 8
            presumably mirrors a Directions API waypoint cap -- confirm.
        pause: Seconds to sleep before every ``directions`` request.

    Returns:
        A numpy array with one ``[lat, lng, distance_to_home]`` row per stop
        kept (shape ``(0, 3)`` when ``latlng`` is empty). Rows are NOT in the
        optimized visiting order returned by the API.
    """
    if len(latlng) == 0:
        # Nothing to route; avoid an API request with no destinations.
        return np.empty((0, 3))

    # Drop sales that are triplength or more away from home.
    distmat = g.distance_matrix(home, latlng, mode='walking')
    # Elements whose status is not OK carry no 'distance' entry; treat them
    # as infinitely far so they are filtered out instead of raising KeyError.
    dist2home = np.array(
        [el['distance']['value'] if 'distance' in el else np.inf
         for el in distmat['rows'][0]['elements']],
        dtype=float)
    print(dist2home)
    coords = np.asarray(latlng, dtype=float)
    nearby = dist2home < triplength
    wp = np.c_[coords[nearby], dist2home[nearby]]
    print(wp[:, 0:2].tolist())

    # If too many waypoints remain, keep the ones closest to home.
    if len(wp) > max_waypoints:
        wp = wp[wp[:, 2].argsort()][:max_waypoints]
        print(wp[:, 0:2].tolist())

    # Get directions. While the loop is longer than 2 * triplength, drop the
    # farthest stop(s) and try again.
    while True:
        time.sleep(pause)
        route = g.directions(home, home, mode='walking',
                             waypoints=wp[:, 0:2].tolist(),
                             optimize_waypoints=True)
        leglengths = [leg['distance']['value'] for leg in route[0]['legs']]
        print(leglengths)
        # Also stop once no stops are left, so the loop cannot spin forever.
        if sum(leglengths) <= 2 * triplength or len(wp) == 0:
            return wp
        # Strict '<' removes every stop tied for the maximum distance.
        wp = wp[wp[:, 2] < wp[:, 2].max()]
        print(wp)

# main()
To do:

- figure out how to return directions to the user (plotting, turn-by-turn, interactive map, etc.)
- start writing unit tests
- start looking into image search feasibility 

In [None]:
# Get user-specific values from file. One value per line, in this order:
# Static Maps API key, Maps API key (used by the Sale class and routing),
# home address.
with open('user_values.txt') as myfile:
    Gstaticmap_apikey = myfile.readline().rstrip()
    Sale.Gmaps_apikey = myfile.readline().rstrip()
    Sale.homeaddress = myfile.readline().rstrip()

# Get current sales and collect location information
sale_info_list = [EstateSale(e) for e in sales]

# Update home info. getlatlong() is called on the first sale instance, so
# this step is skipped when nothing was scraped (indexing [0] would raise).
if Sale.homelat is None and sale_info_list:
    (Sale.homelat, Sale.homelng) = sale_info_list[0].getlatlong(Sale.homeaddress)

# Compile info in usable format for mapping; sales without coordinates
# cannot be placed on the map and are left out.
located_sales = [es for es in sale_info_list if es.lat is not None]
latlongs_str = [(str(es.lat) + ',' + str(es.lng)) for es in located_sales]
latlongs_tup = [(es.lat, es.lng) for es in located_sales]

if latlongs_tup:
    waypoints = getwalkingroute(goo.Client(Sale.Gmaps_apikey), Sale.homeaddress, latlongs_tup, 17000)

    # Only the first 30 sales are drawn -- presumably to keep the map URL
    # within length limits; confirm.
    plotsalemap(latlongs_str[:30], Gstaticmap_apikey, Sale.homeaddress)
    plotroute(waypoints.tolist(), Gstaticmap_apikey, Sale.homeaddress)
else:
    # Without any located sale there is nothing to route or plot.
    print('No sales with a known location were found; nothing to map.')