# Download property tax records

In [2]:
import os
import csv
import time
import requests
import settings

In [3]:
class SBAssessorScrape:
    """
    A base class with our common scraper tasks.

    Subclasses are expected to supply two things this class reads but does
    not define: a ``slug`` attribute (used in the output filename) and a
    ``url`` property (the page to download for the parcel).
    """
    headers = {
        "User-Agent": "Los Angeles Times Data Desk (datadesk@latimes.com)"
    }
    # Seconds to wait for the server before giving up. Without an explicit
    # timeout, requests will wait forever on a stalled connection.
    timeout = 60
    # Seconds to pause after each download.
    delay = 10

    def __init__(self, apn, year=2018):
        """
        Store the parcel number (``apn``) and the ``year`` used in filenames
        and, for subclasses that need it, in the URL.
        """
        self.apn = apn
        self.year = year

    def scrape(self):
        """
        Download the page and save it, unless a saved copy already exists.

        Returns the path of the saved HTML file either way.
        """
        if not self.already_scraped:
            html = self.get_html(self.url)
            self.write(html)
            time.sleep(self.delay)
        return self.outpath

    @property
    def outpath(self):
        """
        The file path where this scrape's HTML is stored.
        """
        return f"{settings.input_dir}/scrape/{self.slug}-{self.apn}-{self.year}.html"

    @property
    def already_scraped(self):
        """
        True if a file already exists at ``outpath``.
        """
        return os.path.exists(self.outpath)

    def get_html(self, url):
        """
        Fetch ``url`` and return the response body as text.

        Raises ``requests.HTTPError`` on a 4xx/5xx response so that an error
        page is never saved and later mistaken for a finished scrape.
        """
        print(f"Downloading {url} to {self.outpath}")
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        return response.text

    def write(self, html):
        """
        Save ``html`` to ``outpath`` as UTF-8, creating the folder if needed.
        """
        parent = os.path.dirname(self.outpath)
        if parent:
            # A fresh checkout may not have the output folder yet.
            os.makedirs(parent, exist_ok=True)
        with open(self.outpath, 'wb') as f:
            f.write(html.encode("utf-8"))

In [4]:
class DetailsScrape(SBAssessorScrape):
    """
    Scraper for the detail page of a single APN.
    """
    slug = "details"

    @property
    def url(self):
        """
        The detail page address, with the parcel number in the ``apn`` query parameter.
        """
        endpoint = "http://sbcassessor.com/assessor/details.aspx"
        return f"{endpoint}?apn={self.apn}"

In [5]:
class ValueNoticeScrape(SBAssessorScrape):
    """
    Scraper for the value notice page of a single APN.
    """
    slug = "valuenotice"

    @property
    def url(self):
        """
        The value notice page address, with the parcel number in the ``APN`` query parameter.
        """
        endpoint = "http://sbcassessor.com/assessor/ValueNotices.aspx"
        return f"{endpoint}?APN={self.apn}"

In [6]:
class BillScrape(SBAssessorScrape):
    """
    Scraper for the bill page of a single APN.
    """
    slug = "bill"

    @property
    def url(self):
        """
        The bill page address for this parcel and fiscal year.

        The parcel number is sent in dashed form: first three characters,
        next three, then the remainder.
        """
        apn = self.apn
        segments = (apn[:3], apn[3:6], apn[6:])
        long_apn = "{}-{}-{}".format(*segments)
        endpoint = "http://taxes.co.santa-barbara.ca.us/propertytax/taxbill.asp"
        return f"{endpoint}?FiscalYear={self.year}&ParcelNumber={long_apn}"

In [9]:
# Read the parcel numbers to scrape from the "apn" column of the parcels CSV.
# The with-block closes the file when done; newline="" is what the csv module
# asks for when it is handed a file object.
with open(f"{settings.output_dir}/hollister-parcels.csv", newline="") as parcel_file:
    apn_list = [row['apn'] for row in csv.DictReader(parcel_file)]

In [10]:
# Map each APN to the path of its saved detail page, downloading any that are missing.
detail_list = {apn: DetailsScrape(apn).scrape() for apn in apn_list}

In [11]:
# Map each APN to the path of its saved value notice page, downloading any that are missing.
valuenotice_list = {apn: ValueNoticeScrape(apn).scrape() for apn in apn_list}

In [12]:
# Map each APN to the path of its saved bill page, downloading any that are missing.
bill_list = {apn: BillScrape(apn).scrape() for apn in apn_list}