# Load python modules

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
from pprint import pprint

# Use the fully qualified option name. Abbreviated names are resolved by
# pattern matching and can become ambiguous (and raise) if a future pandas
# release adds another option containing "max_colwidth".
pd.set_option('display.max_colwidth', 120)

# A list of post offices in Boulder & Weld Counties, Colorado

## Load Boulder county post offices

In [2]:
# Read the Boulder list, one post office name per line.
# Iterating the file and stripping each line removes the line terminator and
# any stray surrounding whitespace, and blank lines are skipped, so a trailing
# newline in the file no longer adds an empty '' entry to the list. The names
# are later compared by exact string match, so stray whitespace would
# silently cause misses.
with open('boulder.txt') as f:
    boulder = [line.strip() for line in f if line.strip()]

In [3]:
# Print the list wrapped at 100 columns; compact=True packs as many names as fit on each line.
pprint(boulder, width=100, compact=True)

['Allenspark', 'Altona', 'Balarat', 'Belle Monte', 'Big Elk', 'Boulder', 'Broomfield', 'Bunce',
 'Burlington', 'Canfield', 'Cardinal', 'Caribou', 'Coal Creek', 'Coal Park', 'Copper Rock',
 'Coraville', 'Crags', 'Crescent', 'Crescent Rur. Sta.', 'Crisman', 'Davidson', 'Delphi', 'Downer',
 'Eagle Rock', 'Eldora', 'Eldora Rur. Sta.', 'Eldorado Springs', 'Eversman', 'Ferberite', 'Frances',
 'Glacier Lake', 'Gold Hill', 'Goldhill', 'Gorham', 'Gresham', 'Gulch', 'Hawthorne', 'Hessie',
 'High Mar Sta.', 'Hygiene', 'Jamestown', 'Lafayette', 'Lakewood', 'Langford', 'Left Hand',
 'Longmont', 'Louisville', 'Lyons', 'Magnolia', 'Marshall', 'Middle Boulder', 'Modoc', 'Nederland',
 'Ni Wot', 'Niwot', 'Noland', 'Orodelfan', 'Osborn', 'Peaceful Valley', 'Pella', 'Penn',
 'Pinecliffe', 'Primos', 'Puzzler', 'Rockville', 'Rowena', 'Salina', 'Shelton', 'Springdale',
 'Sugar Loaf', 'Sunset', 'Sunshine', 'Superior', 'Tungsten', 'Valmont', 'Valmont Sta.', 'Vesuvius',
 'Wallstreet', 'Ward', 'Ward District', '

In [4]:
# Normalise every Boulder name to lower case so that later string
# comparisons are case-insensitive.
boulder = list(map(str.lower, boulder))

## Load Weld county post offices

In [5]:
# Read the Weld list, one post office name per line.
# Iterating the file and stripping each line removes the line terminator and
# any stray surrounding whitespace, and blank lines are skipped, so a trailing
# newline in the file no longer adds an empty '' entry to the list. The names
# are later compared by exact string match, so stray whitespace would
# silently cause misses.
with open('weld.txt') as f:
    weld = [line.strip() for line in f if line.strip()]

In [6]:
# Print the list wrapped at 100 columns; compact=True packs as many names as fit on each line.
pprint(weld, width=100, compact=True)

['Alfalfa', 'American Ranch', 'Ault', 'Avalo', 'Barnesville', 'Black Wolf', 'Briggsdale',
 'Buckingham', 'Buffalo', 'Camfield', 'Carr', 'Chapelton', 'Cherokee City', 'Clearwater', 'Coleman',
 'College Sta.', 'Cornish', 'Cotsworth', 'Crest', 'Dacono', 'Denver Junction', 'Dover',
 'Downtown Sta.', 'Eaton', 'Eatonton', 'Erie', 'Evans', 'Firestone', 'Flemings Ranch',
 'Fort Lupton', 'Fort Moore', 'Fort Morgan', 'Fort Sedgwick', 'Fosston', 'Frederick',
 'Fremonts Orchard', 'Galeton', 'Gault', 'Geary', 'Gilcrest', 'Gill', 'Gowanda', 'Graham',
 'Greeley', 'Grover', 'Hardin', 'Hereford', 'Highland Lake', 'Highlandlake', 'Hillsboro',
 'Hillsborough', 'Hiltonville', 'Hudson', 'Ione', 'Johnstown', 'Julesburgh', 'Junction House',
 'Kalous', 'Kauffman', 'Keensburg', 'Keota', 'Kersey', 'Koenig', 'Kuner', 'La Salle', 'Latham',
 'Levinson', 'Lillian Springs', 'Lucerne', 'Mamre', 'Masters', 'Mead', 'Milliken', 'Morgan',
 'Nantes', 'New Liberty', 'New Raymer', 'New Wattenburg', 'New Windsor', 'Nunn', 'O

In [7]:
# Normalise every Weld name to lower case so that later string
# comparisons are case-insensitive.
weld = list(map(str.lower, weld))

# Load webpage from www.postal-history.com

In [8]:
def get_state(statecode, sec=1, timeout=30):
    """Return a dataframe of a state's postal history lots from Jim Mehrer's site.

    Colorado is always on the section1 webpage; set ``sec`` to the correct
    section for the state you collect.

    Parameters
    ----------
    statecode : str
        State abbreviation used as the lot-number prefix (e.g. ``'CO'``).
        It is upper-cased before matching.
    sec : int, default 1
        Section number inserted into the page URL.
    timeout : float, default 30
        Seconds to wait for the server; without a timeout ``requests`` can
        block indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per matched lot with columns ``cat``, ``town``, ``year`` and
        ``desc`` (all captured as text). Empty if no string on the page
        matches the lot pattern.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status, instead of silently
        parsing an error page into an empty dataframe.
    requests.RequestException
        For connection failures or timeouts.
    """
    url = f'http://www.postal-history.com/mailbidsale.section{sec}.html'
    statecode = statecode.upper()

    # Download the page; fail loudly on HTTP errors.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(r.text, 'html.parser')

    # "<STATE>-<number>" + one separator character + "town, year, description".
    # The single `.` after the number matches any one character (presumably
    # the period following the lot number - kept loose on purpose).
    # re.escape keeps an unusual statecode from being read as regex syntax.
    lot_pattern = re.compile(
        rf'(^{re.escape(statecode)}-\d+).\s+(.+?),\s+(.+?),\s+(.*)'
    )

    found = []
    for text in soup.stripped_strings:
        # Join lines broken by CRLF so `.` can match across the whole lot text.
        text = text.replace('\r\n', ' ')
        m = lot_pattern.search(text)
        if m:
            found.append(m.group(1, 2, 3, 4))

    return pd.DataFrame(found, columns=['cat', 'town', 'year', 'desc'])

In [12]:
# Fetch the Colorado lots (default sec=1) into a dataframe.
co = get_state('CO')

In [19]:
# Display options: raise the row limit and never truncate long cell text.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', None)

# set_properties returns the Styler itself, so the per-column CSS rules chain.
s = (
    co.style
    .set_properties(subset=['year'], **{'width': '80px', 'text-align': 'center'})
    .set_properties(subset=['cat'], **{'width': '50px', 'text-align': 'left'})
    .set_properties(subset=['town'], **{'font-weight': 'bold'})
)

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis='index') is the replacement. Fall back to the old method
# only on pandas versions that predate Styler.hide.
s.hide(axis='index') if hasattr(s, 'hide') else s.hide_index()

cat,town,year,desc
CO-1,Lot 8 diff.) Colorado Doane cancels,1907-09,VG-G+ (varied condition) from a specialized collection; LOW minimum bid for this batch; on PPCs. MIN.$15
CO-2,Alice,1909,G+ 4-bar (town bit obscured) (00/39) on PPC. E $15 MIN.8
CO-3,Amy,1914,G+ 4-bar (09-37) on PPC. E $12 MIN.6
CO-4,Apex,1910,G+ 4-bar (town part lite; toned; tip cr) (94/32) on PPC. E $15 MIN.8
CO-5,Arickaree,1913,G+ 4-bar (near VG+; tip crs) (88/61) on PPC. E $8
CO-6,Association Camp,1931,VG 4-bar ty.E (16-66) on commer.PPC (Island Lake). E $12 MIN.6
CO-7,Avon,1910,F 4-bar on PPC. E $12 MIN.6
CO-8,Burdett,1889 (Jan 1),"G+ CDS/target (EARLY; trim R; edge tears R) ""The Colorado Topics, Hyde, Colo."" 20-line text ad at L (88-37) on cvr w/Hyde (82/40) VG CDS as recd. E $30"
CO-9,Calhan,1909,G+ Doane 3/5 (lite tone) on PPC. E $8
CO-10,Cascade,1889,G+ CDS/target (EARLY; town part spotty; no flap; tear R) on cvr. E $20


## Compare dataframe town column to the Boulder towns list

The towns below are in Boulder County and are for sale in the latest www.postal-history.com auction.
<strong>Be aware that a town with the same name may exist in a different county.</strong>

In [20]:
# Keep only the lots whose lower-cased town name appears in the Boulder list.
co[co['town'].str.lower().isin(boulder)]

Unnamed: 0,cat,town,year,desc
15,CO-16,Eldora,1906,G+ Doane 2/6 (near F+; crs) (97-67) on PPC. E $12 MIN.6
16,CO-17,Eldora,1909,F Doane 2/6 (dial bit hi; o/s; lite tone) (97-67) on PPC. E $8
22,CO-23,Gulch,1910,"VG+ 4-bar (nick R; tip nick; crnr crs) (95-16) on PPC (litho D & RG Depot, Glenwood Spgs). E $50"
45,CO-46,Nederland,1935,VF 4-bar ty.E on commer.PPC. E $14


## And now the Weld County towns

In [21]:
# Keep only the lots whose lower-cased town name appears in the Weld list.
co[co['town'].str.lower().isin(weld)]

Unnamed: 0,cat,town,year,desc
12,CO-13,Denver Junction,1886,"G+ CDS/grid (part ruff trim L, in cc) pair 1c banknotes (85-86) on cvr. E $75"
27,CO-28,Hereford,1914,VG+ 4-bar on PPC. E $12 MIN.6
35,CO-36,Keota,1913,G+ magenta 4-bar (88/73) on PPC. E $12 MIN.6
70,CO-71,Sligo,1910,"G+ 4-bar (""O"" on stamp; o/w VF) (08-41) on PPC. E $14"
71,CO-72,Sligo,ca.1911,G+ 4-bar (dial bit hi; year heavy inked) (08-41) on PPC. E $14
