# Load python modules

In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
from pprint import pprint

# Let text columns show up to 120 characters before pandas truncates them with "...".
pd.set_option('display.max_colwidth', 120)

# Post offices in Boulder and Weld Counties, Colorado

## Load Boulder county post offices

In [3]:
# Read one post office name per line. Iterating the file (instead of
# read().split('\n')) and stripping each line drops the empty entry a trailing
# newline would create and removes stray spaces or carriage returns that would
# otherwise prevent a name from matching later.
with open('boulder.txt') as f:
    boulder = [line.strip() for line in f if line.strip()]

In [15]:
# Display the Boulder list, packing as many names as fit on each 100-character line.
pprint(boulder, compact=True, width=100)

['allenspark', 'altona', 'balarat', 'belle monte', 'big elk', 'boulder', 'broomfield', 'bunce',
 'burlington', 'canfield', 'cardinal', 'caribou', 'coal creek', 'coal park', 'copper rock',
 'coraville', 'crags', 'crescent', 'crescent rur. sta.', 'crisman', 'davidson', 'delphi', 'downer',
 'eagle rock', 'eldora', 'eldora rur. sta.', 'eldorado springs', 'eversman', 'ferberite', 'frances',
 'glacier lake', 'gold hill', 'goldhill', 'gorham', 'gresham', 'gulch', 'hawthorne', 'hessie',
 'high mar sta.', 'hygiene', 'jamestown', 'lafayette', 'lakewood', 'langford', 'left hand',
 'longmont', 'louisville', 'lyons', 'magnolia', 'marshall', 'middle boulder', 'modoc', 'nederland',
 'ni wot', 'niwot', 'noland', 'orodelfan', 'osborn', 'peaceful valley', 'pella', 'penn',
 'pinecliffe', 'primos', 'puzzler', 'rockville', 'rowena', 'salina', 'shelton', 'springdale',
 'sugar loaf', 'sunset', 'sunshine', 'superior', 'tungsten', 'valmont', 'valmont sta.', 'vesuvius',
 'wallstreet', 'ward', 'ward district', '

In [5]:
# Lower-case every name so the later comparison with lower-cased town names ignores case.
boulder = list(map(str.lower, boulder))

## Load Weld county post offices

In [6]:
# Read one post office name per line. Iterating the file (instead of
# read().split('\n')) and stripping each line drops the empty entry a trailing
# newline would create and removes stray spaces or carriage returns that would
# otherwise prevent a name from matching later.
with open('weld.txt') as f:
    weld = [line.strip() for line in f if line.strip()]

In [14]:
# Display the Weld list, packing as many names as fit on each 100-character line.
pprint(weld, compact=True, width=100)

['alfalfa', 'american ranch', 'ault', 'avalo', 'barnesville', 'black wolf', 'briggsdale',
 'buckingham', 'buffalo', 'camfield', 'carr', 'chapelton', 'cherokee city', 'clearwater', 'coleman',
 'college sta.', 'cornish', 'cotsworth', 'crest', 'dacono', 'denver junction', 'dover',
 'downtown sta.', 'eaton', 'eatonton', 'erie', 'evans', 'firestone', 'flemings ranch',
 'fort lupton', 'fort moore', 'fort morgan', 'fort sedgwick', 'fosston', 'frederick',
 'fremonts orchard', 'galeton', 'gault', 'geary', 'gilcrest', 'gill', 'gowanda', 'graham',
 'greeley', 'grover', 'hardin', 'hereford', 'highland lake', 'highlandlake', 'hillsboro',
 'hillsborough', 'hiltonville', 'hudson', 'ione', 'johnstown', 'julesburgh', 'junction house',
 'kalous', 'kauffman', 'keensburg', 'keota', 'kersey', 'koenig', 'kuner', 'la salle', 'latham',
 'levinson', 'lillian springs', 'lucerne', 'mamre', 'masters', 'mead', 'milliken', 'morgan',
 'nantes', 'new liberty', 'new raymer', 'new wattenburg', 'new windsor', 'nunn', 'o

In [8]:
# Lower-case every name so the later comparison with lower-cased town names ignores case.
weld = list(map(str.lower, weld))

# Load webpage from www.postal-history.com

In [9]:
def get_state(statecode, sec=1):
    """Return a DataFrame of one state's lots from Jim Mehrer's mail bid sale page.

    Downloads ``mailbidsale.section{sec}.html`` from www.postal-history.com and
    keeps every text fragment shaped like
    ``<STATE>-<number><sep> <town>, <year>, <description>``.

    Parameters
    ----------
    statecode : str
        State abbreviation used as the catalog prefix, e.g. ``'CO'``. Case is
        ignored: the value is upper-cased before matching.
    sec : int, default 1
        Section number of the sale page to download. Per the original author's
        note, Colorado is always in section 1; set this to the section that
        holds the state you collect.

    Returns
    -------
    pandas.DataFrame
        Columns ``cat``, ``town``, ``year`` and ``desc``, one row per matching
        lot. Every value is the string captured by the regex (``year`` is not
        converted to a number).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, instead of silently
        returning an empty frame parsed from an error page.
    """
    url = f'http://www.postal-history.com/mailbidsale.section{sec}.html'
    statecode = statecode.upper()

    # Download the page; the timeout keeps a stalled server from hanging the kernel.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid bs4's "no parser" warning).
    soup = BeautifulSoup(r.text, 'html.parser')

    # The town group accepts anything up to the first comma so that multi-word
    # names such as "Gold Hill" or "Fort Lupton" are captured; the previous
    # ``\S+`` silently dropped every lot whose town contained a space.
    # The single ``.`` after the catalog number matches whatever separator
    # character the page uses there, exactly as before.
    lot_re = re.compile(
        rf'(^{re.escape(statecode)}-\d+).\s+([^,]+?)\s*,\s+(\d+),\s+(.*)'
    )

    found = []
    for text in soup.stripped_strings:
        # Descriptions can wrap across lines in the HTML source; join them up.
        text = text.replace('\r\n', ' ')
        m = lot_re.search(text)
        if m:
            found.append(m.group(1, 2, 3, 4))

    return pd.DataFrame(found, columns=['cat', 'town', 'year', 'desc'])

In [10]:
# Fetch the Colorado lots and index the table by catalog number.
co = get_state('CO').set_index('cat')
co

Unnamed: 0_level_0,town,year,desc
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CO-1,Abbey,1907,G+ CDS as recd (91-14) on pix side of PPC. E $15 MIN.8
CO-2,Abbott,1907,"F 4-bar (87-26) on PPC. 2 strikes overlapped, dated Mar 23 & Apr 6; earlier strike maybe misdated, as Hillrose VG+ d..."
CO-3,Abbott,1910,F 4-bar (cr) (87-26) on PPC. E $20
CO-4,Adena,1922,"F 4-bar (toned) (10-49) on PPC (Curry Hotel, Ft.Morgan). E $15 MIN.8"
CO-5,Argo,1906,F+ Doane 2/1 (slight stutter; tip crs) (81-11) on PPC. E $15 MIN.8
...,...,...,...
CO-76,Waldorf,1908,"G+ 4-bar (toned) (06-12) on PPC w/Clarkson, GA, VG Doane 3/3 as recd. E $12 MIN.6"
CO-77,Waldorf,1910,"G+ 4-bar (upper R on stamp; lite tone; lower L tip nib) (06-12) on PPC: ""...mailing this at the highest P.O. in the ..."
CO-78,Welcome,1911,"G+ 4-bar (""W"" partial; ""C"" in state not struck; crs) (10-12) on PPC. E $150"
CO-79,Woodmen,1912,G+ 4-bar (EARLY) (12-49) on PPC. E $15


## Compare dataframe town column to the Boulder towns list

The towns below share a name with a Boulder County post office and appear in the latest www.postal-history.com auction.
<strong>Matching is by name only, so be aware that a town with the same name may exist in a different county.</strong>

In [11]:
# Keep only the lots whose lower-cased town name appears in the Boulder list.
co[co['town'].str.lower().isin(boulder)]

Unnamed: 0_level_0,town,year,desc
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CO-33,Gulch,1913,"VG 4-bar (lite tone) (95-16) on 2.75x5.5"" PPC. E $60 MIN.30"
CO-38,Hawthorne,1908,G+ magenta 4-bar (near VF) (06-30) on PPC. E $15 MIN.8
CO-57,Primos,1907,VF 4-bar (toned; upper L tip clip; tip crs) (07-13) on PPC. E $75


## Compare dataframe town column to the Weld towns list

In [12]:
# Keep only the lots whose lower-cased town name appears in the Weld list.
co[co['town'].str.lower().isin(weld)]

Unnamed: 0_level_0,town,year,desc
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CO-36,Hardin,1910,G+ 4-bar (lite tone) (81/55) on PPC. E $12 MIN.6
