# 01. Scraper: Directory
> Author: [Dawn Graham](https://dawngraham.github.io/)

Scrape all pages of TimeBanks.org directory to get initial listing of all timebanks.

## Import libraries

In [1]:
import pandas as pd
import requests
import time
import unicodedata
from bs4 import BeautifulSoup

## Get slugs

In [2]:
tb_slugs = []

counter = 0
print('Getting timebank slugs... Directory page # ')

# Cycle through all pages in directory
for page in range(16):
    url = f'http://community.timebanks.org/directory?page={page}&js=1&order=title_1&sort=asc'
    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'lxml')
    table = soup.find('table')
    
    for row in table.find('tbody').find_all('tr'):
        tb_slug = {}
        tb_slug['name'] = row.find('a').text.strip()
        tb_slug['slug'] = row.find('a').attrs['href'].strip('/')
        tb_slugs.append(tb_slug)
        
    print(counter+1, end=' ')
    
    time.sleep(1)
    counter += 1

Getting timebank slugs... Directory page # 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 

In [3]:
pd.DataFrame(tb_slugs).head()

Unnamed: 0,name,slug
0,Addington TimeBank,timebanks/addington-timebank
1,AHA Time Bank (Alaskans Helping Alaskans),timebanks/aha-time-bank-alaskans-helping-alaskans
2,Alticultura,timebanks/alticultura
3,Anderson Community Timebank,timebanks/anderson-community-timebank
4,Appalachian Time Exchange,timebanks/appalachian-time-exchange


## Get timebank details

In [4]:
timebanks = []
counter = 0
total_timebanks = pd.DataFrame(tb_slugs).shape[0]

print(f'Getting {total_timebanks} timebank details... ')
for tb in range(len(tb_slugs)):
    url = f"http://community.timebanks.org/{tb_slugs[tb]['slug']}"
    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'lxml')

    timebank = {}
    timebank['name'] = tb_slugs[tb]['name']
    timebank['slug'] = tb_slugs[tb]['slug']
    timebank['members'] = soup.find('div', {'class': 'views-field-field-num-users-value'}).span.text.strip().replace(',', '')
    timebank['last_exchange'] = soup.find('div', {'class': 'views-field-field-last-exchange-value'}).span.text.strip()
    timebank['offers'] = soup.find('div', {'class': 'views-field-field-active-offers-value'}).span.text.strip().replace(',', '')
    timebank['requests'] = soup.find('div', {'class': 'views-field-field-active-requests-value'}).span.text.strip().replace(',', '')
    timebank['exchanges'] = soup.find('div', {'class': 'views-field-field-num-exchanges-value'}).span.text.strip().replace(',', '')
    timebank['hours'] = soup.find('div', {'class': 'views-field-field-hours-exchanged-value'}).span.text.strip().replace(',', '')
    timebank['focus'] = soup.find('div', {'class': 'views-field-name'}).span.text.strip()
    timebank['sponsor'] = soup.find('div', {'class': 'views-field-field-tb-sponsor-value'})
    timebank['url'] = soup.find('div', {'class': 'views-field-markup-1'}).a['href'].strip()
    timebank['url_2'] = soup.find('div', {'class': 'views-label-field-custom-url-url'})
    timebank['address'] = soup.find('div', {'class': 'views-field-street'}).span.get_text(', ').strip()
    timebank['postal'] = soup.find('div', {'class': 'views-field-postal-code'}).span.text.strip()
    timebank['country'] = soup.find('div', {'class': 'views-field-country'}).span.text.strip()
    timebank['phone'] = soup.find('div', {'class': 'views-field-phone'})
    timebank['notes'] = soup.find('div', {'class': 'views-field-body'}).div.text.replace('\xa0',' ').replace('\n', ' ')
    
    timebanks.append(timebank)
        
    if (counter+1) % 5 == 0:
        print(counter, end=' ')
    
    time.sleep(1)
    counter += 1
    if counter == total_timebanks:
        print('Done.')

# Save to dataframe
timebanks = pd.DataFrame(timebanks)

Getting 158 timebank details... 
4 9 14 19 24 29 34 39 44 49 54 59 64 69 74 79 84 89 94 99 104 109 114 119 124 129 134 139 144 149 154 Done.


## Get Mission
i.e. https://addington.timebanks.org/mission

In [5]:
counter = 0
total_timebanks = timebanks.shape[0]
timebanks['mission'] = ''

print(f'Getting {total_timebanks} timebank details... ')
for timebank in range(len(timebanks)):
    try:
        url = f"{timebanks['url'][timebank]}/mission"
        res = requests.get(url)
        soup = BeautifulSoup(res.content, 'lxml')

        timebanks['mission'][timebank] = soup.find('div', {'class': 'page-content'}).text.strip().replace('\xa0',' ').replace('\n', ' ').replace('\r', '')
    except:
        pass

    if (counter+1) % 5 == 0:
        print(counter, end=' ')
    
    time.sleep(1)
    counter += 1
    if counter == total_timebanks:
        print('Done.')
        
# Export to csv
filetime = time.strftime("%y%m%d_%H%M%S", time.localtime())
timebanks.to_csv(f'../data/timebanks_{filetime}.csv', index=False)

Getting 158 timebank details... 
4 9 14 19 24 29 34 39 44 49 54 59 64 69 74 79 84 89 94 99 104 109 114 119 124 129 134 139 144 149 154 Done.


In [6]:
timebanks.head()

Unnamed: 0,address,country,exchanges,focus,hours,last_exchange,members,name,notes,offers,phone,postal,requests,slug,sponsor,url,url_2,mission
0,"7 Dickens St, Addington",New Zealand,1132,,5997,50 min 35 sec ago,178,Addington TimeBank,This TimeBank became live on 1 February 2012. ...,37,"<div class=""views-field-phone""> <label class=""...","Christchurch, Canterbury 8024",28,timebanks/addington-timebank,"<div class=""views-field-field-tb-sponsor-value...",http://addington.timebanks.org,,Addington TimeBank: Serving Southern Christchu...
1,"331 Sterling Highway, PO BOX 3493 (mailing add...",United States,439,,1375,2 years 41 weeks ago,207,AHA Time Bank (Alaskans Helping Alaskans),,0,"<div class=""views-field-phone""> <label class=""...","Homer, Alaska 99603",0,timebanks/aha-time-bank-alaskans-helping-alaskans,,http://aha.timebanks.org,,Our mission is to strengthen our community b...
2,"Entrada al Refugio Tierra Colorada Baja , PO B...",Guatemala,none (start up),,none (start up),none (start up),2,Alticultura,,0,"<div class=""views-field-phone""> <label class=""...","Quetzaltenango, Quetzaltenango 9001",0,timebanks/alticultura,"<div class=""views-field-field-tb-sponsor-value...",http://alticultura.timebanks.org,,Build adaptive capacity for integrated sustain...
3,2889 E. Center St,United States,2,,3,3 years 12 weeks ago,8,Anderson Community Timebank,,0,"<div class=""views-field-phone""> <label class=""...","Anderson, California 96007",0,timebanks/anderson-community-timebank,"<div class=""views-field-field-tb-sponsor-value...",http://andersoncommunity.timebanks.org,,Neighbors helping neighbors by utilizing their...
4,298 School Circle,United States,1,,5,1 year 30 weeks ago,8,Appalachian Time Exchange,,4,"<div class=""views-field-phone""> <label class=""...","Blairsville, Georgia 30512",4,timebanks/appalachian-time-exchange,"<div class=""views-field-field-tb-sponsor-value...",http://ate.timebanks.org,,To empower our community in sharing its gifts ...


## Get updated details

After initial collection, the code below can be run to get updated numbers for exchanges, hours, last_exchange, members, offers, and requests. This information can be used to get a sense of activity over time.

In [13]:
tb_slugs = pd.read_csv('../data/timebanks_190112_000745.csv', usecols=['slug'])

timebanks = []
counter = 0
total_timebanks = tb_slugs.shape[0]

print(f'Getting {total_timebanks} timebank details... ')
for tb in range(len(tb_slugs)):
    url = f"http://community.timebanks.org/{tb_slugs['slug'][tb]}"
    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'lxml')

    timebank = {}
    timebank['members'] = soup.find('div', {'class': 'views-field-field-num-users-value'}).span.text.strip().replace(',', '')
    timebank['last_exchange'] = soup.find('div', {'class': 'views-field-field-last-exchange-value'}).span.text.strip()
    timebank['offers'] = soup.find('div', {'class': 'views-field-field-active-offers-value'}).span.text.strip().replace(',', '')
    timebank['requests'] = soup.find('div', {'class': 'views-field-field-active-requests-value'}).span.text.strip().replace(',', '')
    timebank['exchanges'] = soup.find('div', {'class': 'views-field-field-num-exchanges-value'}).span.text.strip().replace(',', '')
    timebank['hours'] = soup.find('div', {'class': 'views-field-field-hours-exchanged-value'}).span.text.strip().replace(',', '')
    timebank['url'] = soup.find('div', {'class': 'views-field-markup-1'}).a['href'].strip()
    
    timebanks.append(timebank)
        
    if (counter+1) % 5 == 0:
        print(counter, end=' ')
    
    time.sleep(1)
    counter += 1
    if counter == total_timebanks:
        print('Done.')

timebanks = pd.DataFrame(timebanks)
        
# Export to csv
filetime = time.strftime("%y%m%d_%H%M%S", time.localtime())
timebanks.to_csv(f'../data/timebank_updates_{filetime}.csv', index=False)


Getting 158 timebank details... 
4 9 14 19 24 29 34 39 44 49 54 59 64 69 74 79 84 89 94 99 104 109 114 119 124 129 134 139 144 149 154 Done.
