# 03. Scraper: TimeBank Listings
> Author: [Dawn Graham](https://dawngraham.github.io/)

Get listings for all offers and requests.

## Import Libraries

In [1]:
import pandas as pd
import requests
import time
import regex as re
from bs4 import BeautifulSoup

In [2]:
# Load the timebank directory saved earlier; only the `url` column is
# needed to build the listing-page addresses below.
timebanks_csv = '../data/timebanks_190112_000745.csv'
timebanks = pd.read_csv(timebanks_csv, usecols=['url'])

# Preview the first few timebank URLs
timebanks.head()

Unnamed: 0,url
0,http://addington.timebanks.org
1,http://aha.timebanks.org
2,http://alticultura.timebanks.org
3,http://andersoncommunity.timebanks.org
4,http://ate.timebanks.org


## Get Offer & Request Listings

**Order to capture listings:**  
- For both listing types (1 = offers, 2 = requests)
    - get all timebanks
        - get all pages for each timebank
            - get all listings on each page


In [3]:
# --- Scraper settings -------------------------------------------------------
PAGE_SIZE = 100        # listings requested per page (`limit` query parameter)
REQUEST_TIMEOUT = 30   # seconds to wait before abandoning a stalled request
# (site `type` code, label stored in the `type` column)
LISTING_TYPES = ((1, 'offer'), (2, 'request'))


def get_soup(url):
    """Download `url` and return the parsed page, or None if the request failed.

    Connection errors, timeouts and HTTP error statuses are reported inline
    and swallowed so that one unreachable timebank does not abort the run.
    """
    try:
        res = requests.get(url, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as err:
        print(f'[skipped {url}: {err}]', end=' ')
        return None
    return BeautifulSoup(res.content, 'lxml')


def get_total_listings(soup):
    """Return N from the pager text 'Showing X - Y of N', or 0 if unavailable."""
    pager = soup.find('div', {'class': 'pager-state'})
    if pager is None:
        return 0
    match = re.search(r'of\s+([\d,]+)', pager.text)
    if match is None:
        return 0
    # Tolerate a thousands separator in the total, e.g. "1,250"
    digits = match.group(1).replace(',', '')
    return int(digits) if digits else 0


def parse_listings(soup, timebank_name, listing_type):
    """Return one dict per listing found on a single results page.

    Each dict has the keys `listing_id`, `listing_cat` (set of parent and
    child category ids), `description`, `timebank` and `type`.
    """
    records = []

    for row in soup.find_all('div', {'class': 'media'}):

        # Match on the bare class name: a selector with a leading space only
        # matches when the parser keeps the attribute's leading whitespace.
        id_input = row.find('input', {'class': 'selection-id'})
        if id_input is None or not id_input.get('value'):
            # Not a listing row (no selection checkbox), nothing to record
            continue

        # Collect all parent and child category ids for the listing.
        # The id is read from the `cat=` query parameter; str.strip() removes
        # a *set of characters*, not a prefix, so it is not used here.
        categories = set()
        for css_class in ('parent', 'child'):
            for link in row.find_all('a', {'class': css_class}):
                cat_match = re.search(r'cat=(\d+)', link.get('href') or '')
                if cat_match:
                    categories.add(int(cat_match.group(1)))

        records.append({
            'listing_id': int(id_input.get('value')),
            'listing_cat': categories,
            'description': row.h4.text.strip() if row.h4 else '',
            'timebank': timebank_name,
            'type': listing_type,
        })

    return records


# --- Scrape every listing type for every timebank ---------------------------
listing_records = []

total_timebanks = timebanks.shape[0]
print(f'Getting listings from {total_timebanks} timebanks... ')

# Get type 1 (offers) and type 2 (requests)
for type_code, type_label in LISTING_TYPES:

    print(f'Getting type {type_code}... ', end=' ')

    # Iterate through all timebank directories
    for counter, timebank_url in enumerate(timebanks['url'], start=1):
        base_url = timebank_url.rstrip('/')

        # Remove the scheme as a prefix; `.strip('http://')` would also eat
        # leading/trailing h, t, p, ':' and '/' characters of the name itself.
        timebank_name = re.sub(r'^https?://', '', base_url).replace('.timebanks.org', '')

        page_url = f'{base_url}/ads?so=desc&o=updated&limit={PAGE_SIZE}&type={type_code}'

        # The first page supplies both the total count and the first batch of
        # listings, so it is downloaded only once.
        first_page = get_soup(f'{page_url}&offset=0')

        if first_page is not None:
            listing_records.extend(parse_listings(first_page, timebank_name, type_label))

            # Ceiling division: an exact multiple of PAGE_SIZE needs no extra page
            total_listings = get_total_listings(first_page)
            pages = max(1, (total_listings + PAGE_SIZE - 1) // PAGE_SIZE)

            # Remaining pages, if any
            for page in range(1, pages):
                soup = get_soup(f'{page_url}&offset={page * PAGE_SIZE}')
                if soup is not None:
                    listing_records.extend(parse_listings(soup, timebank_name, type_label))

        print(counter, end=' ')

        # Pause between timebanks
        time.sleep(1)

# Save to DataFrame
listings = pd.DataFrame(listing_records)

# Export to csv
filetime = time.strftime("%y%m%d_%H%M%S", time.localtime())
listings.to_csv(f'../data/listings_{filetime}.csv', index=False)

Getting listings from 158 timebanks... 
Getting type 1...  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 Getting type 2...  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 12

In [4]:
# Preview the first rows instead of rendering the whole scraped frame
listings.head(10)

Unnamed: 0,description,listing_cat,listing_id,timebank,type
0,I offer car wash.,"{33, 34, 35, 4}",2421,addington,offer
1,Listening Ear/Chat,"{32, 3, 5, 41, 31}",2420,addington,offer
2,Tutoring,"{59, 7}",2418,addington,offer
3,On Line Dating,"{56, 5, 46, 7}",2415,addington,offer
4,Gib board,"{9, 69, 4, 37}",2409,addington,offer
5,Window cleaning,"{2, 22}",2411,addington,offer
6,Pet feeding,"{9, 2, 74, 23}",2392,addington,offer
7,Proof reading and Editing,"{10, 75}",2388,addington,offer
8,"Advice or any IT development - Website, Iot, d...","{70, 7, 9, 10, 76, 56}",2380,addington,offer
9,Fix electronics - don't throw away your electr...,"{70, 7, 9, 10, 76, 56}",2379,addington,offer
