# Google Ads Scraper

This scraper scrapes Google Ads (Google AdWords) for the given keywords.

![logo](./pics/GoogleAds_002.png "Logo Title Text 2")

### DOM Hierarchy:
![path](./pics/DOM_path_to_tads.png "Logo Title Text 2")

### How the ads are structured

```html
<div id="tads">
    <ol>
        <li class="ads-ad">_</li>
        <li class="ads-ad">_</li>
        <li class="ads-ad">_</li>
        ...
    </ol>
</div>
```

- The ads are in a `div` with the id `tads`.
- Each ad is a list item (`li`) with the class `ads-ad`.

In [1]:
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
import urllib3

In [9]:
class AdWordScraper:
    """Scrape the ads shown on the google.de result page for one keyword.

    The request is sent and the result page is parsed while the object is
    being constructed.

    Attributes:
        title: The keyword exactly as passed to the constructor.
        url: The search URL built from the lower-cased keyword.
        ad_list: One dict per ad found on the page (see ``scrape_ads``).
    """

    # Prefix of the search URL; the encoded keyword is appended to it.
    SEARCH_URL = "http://www.google.de/search?q="

    def __init__(self, keyword):
        """Build the search URL for ``keyword`` and scrape its ads."""
        self.title = keyword
        self.url = self.generate_url()
        self.ad_list = self.scrape_ads()

    def generate_url(self):
        """Return the search URL for the lower-cased keyword.

        ``quote_plus`` turns spaces into "+" and percent-encodes everything
        that is not safe inside a query string, so keywords such as "c++" or
        "a & b" reach the search engine unchanged instead of being read as
        URL syntax.
        """
        return self.SEARCH_URL + quote_plus(self.title.lower())

    def scrape_ads(self):
        """Return a list with one dict per ``<li class="ads-ad">`` element.

        Each dict has the keys:
            "text": text of the first ``<a>`` inside the ad, or None.
            "ad_link": text of the ``<cite>`` inside the ad's
                ``<div class="ads-visurl">``, or None.
            "html": the parsed ad element itself.

        A missing sub-element yields None for that key rather than an
        AttributeError, so one oddly structured ad does not discard all the
        others.
        """
        soup = self.get_site_soup()
        ad_list = []
        for ad in soup.find_all("li", {"class": "ads-ad"}):
            anchor = ad.find("a")
            visible_url = ad.find("div", {"class": "ads-visurl"})
            cite = visible_url.cite if visible_url is not None else None
            ad_list.append({
                "text": anchor.text if anchor is not None else None,
                "ad_link": cite.text if cite is not None else None,
                "html": ad,
            })
        return ad_list

    def get_site_soup(self):
        """Fetch ``self.url`` and return the response body parsed with lxml."""
        http = urllib3.PoolManager()
        response = http.request("GET", self.url)
        return BeautifulSoup(response.data, "lxml")
        

In [32]:
def get_ads(keywords):
    """Return a list with one AdWordScraper per keyword, in input order."""
    scrapers = []
    for keyword in keywords:
        scrapers.append(AdWordScraper(keyword))
    return scrapers

In [33]:
# Search terms to look up; one AdWordScraper is created per entry.
keywords = [
    "refugee",
    "eu",
    "europe",
    "smartphone",
    "google",
    "beutel",
    "bedrucken",
    "ads",
    "vote",
    "brexit",
]

In [35]:
# Build one scraper per keyword; each AdWordScraper scrapes in its constructor.
ads = get_ads(keywords)
# Bare expression so the notebook displays the resulting list.
ads

[<__main__.AdWordScraper at 0x7f55765b7860>,
 <__main__.AdWordScraper at 0x7f5576e04e48>,
 <__main__.AdWordScraper at 0x7f55766bd400>,
 <__main__.AdWordScraper at 0x7f558c33ae48>,
 <__main__.AdWordScraper at 0x7f55766997b8>,
 <__main__.AdWordScraper at 0x7f5576512cf8>,
 <__main__.AdWordScraper at 0x7f557689cfd0>,
 <__main__.AdWordScraper at 0x7f5576516a20>,
 <__main__.AdWordScraper at 0x7f5576c576d8>,
 <__main__.AdWordScraper at 0x7f55764292e8>]

In [31]:
# Print, for every keyword, whether at least one ad was scraped.
for ad in ads:
    print("{}: {}".format(ad.title, bool(ad.ad_list)))

refugee: False
eu: False
europe: False
smartphone: True
google: False
beutel: False
bedrucken: True
ads: True
vote: False
brexit: False
