# Look for string in HTML
Requirements:
* [requests](https://pypi.org/project/requests/)

## List the domain names you'd like to check

In [3]:
# Sites to scan, one fully qualified URL per entry.
webpages_to_check = [
    'http://akademssr.se',
    'http://alingsas.se',
    'http://arbetsgivarverket.se',
    'http://cgmail.se',
    'http://ekerobostader.se',
    'http://fast2.se',
    'http://forte.se',
    'http://inera.se',
    'http://ist.com',
    'http://kau.se',
    'http://kriminalvarden.se',
    'http://lakemedelsverket.se',
    'http://lfskaraborg.se',
    'http://merab.se',
    'http://mindofmyown.org.uk',
    'http://mucf.se',
    'http://ninjafilm.com',
    'http://nsr.se',
    'http://paxxo.se',
    'http://pensionsmyndigheten.se',
    'http://polisen.se',
    'http://regionorebrolan.se',
    'http://riksarkivet.se',
    'http://salabostader.se',
    'http://scb.se',
    'http://sjf.se',
    'http://skatteverket.se',
    'http://sklkommentus.se',
    'http://smhi.se',
    'http://spv.se',
    'http://storegate.com',
    'http://stsakassa.se',
    'http://sydvatten.se',
    'http://titania.se',
    'http://tullverket.se',
    'http://uhr.se',
    'http://varbergenergi.se',
    'http://varldensbibliotek.se',
    'http://vgregion.se',
]

## Check each website's HTML
Checks each website's HTML for several known analytics strings. A website that matches more than one string produces more than one row in the CSV output.

In [4]:
import requests

# Ordered (tracker name, lowercase substrings) pairs. A tracker is reported when
# any of its substrings occurs in the lowercased HTML of the page. The list order
# is the order in which rows are printed for a page matching several trackers.
ANALYTICS_SIGNATURES = [
    ("Matomo", ("matomo",)),
    ("Piwik", ("piwik",)),
    ("Google Analytics", ("ga.js", "www.google-analytics.com/analytics.js")),
    ("Google Tag Manager", ("gtm.js",)),
    ("Adobe Analytics", ("//assets.adobedtm.com/",)),
    ("Vizzit", ("//www.vizzit.se/vizzittag/",)),
    # Currently disabled check:
    # ("Google API (fonts etc)", ("googleapis.com/",)),
]

# CSV header; every following line is one "URL;HTTP code;Analytics" row.
print("URL;HTTP code;Analytics")

for webpage in webpages_to_check:
    try:
        r = requests.get(webpage, timeout=20)
        # Lowercase the body once so all substring checks are case-insensitive.
        html = r.text.lower()
    except Exception:
        # Best-effort scan: a site that cannot be fetched yields an "Error" row
        # and the loop moves on. Catching Exception (instead of a bare except)
        # lets KeyboardInterrupt/SystemExit through so the scan can be stopped.
        print(f"{webpage};Error;Error")
        continue

    detected = [
        name
        for name, needles in ANALYTICS_SIGNATURES
        if any(needle in html for needle in needles)
    ]

    # One row per detected tracker, or a single "unknown" row when none matched.
    for name in detected or ["unknown"]:
        print(f"{webpage};{r.status_code};{name}")

URL;HTTP code;Analytics
http://akademssr.se;200;unknown
http://alingsas.se;200;unknown
http://arbetsgivarverket.se;200;unknown
http://cgmail.se;200;Google Tag Manager
http://ekerobostader.se;200;Matomo
http://ekerobostader.se;200;Piwik
http://fast2.se;200;Matomo
http://forte.se;200;Vizzit
http://inera.se;200;Google Tag Manager
http://ist.com;200;Google Analytics
http://kau.se;200;unknown
http://kriminalvarden.se;200;Matomo
http://lakemedelsverket.se;200;Google Tag Manager
http://lfskaraborg.se;200;Google Analytics
http://lfskaraborg.se;200;Google Tag Manager
http://merab.se;200;Matomo
http://merab.se;200;Piwik
http://mindofmyown.org.uk;200;unknown
http://mucf.se;200;Google Tag Manager
http://ninjafilm.com;200;unknown
http://nsr.se;200;Matomo
http://nsr.se;200;Google Tag Manager
http://paxxo.se;200;Google Analytics
http://pensionsmyndigheten.se;200;unknown
http://polisen.se;200;unknown
http://regionorebrolan.se;200;unknown
http://riksarkivet.se;200;Matomo
http://riksarkivet.se;200;Googl

## Check compliance

In [18]:
import requests

def only_allowed_third_parties(url='https://1177.se'):
    """
    Tell whether the HTML served at 'url' is free of disallowed third parties.

    The page is fetched with a 20 second timeout and its lowercased HTML is
    searched for substrings associated with Google Analytics, Google Tag
    Manager and Adobe Analytics.

    Parameter: 'url' as fully qualified URL, default is 'https://1177.se'

    Returns: False as soon as any of the substrings is present, True otherwise.
    Exceptions raised by requests.get are not handled here and reach the caller.
    """
    disallowed_markers = (
        'ga.js',                                  # Google Analytics
        'www.google-analytics.com/analytics.js',  # Google Analytics
        'gtm.js',                                 # Google Tag Manager
        '//assets.adobedtm.com/',                 # Adobe Analytics
    )

    html = requests.get(url, timeout=20).text.lower()

    return not any(marker in html for marker in disallowed_markers)

# Run the third-party check for every site and print one numbered CSV row each.
print("#;URL;Passed third party check")
for i, webpage in enumerate(webpages_to_check, start=1):
    try:
        check = only_allowed_third_parties(webpage)
    except Exception:
        # Best-effort scan: a site that cannot be checked yields an "Error" row
        # and the loop moves on. Catching Exception (instead of a bare except)
        # lets KeyboardInterrupt/SystemExit through so the scan can be stopped.
        print(f"{i};{webpage};Error")
    else:
        print(f"{i};{webpage};{check}")

#;URL;Passed third party check
1;https://www.arvidsjaur.se;False
2;https://www.berg.se;False
3;https://bolagsverket.se;False
4;https://bollebygd.se;False
5;https://www.botkyrka.se;False
6;https://www.boverket.se;True
7;http://www.bastad.se;False
8;https://www.csn.se;False
9;https://www.degerfors.se;False
10;https://www.dorotea.se;False
11;https://eskilstuna.se;False
12;https://kommun.falkenberg.se;False
13;https://www.falkoping.se;False
14;https://www.falun.se;False
15;https://www.forshaga.se;False
16;https://www.gnesta.se;True
17;https://www.grums.se;True
18;https://gullspang.se/Gullspangs-kommun.html;True
19;https://www.gavle.se;False
20;https://www.gotene.se;False
21;https://www.halmstad.se;False
22;https://haparanda.se;False
23;https://www.havochvatten.se;False
24;https://heby.se;False
25;https://helsingborg.se;False
26;https://www.huddinge.se;False
27;https://www.hultsfred.se;False
28;https://www.hylte.se;False
29;https://www.habo.se;False
30;https://www.hellefors.se;False
31;http

240;https://www.harpsund.se;False
241;https://www.hedemora.se;False
242;http://www.helsingborgstingsratt.domstol.se;False
243;http://www.herrljunga.se;False
244;https://hjo.se;True
245;https://www.hofors.se;Error
246;http://www.hovrattenfornedrenorrland.se;False
247;http://www.hovrattenskaneblekinge.domstol.se;False
248;http://www.vastrahovratten.domstol.se;False
249;http://www.hovrattenovrenorrland.domstol.se;False
250;https://www.hudiksvall.se;True
251;http://www.hudiksvallstingsratt.domstol.se;False
252;http://www.hyresnamnden.se;False
253;http://www.hsan.se;False
254;https://www.herjedalen.se;False
255;https://www.harryda.se;False
256;https://www.hassleholm.se;False
257;http://www.hassleholmstingsratt.domstol.se;False
258;https://www.hoganas.se/Invanare/;Error
259;https://www.hogsby.se;False
260;https://www.du.se;False
261;https://www.hb.se;True
262;https://www.hh.se;False
263;https://www.his.se;False
264;https://www.hkr.se;False
265;https://www.hv.se;False
266;http://www.hogstadom

472;https://www.sll.se;False
473;http://www.stockholmstingsratt.se;False
474;https://www.su.se;False
475;https://www.storfors.se;False
476;https://www.storuman.se;False
477;https://www.stromstad.se;False
478;http://www.sundsvallstingsratt.se;False
479;http://www.surahammar.se;False
480;https://www.svalov.se;False
481;http://www.svea.se;False
482;https://www.svedala.se;False
483;https://www.svenljunga.se;True
484;https://www.esf.se;False
485;https://si.se;False
486;https://www.sieps.se;False
487;https://www.svk.se;False
488;https://www.svff.se;True
489;https://www.slu.se;False
490;https://www.riksbank.se;False
491;https://saffle.se;False
492;https://www.sakint.se;True
493;https://www.sakerhetspolisen.se;True
494;https://www.sater.se;False
495;https://www.savsjo.se;False
496;http://www.sodertaljetingsratt.domstol.se;False
497;https://www.sh.se;False
498;http://www.sodertornstingsratt.domstol.se;False
499;https://www.tibro.se;False
500;https://tillvaxtverket.se;False
501;https://tingsryd.