In [None]:
import requests
import json
from pandas.io.json import json_normalize
import pandas as pd

from datetime import datetime, timedelta

import time

### Required Methods

In [None]:
def get_request(url, params = '', timeout = 60):
    """Send an HTTP GET to ``url`` and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Address to request.
    params : dict or str, optional
        Query parameters handed to ``requests.get`` unchanged.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may block forever on an unresponsive host.

    Returns
    -------
    object or None
        The parsed JSON payload when the server answers with status 200 and a
        valid JSON body. ``None`` for any other status code, for a network
        failure (connection error, timeout, ...) or for a body that is not JSON.
    """
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # Network-level failure: report it the same way as a bad status code
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # Status 200 but the body could not be decoded as JSON
        return None
## TODO
# The function does not distinguish between an API error and a value that is missing from the report
def get_report_outcome(report_url):
    """Download one measurement report and pull out selected fields.

    Parameters
    ----------
    report_url : str
        URL of the measurement JSON file; it is fetched with ``get_request``.

    Returns
    -------
    dict
        Always contains the keys ``blocking``, ``probe_platform``,
        ``probe_network_type``, ``dns_ip``, ``page_title`` and
        ``http_status_code``. A key holds the string ``'Na'`` when its value
        cannot be found in the report, which includes the case where the report
        itself could not be retrieved.
    """
    # Get report JSON file
    report = get_request(report_url)

    # Output key -> sequence of keys leading to the value inside the report.
    # Additional attributes from the measurement JSON file can be added here.
    field_paths = (
        ("blocking", ("test_keys", "blocking")),
        ("probe_platform", ("annotations", "platform")),
        ("probe_network_type", ("annotations", "network_type")),
        ("dns_ip", ("test_keys", "client_resolver")),
        ("page_title", ("test_keys", "control", "http_request", "title")),
        ("http_status_code", ("test_keys", "control", "http_request", "status_code")),
    )

    ret = {}

    for field, path in field_paths:
        value = report
        try:
            for key in path:
                value = value[key]
        except (KeyError, IndexError, TypeError):
            # Only lookup failures mean "value not available"; anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            value = 'Na'
        ret[field] = value

    ## TODO
    # possible to extract more fields from the result JSON

    return ret

# Pull OONI `web_connectivity` Test Data
### Test Description: https://github.com/ooni/spec/blob/master/nettests/ts-017-web-connectivity.md

In [None]:
url = "https://api.ooni.io/api/v1/measurements"

# Adjust start and end date
start = datetime.strptime("2019-12-01", "%Y-%m-%d")
end = datetime.strptime("2019-12-05", "%Y-%m-%d")
# Watch out for enough memory in case large periods of measurements are pulled

params = {
        "test_name" : "web_connectivity",
        "probe_cc" : "GB",
        "limit" : "10000000"
        }

# Seconds to wait for the listing endpoint before giving up on a request
request_timeout = 300
# How often a day is re-requested after a 503, and the pause between attempts
max_retries = 3
retry_wait = 5

# One single-row DataFrame per measurement; combined once after the loop
# instead of growing a DataFrame row by row.
frames = []

day = start
retries = 0

# Only get data for 1 day and not whole timeframe to avoid API timeout
while day <= end:
    # Set timeframe to get data
    day_str = day.strftime("%Y-%m-%d")
    params['since'] = day_str + "T00:00:00"
    params['until'] = day_str + "T23:59:59"

    print("## Getting data for " + day_str)

    try:
        response = requests.get(url, params=params, timeout=request_timeout)
    except requests.RequestException as err:
        # Network failure or timeout: report it and move on to the next day
        print("Request failed: " + str(err))
        response = None

    if response is None:
        pass

    elif response.status_code == 200:
        response_json = response.json()

        # Iterate through the list of all reports; an empty or missing list
        # simply adds nothing.
        for r in response_json.get('results') or []:
            outcome = get_report_outcome(r['measurement_url'])
            # Merge Metadata with test outcome
            frames.append(
                pd.merge(json_normalize(r), json_normalize(outcome), left_index=True, right_index=True)
            )

    # API Timeout
    elif response.status_code == 504:
        print("API Timeout")

    # Service unavailable: wait and try the same day again, a limited number of times
    elif response.status_code == 503 and retries < max_retries:
        retries += 1
        time.sleep(retry_wait)
        continue

    else:
        print("Bad API response - Status Code: " + str(response.status_code))

    retries = 0
    day = day + timedelta(days=1)

    print("Already " + str(len(frames)) + " measurements loaded.")

# Build the result once; fall back to an empty frame so the following cells
# still work when nothing could be downloaded.
ret = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

ret.head()

In [None]:
# Summarise the collected measurements: columns, non-null counts and dtypes
ret.info()

### Save Data as CSV

In [None]:
# Save all collected measurements as a CSV file (relative path, so it lands in
# the current working directory); the DataFrame index is not written.
ret.to_csv('OONI Web_Con Measurements.csv',index=False)