In [3]:
from __future__ import print_function

import re
from datetime import datetime
from datetime import timezone
from dateutil.tz import tzutc
from tqdm import tqdm
from pprint import pprint
import pandas as pd
import numpy as np
import math
import os
import json
import time
import requests



In [4]:
headers = {'X-AYLIEN-NewsAPI-Application-ID': 'YOUR-ID', 
           'X-AYLIEN-NewsAPI-Application-Key': 'YOUR-KEY'}  

# Functions

In [44]:
def get_stories(params, limit=False):
       
    fetched_stories = []
    stories = None
    params['cursor'] = '*'
    
    if 'per_page' in params.keys():
        if params['per_page'] > limit and not limit==False:
            params['per_page'] = limit 
            
    while stories is None or len(stories) > 0 and (len(fetched_stories) < limit or limit==False):
        response = requests.get('https://api.aylien.com/news/stories', params=params, headers=headers)
        
        if 'x-ratelimit-hit-reset' in response.headers.keys():
            reset_time = response.headers['x-ratelimit-hit-reset']
        
        if response.status_code == 200:
            
            response_json = response.json()
            stories = response_json['stories']
            
            if 'next_page_cursor' in response_json.keys():
                params['cursor'] = response_json['next_page_cursor']
                
            else:
                pprint("No next_page_cursor")
        
            fetched_stories += stories
            
            if len(stories) > 0 and not stories == None:
                print("Fetched %d stories. Total story count so far: %d" %(len(stories), len(fetched_stories)))

        elif response.status_code == 429:
            # Set the script to sleep until next minute starts,
            ## The Rate Limit resets every new minute per-clock,
            nap = 60 - time.localtime().tm_sec
            for s in   tqdm(range( nap ), desc='Taking a nap...zzzZZZzzzZZZ'):
                time.sleep(1)
            continue
        else:
            pprint (response.json())
            break
        
    return fetched_stories

def get_trends(params):

    #Returns a list of up to 100 clusters that meet the parameters set out.
    response = requests.get('https://api.aylien.com/news/trends', params=params, headers=headers)

    pprint(response.json())
    
    if 'errors' in response or 'error' in response:
        pprint(response)
    
    
def get_clusters(params={}):
    
    response = requests.get('https://api.aylien.com/news//clusters', params=params, headers=headers).json()
    
    if 'errors' in response or 'error' in response:
        pprint(response)

    return response['clusters']

def get_timeseries(params):
    
    time_series = None
    response = ""
    
    while time_series is None:
        response = requests.get('https://api.aylien.com/news/time_series', params=params, headers=headers)
        
        if response.status_code == 200:
            
            fetched_ts = response.json()
            
            return fetched_ts
                       
        elif response.status_code == 429:           
            # Set the script to sleep until next minute starts,
            ## The Rate Limit resets every new minute per-clock,
            nap = 60 - time.localtime().tm_sec
            
            for s in   tqdm( range( nap ), desc='Taking a nap...zzzZZZzzzZZZ' ):
                time.sleep(1)
        else:
            pprint (response.json())
            break


# Smart Tagger

### Query using Smart Tagger

In [None]:
params = {  'aql':'categories:{{taxonomy:aylien AND id:(ay.lifesoc.esg OR ay.lifesoc.environ OR ay.lifesoc.climate OR ay.lifesoc.carbem OR ay.lifesoc.envcon OR ay.lifesoc.envener OR ay.lifesoc.natres OR ay.lifesoc.renew OR ay.lifesoc.sustain OR ay.lifesoc.envener AND score:[0.7 TO 1])}} AND categories:{{taxonomy:aylien AND id:(ay.biz.corpgov OR ay.impact.ops OR ay.biz.regulat OR ay.biz.manage)}} AND NOT categories:{{taxonomy:aylien AND id:(ay.pol)}}',
            'published_at_start': 'NOW-5DAYS/DAY',
            'published_at_end': 'NOW/DAY',
            'per_page': 100,
            'source_rankings_alexa_rank_max': 100000,
            'sort_by': 'relevance',
            'source_locations_country':['US'],
            'language[]': ['en']
            }


stories = get_stories(params)

pprint(stories)

### Finding tags from keywords

In [45]:
params = {  'aql': 'title:(("global warming" OR "climate change" OR "greenhouse gas" OR "green energy" OR " fossil fuel" OR "paris climate agreement" OR "climate risk" OR "extreme weather" OR "extreme temperature") AND NOT ("political climate")) AND ("business process" OR "business operations" OR "business transition")',
            'published_at_start': 'NOW-5DAYS/DAY',
            'published_at_end': 'NOW/DAY',
            'per_page': 100,
            'source_rankings_alexa_rank_max': 100000,
            'sort_by': 'relevance',
            'source_locations_country':['US'],
            'language[]': ['en'],
            'categories.taxonomy': 'aylien',
            'field': 'categories.id'           }


trends = get_trends(params)

pprint(trends)

{'field': 'categories.id',
 'published_at.start': '2022-07-12T15:48:51.305Z',
 'trends': [{'count': 7, 'value': 'ay.lifesoc'},
            {'count': 7, 'value': 'ay.lifesoc.esg'},
            {'count': 6, 'value': 'ay.lifesoc.environ'},
            {'count': 5, 'value': 'ay.lifesoc.climate'},
            {'count': 4, 'value': 'ay.appsci'},
            {'count': 4, 'value': 'ay.appsci.tech'},
            {'count': 3, 'value': 'ay.appsci.physical'},
            {'count': 3, 'value': 'ay.appsci.science'},
            {'count': 3, 'value': 'ay.lifesoc.carbem'},
            {'count': 2, 'value': 'ay.appsci.atmos'},
            {'count': 2, 'value': 'ay.biz'},
            {'count': 2, 'value': 'ay.biz.manage'},
            {'count': 2, 'value': 'ay.fin'},
            {'count': 2, 'value': 'ay.fin.riskman'},
            {'count': 2, 'value': 'ay.lifesoc.envcon'},
            {'count': 2, 'value': 'ay.lifesoc.envener'},
            {'count': 2, 'value': 'ay.lifesoc.natres'},
            {'coun