In [5]:
!pip install wget

import json
import os
from datetime import datetime, timedelta
from os import remove
from os.path import isfile

import pandas as pd
import requests
import wget
from jsonschema import validate



In [6]:
class MeltWaterClient:
    """
    Small client for the Meltwater Export API.

    After construction the instance exposes two sub-clients:

    - ``searches``: get / create / update / delete / count
    - ``exports``:  get / create / delete / load (one-time exports)

    Only API version 1 is supported; every call made through a sub-client that
    was built with another version raises ``Exception``.
    """

    class RequestHandler:
        """Sends authenticated HTTP requests and converts error responses into exceptions."""

        # Host used by the sub-clients when no ``api_url`` is supplied.
        DEFAULT_API_URL = "https://api.meltwater.com"
        # Passed to requests as ``timeout`` (seconds). requests waits forever by
        # default, which would leave a notebook cell hanging on a stalled connection.
        timeout = 60

        def __init__(self, url, user_key, access_token):
            # ``url`` is accepted only for backward compatibility: every request
            # method receives the complete URL explicitly, so it is not stored.
            self.__user_key__ = user_key
            self.__access_token__ = access_token

        @staticmethod
        def _validate_id(resource_id) -> None:
            """
            Check that ``resource_id`` is a positive integer.

            Raises:
                TypeError: if the value is not an ``int`` (``bool`` is rejected too,
                    because ``True``/``False`` would otherwise pass as 1/0).
                ValueError: if the value is zero or negative.
            """
            if isinstance(resource_id, bool) or not isinstance(resource_id, int):
                raise TypeError("The ID must be an integer.")
            if resource_id <= 0:
                raise ValueError("The ID must be a positive integer.")

        @staticmethod
        def _raise_for_error(response) -> None:
            """
            Print the status and body of a failed response, then raise ``Exception``.
            Does nothing when ``response.ok`` is true.
            """
            if response.ok:
                return
            print(f"{response.status_code}: {response.reason}")
            try:
                print(response.json())
            except ValueError:
                # Error bodies are not guaranteed to be JSON; fall back to the raw
                # text so the decoding failure does not hide the HTTP error.
                print(response.text)
            raise Exception(f"API request failed ({response.status_code}: {response.reason})")

        def _require_supported_version(self) -> None:
            """Raise ``Exception`` unless the sub-client was created for API version 1."""
            if self.api_version != 1:
                raise Exception(f"API version {self.api_version} not supported.")

        def _send(self, method, url, body=None):
            """
            Send one authenticated request and return the ``requests`` response.

            Args:
                method: HTTP verb, e.g. ``"GET"``.
                url: complete URL of the endpoint.
                body: already-serialised JSON string, or ``None`` for requests
                    without a body. A ``Content-Type`` header is sent only when
                    a body is present.

            Raises:
                Exception: if the response status is not OK.
            """
            headers = {
                "Accept": "application/json",
                "user-key": self.__user_key__,
                "Authorization": f"Bearer {self.__access_token__}",
            }
            if body is not None:
                headers["Content-Type"] = "application/json"
            response = requests.request(method, url, headers=headers, data=body, timeout=self.timeout)
            self._raise_for_error(response)
            return response

        def get(self, url):
            """
            Sends an authenticated HTTP request with the GET method to the specified Meltwater endpoint.
            Raises ``Exception`` on an error response, otherwise returns the decoded JSON body.
            """
            return self._send("GET", url).json()

        def delete(self, url):
            """
            Sends an authenticated HTTP request with the DELETE method to the specified Meltwater endpoint.
            Raises ``Exception`` on an error response; returns nothing.
            """
            self._send("DELETE", url)

        def post(self, url, payload):
            """
            Sends an authenticated HTTP request with the POST method to the specified Meltwater endpoint.
            ``payload`` is serialised to JSON. Raises ``Exception`` on an error response,
            otherwise returns the decoded JSON body.
            """
            return self._send("POST", url, body=json.dumps(payload)).json()

        def put(self, url, payload):
            """
            Sends an authenticated HTTP request with the PUT method to the specified Meltwater endpoint.
            ``payload`` is serialised to JSON. Raises ``Exception`` on an error response.

            Returns the decoded JSON body when the response carries one, otherwise ``None``.
            """
            response = self._send("PUT", url, body=json.dumps(payload))
            if not response.content:
                return None
            try:
                return response.json()
            except ValueError:
                # A successful update without a JSON body is still a success.
                return None

    class Searches(RequestHandler):
        """Client for the ``/export/v{version}/searches`` endpoints."""

        def __init__(self, api_version, user_key, access_token, api_url=None):
            # ``api_url`` is optional so existing three-argument callers keep working.
            base_url = (api_url or self.DEFAULT_API_URL).rstrip("/")
            self.endpoint_url = f"{base_url}/export/v{api_version}/searches"
            self.api_version = api_version
            super().__init__(self.endpoint_url, user_key, access_token)

        def __validate_search_id__(self, search_id) -> None:
            """Validates the ID of the requested search (positive ``int``)."""
            self._validate_id(search_id)

        def get(self, search_id: int = None) -> object:
            """Get an individual search if search_id is specified, returns a list of all searches otherwise."""
            self._require_supported_version()
            endpoint_url = self.endpoint_url
            if search_id is not None:
                self.__validate_search_id__(search_id)
                endpoint_url = f"{endpoint_url}/{search_id}"
            return super().get(endpoint_url)

        def delete(self, search_id: int) -> None:
            """Delete an individual search."""
            self.__validate_search_id__(search_id)
            self._require_supported_version()
            super().delete(f"{self.endpoint_url}/{search_id}")

        def create(self, params: object, dry_run: bool = False) -> object:
            """
            Create a search from ``params`` and return the API response.
            With ``dry_run=True`` the request is sent with ``?dry_run=true``.
            """
            self._require_supported_version()
            endpoint_url = self.endpoint_url
            if dry_run:
                endpoint_url = f"{endpoint_url}?dry_run=true"
            return super().post(endpoint_url, payload=params)

        def update(self, search_id: int, params: object, dry_run: bool = False) -> object:
            """
            Update an individual search with ``params``.
            With ``dry_run=True`` the request is sent with ``?dry_run=true``.
            Returns the decoded response body, or ``None`` if the response has none.
            """
            self.__validate_search_id__(search_id)
            self._require_supported_version()
            endpoint_url = f"{self.endpoint_url}/{search_id}"
            if dry_run:
                endpoint_url = f"{endpoint_url}?dry_run=true"
            return super().put(endpoint_url, payload=params)

        def count(self, search_id: int) -> object:
            """
            Get an approximate count of results for the search over a particular period.
            Returns the ``["count"]["total"]`` value of the API response.
            """
            self._require_supported_version()
            self.__validate_search_id__(search_id)
            response = super().get(f"{self.endpoint_url}/{search_id}/count")
            return response["count"]["total"]

    class Exports(RequestHandler):
        """Client for the ``/export/v{version}/exports/one-time`` endpoints."""

        def __init__(self, api_version, user_key, access_token, api_url=None):
            # ``api_url`` is optional so existing three-argument callers keep working.
            base_url = (api_url or self.DEFAULT_API_URL).rstrip("/")
            self.endpoint_url = f"{base_url}/export/v{api_version}/exports/one-time"
            self.api_version = api_version
            super().__init__(self.endpoint_url, user_key, access_token)

        def __validate_export_id__(self, export_id: int) -> None:
            """Validates the ID of the requested export (positive ``int``)."""
            self._validate_id(export_id)

        def get(self, export_id: int = None) -> object:
            """Get details about a one-time export if export_id is specified, returns a list of all one-time exports otherwise."""
            self._require_supported_version()
            endpoint_url = self.endpoint_url
            if export_id is not None:
                self.__validate_export_id__(export_id)
                endpoint_url = f"{endpoint_url}/{export_id}"
            return super().get(endpoint_url)

        def delete(self, export_id: int) -> None:
            """Removes an existing one-time export."""
            self.__validate_export_id__(export_id)
            self._require_supported_version()
            super().delete(f"{self.endpoint_url}/{export_id}")

        def create(self, params: object) -> object:
            """Creates a new one-time export from ``params`` and returns the API response."""
            self._require_supported_version()
            return super().post(self.endpoint_url, payload=params)

        def load(self, export_id: int) -> pd.DataFrame:
            """
            Loads a one-time export in a Pandas dataframe.
            The one-time export must have the status 'FINISHED'.

            Raises:
                TypeError / ValueError: if ``export_id`` is not a positive integer.
                Exception: if the export is not finished or the download fails.
            """
            self.__validate_export_id__(export_id)
            # Get the one-time export details from Meltwater
            response = self.get(export_id)
            export = response.get("onetime_export") if isinstance(response, dict) else None
            if not isinstance(export, dict) or export.get("status") != "FINISHED":
                raise Exception("The export job is not finished.")
            data_url = export["data_url"]
            # Download straight into memory: no temporary file is left behind on
            # failure and parallel loads cannot clobber each other. The API
            # credentials are deliberately not sent; the data URL is fetched as-is.
            download = requests.get(data_url, timeout=self.timeout)
            self._raise_for_error(download)
            data = json.loads(download.content.decode("utf-8"))
            # Return the data in a Pandas DataFrame
            return pd.DataFrame.from_records(data["data"])

    def __init__(self, params: object):
        """
        Build the client and, unless an ``access_token`` is supplied, authenticate.

        Args:
            params: dict with the required keys ``user_key``, ``client_id``,
                ``client_secret`` and ``version``, and the optional keys
                ``api_url`` (defaults to https://api.meltwater.com) and
                ``access_token`` (skips authentication when present).

        Raises:
            jsonschema validation error: if ``params`` does not match the schema.
            Exception: if authentication fails.
        """
        schema = {
            "type": "object",
            "properties": {
                "user_key": {"type": "string"},
                "client_id": {"type": "string"},
                "client_secret": {"type": "string"},
                "api_url": {"type": "string"},
                "version": {"type": "number"},
                "access_token": {"type": "string"},
            },
            "required": ["user_key", "client_id", "client_secret", "version"]
        }
        validate(instance=params, schema=schema)

        self.__user_key__ = params["user_key"]
        self.__client_id__ = params["client_id"]
        self.__client_secret__ = params["client_secret"]
        self.__api_url__ = params.get("api_url", self.RequestHandler.DEFAULT_API_URL)
        self.__version__ = params["version"]
        if "access_token" in params:
            self.__access_token__ = params["access_token"]
        else:
            self.__auth__()

        # The sub-clients use the same host as authentication.
        self.searches = self.Searches(self.__version__, self.__user_key__,
                                      self.__access_token__, api_url=self.__api_url__)
        self.exports = self.Exports(self.__version__, self.__user_key__,
                                    self.__access_token__, api_url=self.__api_url__)

    def __auth__(self):
        """
        Authenticate to Meltwater with the client-credentials grant and store the access token.

        Raises:
            Exception: if the token endpoint answers with an error status.
        """
        oauth_url = f"{self.__api_url__.rstrip('/')}/oauth2/access_token"
        response = requests.post(oauth_url,
                                 auth=(self.__client_id__, self.__client_secret__),
                                 headers={
                                     "content-type": "application/x-www-form-urlencoded",
                                     "user-key": self.__user_key__
                                 },
                                 data="grant_type=client_credentials&scope=search",
                                 timeout=self.RequestHandler.timeout)
        if not response.ok:
            print("Authentication error")
            print(f"{response.status_code}: {response.reason}")
            try:
                print(response.json())
            except ValueError:
                print(response.text)
            # Fail here with a clear message; continuing would only surface later
            # as a missing-attribute error when the token is first read.
            raise Exception(f"Authentication failed ({response.status_code}: {response.reason})")
        token_info = response.json()
        self.__access_token__ = token_info["access_token"]
        # Show the grant details but never the token itself: notebook outputs get saved and shared.
        print({key: value for key, value in token_info.items() if key != "access_token"})
            

## Ad-hoc testing

In [7]:
# Credentials come from the environment so that no secret is stored in the notebook.
# Set MELTWATER_USER_KEY, MELTWATER_CLIENT_ID and MELTWATER_CLIENT_SECRET before
# starting the kernel; a missing variable raises KeyError on this cell.
# An "access_token" entry may be added to reuse an existing token and skip authentication.
meltwater = MeltWaterClient({
    "user_key": os.environ["MELTWATER_USER_KEY"],
    "client_id": os.environ["MELTWATER_CLIENT_ID"],
    "client_secret": os.environ["MELTWATER_CLIENT_SECRET"],
    "version": 1,
})

{'access_token': '<redacted>', 'expires_in': 3599, 'scope': 'search', 'token_type': 'bearer'}



#### Searches API

- meltwater.searches.get()
- meltwater.searches.create()
- meltwater.searches.count()
- meltwater.searches.update()
- meltwater.searches.delete()

#### Exports API

- meltwater.exports.get()
- meltwater.exports.create()
- meltwater.exports.delete()
- meltwater.exports.load()


### Searches API

#### Get a list of all your searches

In [8]:
# No search_id is passed, so the collection endpoint is requested and all searches are returned.
meltwater.searches.get()

{'searches': [{'updated': '2020-07-08T09:48:54.000Z',
   'type': 'news',
   'search_id': 9438801,
   'name': 'NEW NAME FOR THIS SEARCH!!!',
   'id': 9438801},
  {'updated': '2020-07-09T09:33:18.000Z',
   'type': 'explore',
   'search_id': 9470285,
   'name': 'flight restriction basic',
   'id': 9470285},
  {'updated': '2020-07-09T10:53:16.000Z',
   'type': 'news',
   'search_id': 9472065,
   'name': 'My new test search',
   'id': 9472065},
  {'updated': '2020-07-09T12:31:51.000Z',
   'type': 'explore',
   'search_id': 9474155,
   'name': 'health department search',
   'id': 9474155},
  {'updated': '2020-07-09T12:45:57.000Z',
   'type': 'explore',
   'search_id': 9474495,
   'name': 'foreign ministry',
   'id': 9474495},
  {'updated': '2020-07-09T13:08:27.000Z',
   'type': 'news',
   'search_id': 9474979,
   'name': 'My new test search 2222',
   'id': 9474979},
  {'updated': '2020-07-15T13:27:27.000Z',
   'type': 'explore',
   'search_id': 9475201,
   'name': 'Train fears',
   'id': 947

#### Create a new search

In [9]:

# Assemble the request body step by step: a boolean news search.
new_search_obj = {"search": {"type": "news"}}
new_search_obj["search"]["query"] = {
    "type": "boolean",
    "source_selection_id": 1,
    "case_sensitivity": "no",
    "boolean": "Tesla OR (Volvo NEAR electric)",
}
new_search_obj["search"]["name"] = "TEST SEARCH 123"

# Create the search and show the API response.
new_search = meltwater.searches.create(params=new_search_obj)
print("New search:", new_search)

# The ID is reused by the cells that follow.
new_search_id = new_search["search"]["id"]


New search: {'search': {'updated': '2020-07-17T13:13:17.599Z', 'type': 'news', 'search_id': 9712867, 'query': {'type': 'boolean', 'source_selection_id': 1, 'case_sensitivity': 'no', 'boolean': 'Tesla OR (Volvo NEAR electric)'}, 'name': 'TEST SEARCH 123', 'id': 9712867}}


#### Get an individual search

In [10]:
# Fetch the search created above, using the ID taken from the create response.
meltwater.searches.get(new_search_id)

{'search': {'updated': '2020-07-17T13:13:17.000Z',
  'type': 'news',
  'search_id': 9712867,
  'query': {'type': 'boolean',
   'source_selection_id': 1,
   'case_sensitivity': 'no',
   'boolean': 'Tesla OR (Volvo NEAR electric)'},
  'name': 'TEST SEARCH 123',
  'id': 9712867}}

#### Get an approximate count of results for the search over a particular period

In [11]:
# count() returns only the "total" figure from the response's "count" object.
meltwater.searches.count(new_search_id)

52163

#### Update an individual search

In [12]:
# Same query as at creation; only the name differs.
updated_search_obj = {"search": {"type": "news"}}
updated_search_obj["search"]["query"] = {
    "type": "boolean",
    "source_selection_id": 1,
    "case_sensitivity": "no",
    "boolean": "Tesla OR (Volvo NEAR electric)",
}
updated_search_obj["search"]["name"] = "TEST SEARCH 123 - 2"

# Send the update for the search created earlier.
meltwater.searches.update(new_search_id, params=updated_search_obj)

#### Delete an individual search

In [None]:
# Deletes the search created above.
# NOTE(review): the one-time export cell further down still passes new_search_id in
# "search_ids", so running this cell first presumably breaks it — confirm and run this last.
meltwater.searches.delete(new_search_id)

### One-time Export API

- meltwater.exports.get()
- meltwater.exports.create()
- meltwater.exports.delete()
- meltwater.exports.load()

#### Get a list of all your one-time exports 

In [13]:
# No export_id is passed, so the collection endpoint is requested and all one-time exports are returned.
meltwater.exports.get()

{'onetime_exports': [{'updated_at': '2020-07-08T09:52:43.390840',
   'tags': [],
   'status_reason': '',
   'status': 'FINISHED',
   'start_date': '2020-07-07T09:51:56.600908Z',
   'searches': [{'name': 'NEW NAME FOR THIS SEARCH!!!', 'id': 9438801}],
   'inserted_at': '2020-07-08T09:51:57.711720',
   'id': 1013778,
   'end_date': '2020-07-08T09:51:56.600832Z',
   'data_url': 'https://exports.meltwater.com/v1/one-time/1013778?data_key=572069a1-3623-494d-8aa4-a12b140941f9',
   'company_name': 'Emergent Alliance - Fairhair'},
  {'updated_at': '2020-07-07T16:20:30.302068',
   'tags': [],
   'status_reason': '',
   'status': 'FINISHED',
   'start_date': '2020-07-06T16:19:46.874355Z',
   'searches': [{'name': 'NEW NAME FOR THIS SEARCH!!!', 'id': 9416747}],
   'inserted_at': '2020-07-07T16:19:47.864748',
   'id': 1007480,
   'end_date': '2020-07-07T16:19:46.874283Z',
   'data_url': 'https://exports.meltwater.com/v1/one-time/1007480?data_key=3bf151cf-cb02-4458-bb3b-d2a439da36c2',
   'company_n

#### Creates a new one-time export

In [14]:
# Export window: the 24 hours ending now. The clock is read once so that both
# bounds derive from the same instant and the window is exactly one day long.
# NOTE(review): these are naive local timestamps without a UTC offset; the recorded
# response shows them echoed with a "Z" suffix — confirm the timezone the API assumes.
window_end = datetime.now()
now = window_end.isoformat()
one_day_ago = (window_end - timedelta(days=1)).isoformat()

new_export_obj = {
    "onetime_export": {
        "start_date": one_day_ago,
        "end_date": now,
        "search_ids": [new_search_id]
    }
}

new_export = meltwater.exports.create(new_export_obj)

print("New export:", new_export)

# The ID is reused by the cells that follow.
new_export_id = new_export["onetime_export"]["id"]


New export: {'onetime_export': {'updated_at': '2020-07-17T13:13:55.511527', 'tags': [], 'status_reason': 'Export run has not completed yet', 'status': 'PENDING', 'start_date': '2020-07-16T13:13:54.698641Z', 'searches': [{'name': 'TEST SEARCH 123 - 2', 'id': 9712867}], 'inserted_at': '2020-07-17T13:13:55.511519', 'id': 1091700, 'end_date': '2020-07-17T13:13:54.698561Z', 'data_url': 'https://exports.meltwater.com/v1/one-time/1091700?data_key=647d28b3-93e8-476b-b098-cd035eb33304', 'company_name': 'Emergent Alliance - Fairhair'}}


#### Get details of a one-time export

In [21]:
# Check the export created above; load() requires its "status" to be 'FINISHED'.
meltwater.exports.get(new_export_id)

{'onetime_export': {'updated_at': '2020-07-17T13:14:38.507443',
  'tags': [],
  'status_reason': '',
  'status': 'FINISHED',
  'start_date': '2020-07-16T13:13:54.698641Z',
  'searches': [{'name': 'TEST SEARCH 123 - 2', 'id': 9712867}],
  'inserted_at': '2020-07-17T13:13:55.511519',
  'id': 1091700,
  'end_date': '2020-07-17T13:13:54.698561Z',
  'data_url': 'https://exports.meltwater.com/v1/one-time/1091700?data_key=647d28b3-93e8-476b-b098-cd035eb33304',
  'company_name': 'Emergent Alliance - Fairhair'}}

#### Removes an existing one-time export

In [None]:
# Deletes the export created above.
# NOTE(review): the load cell below fetches this same export by ID, so running this
# cell first presumably makes it fail — confirm and run this only after loading.
meltwater.exports.delete(new_export_id)

#### Load a one-time export into a Pandas DataFrame

In [22]:
# Download the finished export and build a DataFrame from the "data" records of the file.
df = meltwater.exports.load(new_export_id)
# Preview the first rows only rather than the whole frame.
df.head()

Unnamed: 0,document_authors,document_hidden,document_hit_sentence,document_id,document_image_link,document_key_phrases,document_language_code,document_matched_keywords,document_opening_text,document_publish_date,...,metadata,source_ave,source_country_code,source_id,source_information_type,source_name,source_reach,source_reach_desktop,source_reach_mobile,source_subregion
0,[],False,"Elektrolyse, Tankstellen und mehr, Milliardenm...",w_IHWMvtCcDI3m-nbjIZjtf8Yn0,,"[Wert, Lock, Mio. Euro, Einbringung, GORE, Akt...",de,[TESLA],17.07.2020 -Die PREOS Real Estate AG (ISIN:DE0...,2020-07-17T09:20:23.450Z,...,"{'tag_name': '', 'search_name': 'TEST SEARCH 1...",51.35,de,0A003C7FEB222FCBB535025EE74F6F24,news,Nebenwerte Magazin,5551,2002,3549,
1,[],False,WAIC Revealing Disruptive Innovations in Robot...,A9Ka50u6jdUywQshaHMsi4Lz5hk,,"[intelligent robot, artificial intelligence, i...",en,"[Tesla, tesla]","HONG KONG, CHINA / ACCESSWIRE / July 17, 2020 ...",2020-07-17T07:00:16.688Z,...,"{'tag_name': '', 'search_name': 'TEST SEARCH 1...",0.29,us,3626E475565572703605FDBC85A0B435,news,The Post and Mail - FinancialContent,31,31,0,
2,[],False,أجزاء من البطارية ومحركات كهربائية للاستخدام ف...,rL4PWG0fTS6vkbXEVOoMSs3MgWA,https://img.youm7.com/ArticleImgs/2020/7/16/82...,"[مصنع للسيارات, برلين, نموذجى لمصنع, أوروبا, إ...",ar,[Tesla],شارك إيلون ماسك، الرئيس التنفيذى لشركة تسلا لل...,2020-07-17T02:42:27.076Z,...,"{'tag_name': '', 'search_name': 'TEST SEARCH 1...",36.26,ae,543EB6B0DC3DF3D1E0DC97E8FE9CB11B,news,اخبار ترند العالمية,3920,930,2990,
3,[],False,. The company’s newly gained prominence has al...,QYVaopShSmkG7MB9qXEhMirAtcA,,"[fuel economy, Bill Lerner, New York, Imperial...",en,[Tesla],Hybrid (HEVs) and plug-in electric vehicles (E...,2020-07-17T04:01:09.627Z,...,"{'tag_name': '', 'search_name': 'TEST SEARCH 1...",0.0,us,904A97442730D46B1A54AB63B338E523,news,Asean Coverage - Entertainment [Newswire],0,0,0,
4,[],False,WAIC Revealing Disruptive Innovations in Robot...,IHiSrnBoc6_elvD_ULEgfvh-PlI,https://s.yimg.com/cv/apiv2/social/images/yaho...,"[intelligent robot, artificial intelligence, i...",en,"[Tesla, tesla]","HONG KONG, CHINA / ACCESSWIRE / July 17, 2020 ...",2020-07-17T07:00:00.369Z,...,"{'tag_name': '', 'search_name': 'TEST SEARCH 1...",458726.83,us,2F317A7BF9F3E85A6A2894D049F45611,news,Yahoo! Finance,49592090,22836565,26755525,


In [40]:
# List every column of the export; the head() preview above elides the middle ones.
df.columns

Index(['document_authors', 'document_hidden', 'document_hit_sentence',
       'document_id', 'document_image_link', 'document_key_phrases',
       'document_language_code', 'document_matched_keywords',
       'document_opening_text', 'document_publish_date', 'document_sentiment',
       'document_social_echo_facebook', 'document_social_echo_reddit',
       'document_social_echo_twitter', 'document_tags', 'document_title',
       'document_url', 'document_visibility', 'metadata', 'source_ave',
       'source_country_code', 'source_id', 'source_information_type',
       'source_name', 'source_reach', 'source_reach_desktop',
       'source_reach_mobile', 'source_subregion'],
      dtype='object')