From 358a93f6e67dfee49cb9a34fffd9cafb9235b866 Mon Sep 17 00:00:00 2001 From: Matan Yechiel Date: Mon, 8 May 2023 17:13:27 +0300 Subject: [PATCH 1/5] support latest analysis for url analysis --- CHANGES | 4 ++++ examples/url_latest_analysis.py | 13 +++++++++++++ intezer_sdk/__init__.py | 2 +- intezer_sdk/analyses_history.py | 2 +- intezer_sdk/analysis.py | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 examples/url_latest_analysis.py diff --git a/CHANGES b/CHANGES index 3d4d253..af58b1c 100644 --- a/CHANGES +++ b/CHANGES @@ -3,6 +3,10 @@ ____ - Add support for proxies - Remove deprecated edr assessment routes +1.16.7 +____ +- Support latest analysis for URL analysis + 1.16.6 ____ - Fix family tag caching when no tags are returned diff --git a/examples/url_latest_analysis.py b/examples/url_latest_analysis.py new file mode 100644 index 0000000..1bbfde6 --- /dev/null +++ b/examples/url_latest_analysis.py @@ -0,0 +1,13 @@ +from pprint import pprint + +from intezer_sdk import api +from intezer_sdk.analysis import UrlAnalysis + + +def get_url_latest_analysis(url: str): + api.set_global_api('519643d2-f373-40c1-9616-d4650c4741ee') + analysis = UrlAnalysis.from_latest_analysis(url) + pprint(analysis.result()) + +if __name__ == '__main__': + get_url_latest_analysis('https://www.google.com/') \ No newline at end of file diff --git a/intezer_sdk/__init__.py b/intezer_sdk/__init__.py index a61a499..f3eabdc 100644 --- a/intezer_sdk/__init__.py +++ b/intezer_sdk/__init__.py @@ -1 +1 @@ -__version__ = '1.16.6' +__version__ = '1.16.7' diff --git a/intezer_sdk/analyses_history.py b/intezer_sdk/analyses_history.py index 9a810ca..1bc5f67 100644 --- a/intezer_sdk/analyses_history.py +++ b/intezer_sdk/analyses_history.py @@ -167,7 +167,7 @@ def generate_analyses_history_filter(*, 'offset': offset } if aggregated_view is not None: - base_filter['aggregate_view'] = aggregated_view + base_filter['aggregated_view'] = aggregated_view 
if sources: base_filter['sources'] = sources if verdicts: diff --git a/intezer_sdk/analysis.py b/intezer_sdk/analysis.py index 2226a5f..2a874ca 100644 --- a/intezer_sdk/analysis.py +++ b/intezer_sdk/analysis.py @@ -1,6 +1,7 @@ import datetime import logging import os +import re from http import HTTPStatus from typing import BinaryIO from typing import IO @@ -17,6 +18,7 @@ from intezer_sdk import operation from intezer_sdk._api import IntezerApi from intezer_sdk._util import deprecated +from intezer_sdk.analyses_history import query_url_analyses_history from intezer_sdk.api import IntezerApiClient from intezer_sdk.api import get_global_api from intezer_sdk.base_analysis import Analysis @@ -349,6 +351,36 @@ def from_analysis_id(cls, analysis_id: str, api: IntezerApiClient = None) -> Opt response = IntezerApi(api or get_global_api()).get_url_analysis_response(analysis_id, True) return cls._create_analysis_from_response(response, api, analysis_id) + @classmethod + def from_latest_analysis(cls, + url: str, + api: IntezerApiClient = None) -> Optional['UrlAnalysis']: + now = datetime.datetime.now() + yesterday = now - datetime.timedelta(days=1) + + analysis_history_url_result = query_url_analyses_history(start_date=yesterday, + end_date=now, + api=api) + all_analyses_reports = analysis_history_url_result.all() + + analyses_ids = [report['analysis_id'] for report in all_analyses_reports if cls._clean_url(report['scanned_url']) == cls._clean_url(url)] + + if not analyses_ids: + return None + + return cls.from_analysis_id(analyses_ids[0]) + + @staticmethod + def _clean_url(url: str) -> str: + """ + Remove http:// or https:// or www. from the beginning of the URL, + and / from the end of the URL. 
+ """ + url = re.sub(r'^https?://(www\.)?', '', url) + url = re.sub(r'\/$', '', url) + + return url + def _set_report(self, report: dict): super()._set_report(report) if not self.url: From f198fac5135d74dac59de6f4c8bee31b7bb0ee67 Mon Sep 17 00:00:00 2001 From: Matan Yechiel Date: Mon, 8 May 2023 17:13:48 +0300 Subject: [PATCH 2/5] remove creds --- examples/url_latest_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/url_latest_analysis.py b/examples/url_latest_analysis.py index 1bbfde6..2987f69 100644 --- a/examples/url_latest_analysis.py +++ b/examples/url_latest_analysis.py @@ -5,7 +5,7 @@ def get_url_latest_analysis(url: str): - api.set_global_api('519643d2-f373-40c1-9616-d4650c4741ee') + api.set_global_api('') analysis = UrlAnalysis.from_latest_analysis(url) pprint(analysis.result()) From dff6c2faba99f6312d903ce7e2634da8ecf84c0d Mon Sep 17 00:00:00 2001 From: Matan Yechiel Date: Mon, 8 May 2023 17:53:20 +0300 Subject: [PATCH 3/5] add tests --- CHANGES | 7 ++--- examples/url_latest_analysis.py | 3 ++- intezer_sdk/analysis.py | 27 ++++++++++---------- tests/unit/test_url_analysis.py | 45 +++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 19 deletions(-) diff --git a/CHANGES b/CHANGES index af58b1c..e14bfc6 100644 --- a/CHANGES +++ b/CHANGES @@ -1,11 +1,8 @@ -Unreleased -____ -- Add support for proxies -- Remove deprecated edr assessment routes - 1.16.7 ____ - Support latest analysis for URL analysis +- Add support for proxies +- Remove deprecated edr assessment routes 1.16.6 ____ diff --git a/examples/url_latest_analysis.py b/examples/url_latest_analysis.py index 2987f69..c82c8a1 100644 --- a/examples/url_latest_analysis.py +++ b/examples/url_latest_analysis.py @@ -1,3 +1,4 @@ +import sys from pprint import pprint from intezer_sdk import api @@ -10,4 +11,4 @@ def get_url_latest_analysis(url: str): pprint(analysis.result()) if __name__ == '__main__': - get_url_latest_analysis('https://www.google.com/') \ No 
newline at end of file + get_url_latest_analysis(*sys.argv[1:]) \ No newline at end of file diff --git a/intezer_sdk/analysis.py b/intezer_sdk/analysis.py index 2a874ca..4cf5148 100644 --- a/intezer_sdk/analysis.py +++ b/intezer_sdk/analysis.py @@ -311,6 +311,15 @@ def get_file_analysis_by_id(analysis_id: str, api: IntezerApi = None) -> Optiona def get_analysis_by_id(analysis_id: str, api: IntezerApi = None) -> Optional[FileAnalysis]: return get_file_analysis_by_id(analysis_id, api) +def _clean_url(url: str) -> str: + """ + Remove http:// or https:// or www. from the beginning of the URL, + and / from the end of the URL. + """ + url = re.sub(r'^https?://(www\.)?', '', url) + url = re.sub(r'\/$', '', url) + + return url class UrlAnalysis(Analysis): """ @@ -354,33 +363,25 @@ def from_analysis_id(cls, analysis_id: str, api: IntezerApiClient = None) -> Opt @classmethod def from_latest_analysis(cls, url: str, + days_threshold_for_latest_analysis: int = 1, api: IntezerApiClient = None) -> Optional['UrlAnalysis']: now = datetime.datetime.now() - yesterday = now - datetime.timedelta(days=1) + yesterday = now - datetime.timedelta(days=days_threshold_for_latest_analysis) analysis_history_url_result = query_url_analyses_history(start_date=yesterday, end_date=now, + aggregated_view=True, api=api) all_analyses_reports = analysis_history_url_result.all() - analyses_ids = [report['analysis_id'] for report in all_analyses_reports if cls._clean_url(report['scanned_url']) == cls._clean_url(url)] + analyses_ids = [report['analysis_id'] for report in all_analyses_reports + if _clean_url(url) in (_clean_url(report['scanned_url']), _clean_url(report['submitted_url']))] if not analyses_ids: return None return cls.from_analysis_id(analyses_ids[0]) - @staticmethod - def _clean_url(url: str) -> str: - """ - Remove http:// or https:// or www. from the beginning of the URL, - and / from the end of the URL. 
- """ - url = re.sub(r'^https?://(www\.)?', '', url) - url = re.sub(r'\/$', '', url) - - return url - def _set_report(self, report: dict): super()._set_report(report) if not self.url: diff --git a/tests/unit/test_url_analysis.py b/tests/unit/test_url_analysis.py index 8effdfe..e7250a9 100644 --- a/tests/unit/test_url_analysis.py +++ b/tests/unit/test_url_analysis.py @@ -201,3 +201,48 @@ def test_url_analysis_doesnt_reference_file_analysis_when_not_exists(self): # Assert self.assertIsNone(analysis.downloaded_file_analysis) + + def test_get_url_latest_analysis(self): + # Arrange + url = 'https://intezer.com' + analysis_id = str(uuid.uuid4()) + get_analysis_result = {'analysis_id': analysis_id, 'submitted_url': url} + fetch_history_result = {'analyses': [{'analysis_id': analysis_id, 'scanned_url': url, 'submitted_url': url}], + 'total_count': 1} + + with responses.RequestsMock() as mock: + mock.add('POST', + url=self.full_url + '/url-analyses/history', + status=200, + json=fetch_history_result) + mock.add('GET', + url='{}/url/{}'.format(self.full_url, analysis_id), + status=200, + json={'result': get_analysis_result, 'status': 'succeeded'}) + + + # Act + analysis = UrlAnalysis.from_latest_analysis(url) + + # Assert + self.assertEqual(analysis.url, url) + + def test_get_url_latest_analysis_analyses_not_found(self): + # Arrange + url = 'https://intezer.com' + analysis_id = str(uuid.uuid4()) + fetch_history_result = {'analyses': [], + 'total_count': 0} + + with responses.RequestsMock() as mock: + mock.add('POST', + url=self.full_url + '/url-analyses/history', + status=200, + json=fetch_history_result) + + + # Act + analysis = UrlAnalysis.from_latest_analysis(url) + + # Assert + self.assertIsNone(analysis) \ No newline at end of file From 84deadd4f63f80a306eed29eb317bf86dac38f33 Mon Sep 17 00:00:00 2001 From: Matan Yechiel Date: Mon, 8 May 2023 17:53:36 +0300 Subject: [PATCH 4/5] remove unnecessary --- tests/unit/test_url_analysis.py | 1 - 1 file changed, 1 deletion(-) 
diff --git a/tests/unit/test_url_analysis.py b/tests/unit/test_url_analysis.py index e7250a9..586c12e 100644 --- a/tests/unit/test_url_analysis.py +++ b/tests/unit/test_url_analysis.py @@ -230,7 +230,6 @@ def test_get_url_latest_analysis(self): def test_get_url_latest_analysis_analyses_not_found(self): # Arrange url = 'https://intezer.com' - analysis_id = str(uuid.uuid4()) fetch_history_result = {'analyses': [], 'total_count': 0} From 51e6f3c8a47c4235e1f966cbea03a68c7ac1ddff Mon Sep 17 00:00:00 2001 From: Matan Yechiel Date: Mon, 8 May 2023 17:56:07 +0300 Subject: [PATCH 5/5] add tests and examples --- examples/url_latest_analysis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/url_latest_analysis.py b/examples/url_latest_analysis.py index c82c8a1..0768d02 100644 --- a/examples/url_latest_analysis.py +++ b/examples/url_latest_analysis.py @@ -8,7 +8,8 @@ def get_url_latest_analysis(url: str): api.set_global_api('') analysis = UrlAnalysis.from_latest_analysis(url) - pprint(analysis.result()) + if analysis: + pprint(analysis.result()) if __name__ == '__main__': get_url_latest_analysis(*sys.argv[1:]) \ No newline at end of file