In [1]:
from utilities.google_client import google_sheet_API
from utilities.athena_client import athena_API
from utilities.slack_client import slack_API
import multiprocessing as mp
import time

In [2]:
# Initialise the 2 APIs 
gsheetAPI = google_sheet_API()
athenaAPI = athena_API()
slackAPI = slack_API()

In [3]:
# Helper function to get the percentage in the right format
def _perc_to_float(perc):
        return float(perc.strip('%'))/100

# Helper function to write logs and console at the same time
def write_log(line):
    print(line)
    with open("log.txt", "a+") as logs:
        logs.write(f"{line}\r\n")

In [4]:
# Helper function to make sure all rows in the dict are valid
def check_row_validity(row):
    if row[1]['perc_increase'] == '' or row[1]['account_id'] == '' or row[1]['site_id'] == '':
        write_log(f'Skipped row {row[0] + 2} as contained empty values')
        return False
    elif row[1]['perc_increase'] == None or row[1]['account_id'] == None or row[1]['site_id'] == None:
        write_log(f'Skipped row {row[0] + 2} as contained Null values')
        return False
    else:
        return True

In [27]:
# Helper function to generate the looker url string
def generate_looker_url(account_id, site_id, url_match):
    url_match_formatted = url_match.replace('/', '%2F')
    return f'https://analytics.distilnetworks.com/dashboards/618?access_time=168%20hours&account_id={account_id}&site_id={site_id}&url_match=%25{url_match_formatted}%25'

In [28]:
# Parallelised function, everything happens here
# It will retrieve the data from Athena
# Run the 2 queries, and update the results in google sheet
def get_data(row_idx, row_dict):
    domain = row_dict.get('domain')
    url = row_dict.get('url')
    perc_increase = row_dict.get('perc_increase')
    
    # Try to run the 2 queries
    try:
        tw_query = athenaAPI.get_pandas_df(row_dict.get('tw_query'))
        lw_query = athenaAPI.get_pandas_df(row_dict.get('lw_query'))
        
        # Values are returned in DataFrame format, extract the single number
        tw_count = int(tw_query.values[0][0])
        lw_count = int(lw_query.values[0][0])
        client_message = 'Successful'
    except Exception as e:        
        tw_count = ''
        lw_count = ''
        slackAPI.send_message(f"Error for domain {domain}, row {row_idx}, query returned error:\n ```{str(e)}```")
        client_message = str(e)

    # Check if the percentage increase is formatted properly
    try:
        perc_increase = _perc_to_float(perc_increase)
    except ValueError as e: 
        perc_increase = 1
        slackAPI.send_message(f"Error for domain {domain}, row {row_idx}, couldn't format the percentage value:\n ```{str(e)}```")
        client_message = str(e)

    # Update google sheet
    cell_address = f'K{row_idx+2}'
    cell_value = [tw_count, lw_count, time.asctime(), client_message]
    result = gsheetAPI.update_sheet(cell_address, cell_value)
    
    # Log results
    write_log(f"{result['updatedRange']} - {cell_value}")
    
    # Looker
    looker_url = generate_looker_url(row_dict.get('account_id')
                                      , row_dict.get('site_id')
                                      , row_dict.get('url_match'))

    # Check logic and send to slack:
    if tw_count >= (lw_count + lw_count * perc_increase):
        slackAPI.send_message(f'''ALERT! :warning: 
Domain: _{domain}_ | Url: _{url}_ | Time: _{time.asctime()}_
This week's requests exceded last week by the given percentage:
```- This week's count: {tw_count}
- Last week's count: {lw_count}
- Percentage threshold: {perc_increase}
- Actual percentage increase: {round(tw_count/lw_count*100-100, 2)}%```
<{gsheetAPI.gsheet_link}|Attack monitor sheet>
<{looker_url}|Looker Dashboard>''')

In [32]:
# --------- #
# MAIN LOOP #
# --------- #
if __name__ == '__main__':
    while True:
        # Retrieve the sheet and add the queries
        gsheet_df = gsheetAPI.retrieve_sheet_as_df()
        gsheet_df_queries = athenaAPI.add_row_queries(gsheet_df)
        write_log(f'Data from Google Sheet retrieved at {time.asctime()}')

        # Prepare a list of dicts
        row_list = [row for row in gsheet_df_queries.iterrows() if check_row_validity(row)]

        # Multiprocessing to run the queries simultaneously, it has to stay in the main function
        # The argument is the full dictionary, I need most of the arguments
        write_log(f'Beginning multiprocessing at {time.asctime()}, running {len(row_list) * 2} queries on {mp.cpu_count()} threads')
        time_start = time.time()
        with mp.Pool(mp.cpu_count()) as pool:
            results = [pool.apply(get_data, args=row) for row in row_list]

        # Print how long it took for a full cycle
        exec_time = "{:.2f}".format(time.time() - time_start)
        write_log(f'Multiprocessing completed in {exec_time} seconds')

KeyError: 'url'

In [18]:
[x for x in range(1,7)]

[1, 2, 3, 4, 5, 6]

In [14]:
#output = []
for x in range(1,7):
    for y in range(1,7):
        ooutput.append(x + y)

NameError: name 'ooutput' is not defined

In [27]:
output_sorted = sorted([x + y for x in range(1,7) for y in range(1,7)])

# Il set e' una struttura dove non ci possono essere duplicati
output_no_duplicates = set(output_sorted)

for n in output_no_duplicates:
    print('il numero', n , 'compare', output_sorted.count(n), 'volta/e')

il numero 2 compare 1 volta/e
il numero 3 compare 2 volta/e
il numero 4 compare 3 volta/e
il numero 5 compare 4 volta/e
il numero 6 compare 5 volta/e
il numero 7 compare 6 volta/e
il numero 8 compare 5 volta/e
il numero 9 compare 4 volta/e
il numero 10 compare 3 volta/e
il numero 11 compare 2 volta/e
il numero 12 compare 1 volta/e


In [31]:
gsheet_df

Unnamed: 0,name,domain,perc_increase,account_id,site_id,url_match,and_condition,note,attack_category,owner,tw_count,lw_count,last_execution,client_message
0,first test attack alerting,mercury.worldremit.com,25%,2e8d6f13-c651-46ca-bfe5-2293e4586ede,32261b05-3a41-45df-ae4c-199945da4c96,/auth/login,AND action is NULL,test to check if this works,,Atul,1367,1430,Wed Apr 15 11:26:50 2020,Successful
1,first test attack alerting,www.realtor.com,15%,20f61841-e844-4532-8c08-dcd517daabb8,f44b7a05-3268-451b-ae56-79dddd9199c9,realestat,AND action is NULL,test to check if this works,,Emanuele,1153808,982558,Wed Apr 15 11:27:12 2020,Successful
2,first test attack alerting,www.realtor.com,10%,20f61841-e844-4532-8c08-dcd517daabb8,f44b7a05-3268-451b-ae56-79dddd9199c9,realestateandhomes-detai,AND action is NULL,test to check if this works,,Emanuele,87570,58652,Wed Apr 15 11:27:35 2020,Successful
3,first test attack alerting,www.realtor.com,5%,20f61841-e844-4532-8c08-dcd517daabb8,f44b7a05-3268-451b-ae56-79dddd9199c9,,AND action is NULL,test to check if this works,,Emanuele,1298537,1171646,Wed Apr 15 11:27:55 2020,Successful
4,first test attack alerting,www.realtor.com,20%,20f61841-e844-4532-8c08-dcd517daabb8,f44b7a05-3268-451b-ae56-79dddd9199c9,sell,AND action is NULL,test to check if this works,,Emanuele,203,74,Wed Apr 15 10:16:56 2020,Successful


In [9]:
output = []
for x, y in range(1,7), range(1,7):
    output.append(x + y)

ValueError: too many values to unpack (expected 2)

In [29]:
import requests

In [None]:
curl -s --data "api_id=37360&api_key=91391cbe-9df1-4eee-94c0-3eb05b7a11eb&domain=butlerlab.ml&site_ip=2.2.4.4" 

In [34]:
API_ENDPOINT = 'https://my.imperva.com/api/prov/v1/sites/add'
data = {'api_id':'37360',
        'api_key':'91391cbe-9df1-4eee-94c0-3eb05b7a11eb',
        'domain':'butlerlab.ml',
        'site_ip':'2.2.4.4'}
r = requests.post(url = API_ENDPOINT, data = data) 

In [35]:
r.json()

{'site_id': 89719453,
 'status': 'pending-dns-changes',
 'domain': 'www.butlerlab.ml',
 'account_id': 1423865,
 'acceleration_level': 'advanced',
 'site_creation_date': 1586967799000,
 'ips': ['2.2.4.4'],
 'dns': [{'dns_record_name': 'www.butlerlab.ml',
   'set_type_to': 'CNAME',
   'set_data_to': ['sy4b6pj.x.incapdns.net']},
  {'dns_record_name': 'butlerlab.ml',
   'set_type_to': 'A',
   'set_data_to': ['45.223.19.233', '45.223.25.233']}],
 'original_dns': [{'dns_record_name': 'butlerlab.ml',
   'set_type_to': 'A',
   'set_data_to': ['2.2.4.4']},
  {'dns_record_name': 'www.butlerlab.ml',
   'set_type_to': 'A',
   'set_data_to': ['2.2.4.4']}],
 'active': 'active',
 'support_all_tls_versions': False,
 'use_wildcard_san_instead_of_full_domain_san': True,
 'add_naked_domain_san': True,
 'additionalErrors': [],
 'display_name': 'www.butlerlab.ml',
 'security': {'waf': {'rules': [{'action': 'api.threats.action.block_request',
     'action_text': 'Block Request',
     'id': 'api.threats.sql_

In [None]:
curl -s --data "api_id=37360&api_key=91391cbe-9df1-4eee-94c0-3eb05b7a11eb&site_id=90456147" https://my.imperva.com/api/prov/v1/sites/status

In [36]:
API_ENDPOINT = 'https://my.imperva.com/api/prov/v1/sites/status'
data = {'api_id':'37360',
        'api_key':'91391cbe-9df1-4eee-94c0-3eb05b7a11eb',
        'site_id':'90456147'}
r = requests.post(url = API_ENDPOINT, data = data) 

In [37]:
r.json()

{'site_id': 90456147,
 'statusEnum': 'fully_configured',
 'status': 'fully-configured',
 'domain': 'a-team.mscloudwaf.com',
 'account_id': 1423865,
 'acceleration_level': 'advanced',
 'site_creation_date': 1584031749000,
 'ips': ['a-team.mscloudwaf.com.s3-website-us-west-1.amazonaws.com'],
 'dns': [{'dns_record_name': 'a-team.mscloudwaf.com',
   'set_type_to': 'CNAME',
   'set_data_to': ['ncpuywo.x.incapdns.net']}],
 'original_dns': [{'dns_record_name': 'Not Available', 'set_data_to': []}],
 'active': 'active',
 'support_all_tls_versions': False,
 'use_wildcard_san_instead_of_full_domain_san': True,
 'add_naked_domain_san': True,
 'additionalErrors': [],
 'display_name': 'a-team.mscloudwaf.com',
 'security': {'waf': {'rules': [{'action': 'api.threats.action.block_request',
     'action_text': 'Block Request',
     'id': 'api.threats.sql_injection',
     'name': 'SQL Injection'},
    {'action': 'api.threats.action.alert',
     'action_text': 'Alert Only',
     'id': 'api.threats.cross_s

In [38]:
numbers=list(range(1,6))
print(numbers)

[1, 2, 3, 4, 5]
