<h1> HaloPSA - False Alerts by Resolution Category </h1>

In [None]:
#data conditioning
import pandas as pd
import numpy as np
import re
import datetime as dt

# data import and file manipulation
import os
import json
import csv
import xlrd
import zipfile


# API and Web Requests
import requests
from requests.structures import CaseInsensitiveDict
import urllib3 # make url requests
import shutil # manage packages

#data visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# import configparser for env secrets
from configparser import ConfigParser
import requests
from requests.structures import CaseInsensitiveDict

# add current timestamp to filename for reference
# (timezone-aware replacement for the deprecated dt.datetime.utcnow(); same string format)
current_time = dt.datetime.now(dt.timezone.utc).strftime('%Y_%m_%d_%H%M%S')

# git repo folder
git_folder = 'd:/git/example_infrastructure_data_dev'

# export folder will contain all csv exported DataFrames for Ticket Creation
export_folder = 'd:/exports/'

config_path = f'{git_folder}/config/env.ini'
config = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list it actually parsed,
# so an empty result means the secrets file was not loaded
if not config.read(config_path):
    raise FileNotFoundError(f'Could not read config file: {config_path}')

# import and assign secrets from env.ini
halopsa = config['halopsa']


## Create Auth Token

In [None]:
# call token api url
# NOTE(review): the tenant is hard-coded in the query string; the params that were
# commented out on the original request suggest it was meant to come from env.ini - confirm
token_uri = f"{halopsa['base_uri']}/auth/token?tenant=example"

# construct header
headers = CaseInsensitiveDict()
headers['Content-Type'] = 'application/x-www-form-urlencoded'

# construct req body (client-credentials grant, sent as form fields)
data = CaseInsensitiveDict()
data['grant_type'] = 'client_credentials'
data['client_id'] = halopsa['client_id']
data['client_secret'] = halopsa['client_secret']
data['scope'] = 'all'

# request content response
resp = requests.post(token_uri, headers=headers, data=data, timeout=60)
# stop on an HTTP error instead of failing later with a KeyError on 'access_token'
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)

access_token = c_dict['access_token']

# Create Asset DataFrame

In [None]:
# request content response
request_url = f"{halopsa['base_uri']}/api/Asset"

# construct header
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = ''

print(f'Request URL: {request_url}')

resp = requests.get(request_url, headers=headers, data=data, timeout=60)
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)

# iterate and combine remaining pages
# The next-page URL is looked up explicitly so that a missing key simply ends the loop;
# network, HTTP and JSON errors are no longer swallowed by a bare except.
asset_pages = [pd.DataFrame(c_dict['assets'])]
next_page = (c_dict.get('pageDetails') or {}).get('nextPageUrl')
while next_page:
    resp = requests.get(next_page, headers=headers, data=data, timeout=60)
    resp.raise_for_status()
    content = resp.content.decode('utf-8')
    c_dict = json.loads(content)

    asset_pages.append(pd.DataFrame(c_dict['assets']))
    next_page = (c_dict.get('pageDetails') or {}).get('nextPageUrl')

if len(asset_pages) == 1:
    print(f'All assets on first page.  Total Assets: {c_dict.get("record_count")}')

# concatenate once; ignore_index=True gives the combined frame a unique row index
df_assets = pd.concat(asset_pages, ignore_index=True)

### Add Report Creation Date Info Column

In [None]:
# stamp every asset row with the UTC time at which this report was generated
# (dt.datetime.utcnow() is deprecated; the formatted string is unchanged)
report_creation_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
df_assets['reportCreationDate'] = report_creation_date

### Export Assets DataFrame to CSV

In [None]:
# write the assets report into the export folder, tagged with the run timestamp
assets_csv_path = f'{export_folder}halopsa_full_assets_report_{current_time}.csv'
df_assets.to_csv(assets_csv_path, index=False)

# Create Agents (Users) DataFrame

In [None]:
# request content response
request_url = f"{halopsa['base_uri']}/api/Agent"

# construct header
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = CaseInsensitiveDict()

# construct req params
# NOTE(review): this request spells the flag 'pageinate' while the Tickets request in this
# notebook uses 'paginate' - confirm which spelling the HaloPSA API expects before changing either
params = CaseInsensitiveDict()
params['pageinate'] = True

print(f'Request URL: {request_url}')

resp = requests.get(request_url, headers=headers, data=data, params=params, timeout=60)
# stop on an HTTP error instead of building a DataFrame from an error payload
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)


# the whole response is loaded as-is; no further pages are requested here
df_agents = pd.DataFrame(c_dict)

### Convert Timestamp to UTC

In [None]:
# parse the raw login timestamps; values that cannot be parsed are coerced to NaT
parsed_last_login = pd.to_datetime(df_agents['lastlogindate'], unit='ns', errors='coerce')
# store the values at second resolution under the new column name
df_agents['lastlogin'] = parsed_last_login.values.astype('datetime64[s]')
# the raw column is superseded by 'lastlogin'
df_agents.drop(columns='lastlogindate', inplace=True)

### Add Report Creation Date Info Column

In [None]:
# stamp every agent row with the UTC time at which this report was generated
# (dt.datetime.utcnow() is deprecated; the formatted string is unchanged)
report_creation_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
df_agents['reportCreationDate'] = report_creation_date

### Export Agents DataFrame to CSV

In [None]:
# write the agents report into the export folder, tagged with the run timestamp
agents_csv_path = f'{export_folder}halopsa_full_agent_report_{current_time}.csv'
df_agents.to_csv(agents_csv_path, index=False)

# Create Tickets DataFrame

## Create Ticket Type Dictionary

In [None]:
# no query parameters are sent for this lookup
params = CaseInsensitiveDict()

# halo resource name that is appended to the API base URL by the request cell
resource = 'TicketType'

In [None]:
# request content response
request_url = f"{halopsa['base_uri']}/api/{resource}"

# construct header
# (the form-urlencoded header that used to be built first was overwritten before use)
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = ''

print(f'Request URL: {request_url}')

resp = requests.get(request_url, headers=headers, data=data, params=params, timeout=60)
# stop on an HTTP error instead of building a DataFrame from an error payload
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)
df_tickettype = pd.DataFrame(c_dict)

In [None]:
# lookup table: ticket type id -> ticket type name
tickettype_dict = {
    record['id']: record['name']
    for record in df_tickettype.to_dict('records')
}

## Create Ticket Status Description Dictionary

In [None]:
# no query parameters are sent for this lookup
params = CaseInsensitiveDict()

# halo resource name that is appended to the API base URL by the request cell
resource = 'Status'

In [None]:
# request content response
request_url = f"{halopsa['base_uri']}/api/{resource}"

# construct header
# (the form-urlencoded header that used to be built first was overwritten before use)
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = ''

print(f'Request URL: {request_url}')

resp = requests.get(request_url, headers=headers, data=data, params=params, timeout=60)
# stop on an HTTP error instead of building a DataFrame from an error payload
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)
df_ticketstatus = pd.DataFrame(c_dict)

In [None]:
# lookup table: status id -> status name
ticketstatus_dict = {
    record['id']: record['name']
    for record in df_ticketstatus.to_dict('records')
}

## Create Ticket SLA Dictionary

## Create Tickets DataFrame

In [None]:
# construct params
# NOTE(review): this request spells the flag 'paginate' while the Agent request in this
# notebook uses 'pageinate' - confirm which spelling the HaloPSA API expects
params = CaseInsensitiveDict()
params.update({'paginate': True, 'count': 100000})

# halo resource name that is appended to the API base URL by the request cell
resource = 'Tickets'

In [None]:
# request content response
request_url = f"{halopsa['base_uri']}/api/{resource}"

# construct header
# (the form-urlencoded header that used to be built first was overwritten before use)
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = ''

print(f'Request URL: {request_url}')

resp = requests.get(request_url, headers=headers, data=data, params=params, timeout=60)
# stop on an HTTP error instead of failing later with a KeyError on 'tickets'
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)
# only this single response is loaded; no follow-up pages are requested
df_tickets = pd.DataFrame(c_dict['tickets'])

### Convert Timestamp to UTC

In [None]:
# timestamp columns of the ticket payload, in the order they are converted
# ('responsedate' used to be converted twice; the second pass was a no-op)
ticket_date_columns = [
    'dateoccurred',
    'respondbydate',
    'responsedate',
    'lastactiondate',
    'lastincomingemail',
    'deadlinedate',
    'dateclosed',
    'startdate',
    'targetdate',
    'dateassigned',
    'fixbydate',
]

# df_timefix keeps the converted columns on their own; df_tickets is updated in place
df_timefix = pd.DataFrame()
for column in ticket_date_columns:
    # unparseable values are coerced to NaT; values are stored at second resolution
    df_timefix[column] = pd.to_datetime(df_tickets[column], unit='ns', errors='coerce').values.astype('datetime64[s]')
    df_tickets[column] = df_timefix[column]

### Add Report Creation Date Info Column

In [None]:
# stamp every ticket row with the UTC time at which this report was generated
# (dt.datetime.utcnow() is deprecated; the formatted string is unchanged)
report_creation_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
df_tickets['reportCreationDate'] = report_creation_date

## Map `_id` Columns to Their Names

In [None]:
# translate ticket type ids to names; ids missing from the lookup stay as they are
ticket_type_names = df_tickets['tickettype_id'].replace(tickettype_dict)
df_tickets['tickettype'] = ticket_type_names

In [None]:
# translate status ids to names; ids missing from the lookup stay as they are
ticket_status_names = df_tickets['status_id'].replace(ticketstatus_dict)
df_tickets['ticketstatus'] = ticket_status_names

### Fill 'Details' NA with ""

In [None]:
# replace missing details with an empty string so the regex parsers always get a str
# (assigning the result back avoids inplace fillna on a column selection, which
# may operate on a temporary copy and leave df_tickets unchanged)
df_tickets['details'] = df_tickets['details'].fillna("")

# Create Ticket Dataframe for Last 30 Days

In [None]:
# keep tickets that occurred within the last 30 days
# NOTE(review): the cutoff uses local dt.datetime.now() while the report timestamps in this
# notebook are generated in UTC - confirm which clock 'dateoccurred' is expressed in
cutoff = dt.datetime.now() - dt.timedelta(days=30)
# .copy() makes this an independent frame, so the columns added to it later
# are not written into a slice of df_tickets
df_tickets_last30days = df_tickets[df_tickets['dateoccurred'] > cutoff].copy()

In [None]:
# bucket each ticket by calendar day for the per-day plots
# assign() returns a new frame, so nothing is written into a slice of df_tickets;
# the no-op .iloc[:] is dropped and the day alias is spelled 'D'
df_tickets_last30days = df_tickets_last30days.assign(
    dayoccurred=df_tickets_last30days['dateoccurred'].dt.to_period('D')
)

In [None]:
# the full timestamp is no longer needed once 'dayoccurred' exists
df_tickets_last30days = df_tickets_last30days.drop(columns=['dateoccurred'])

## Distribution of Ticket Types over 30 days

In [None]:
# one bar group per day, one bar per ticket type
fig, ax = plt.subplots(figsize=(80, 40))
tickets_by_day = df_tickets_last30days.sort_values(by='dayoccurred')
sns.countplot(data=tickets_by_day, x='dayoccurred', hue='tickettype', ax=ax)
# save the chart into the export folder, tagged with the run timestamp
fig.savefig(f'{export_folder}halopsa_alerts_last30days_dist_{current_time}.png')

# Alert Ticket Analysis and Metrics

In [None]:
# keep only tickets of type 'Alert'
# .copy() makes this an independent frame, so the parse columns added to it later
# are not written into a slice of df_tickets_last30days
is_alert = df_tickets_last30days['tickettype'] == 'Alert'
df_alert_tickets_last30days = df_tickets_last30days[is_alert].copy()

## Assign Alert Source Based on Email Address Embedded in HTML Body

### Import parse keyword dictionary

In [None]:
# load the key phrase -> email source dictionary kept in the git repo
df = pd.read_csv(f'{git_folder}/dictionaries/halopsa_details_regex_ticket_source_dictionary.dict')
# one single-entry dict per dictionary row, in file order
details_parse_list = [
    {entry['keyPhrase']: entry['emailSource']}
    for _, entry in df.iterrows()
]

In [None]:
# display the parsed dictionary entries for a visual check
details_parse_list

### Define Parse Functions

In [None]:
def parse_from_dictionary(string, parse_list=None):
    """Return the lower-cased sources of the first dictionary pattern found in ``string``.

    Args:
        string: Text to search.
        parse_list: Optional list of ``{pattern: sources}`` dicts, searched in
            order with ``re.search``. Defaults to the module-level
            ``details_parse_list``.

    Returns:
        The comma-separated ``sources`` value of the first matching pattern as a
        list of lower-cased strings (empty segments dropped), or ``None`` when no
        pattern matches.
    """
    if parse_list is None:
        parse_list = details_parse_list
    for prog in parse_list:
        for pattern, sources in prog.items():
            if re.search(pattern, string):
                # split the comma-separated value, ignoring empty segments
                lower_list = [part.lower() for part in str(sources).split(',') if part]
                print(lower_list)
                return lower_list
    return None

In [None]:
# '@' followed by everything up to the first dot, then the run up to whitespace or '!'
_EMBEDDED_EMAIL_DOMAIN_PROG = re.compile(r'(@{1}[^\.]+\.[^\s\r\n\!]+)')


def parse_source_email_domain(string):
    """Return the distinct ``@domain`` fragments embedded in ``string``.

    Matches are lower-cased before they are de-duplicated, and the result keeps
    first-occurrence order, so the first element is the same on every run.

    Args:
        string: Text to search.

    Returns:
        A list of lower-cased matches, or ``None`` when nothing matches.
    """
    matches = _EMBEDDED_EMAIL_DOMAIN_PROG.findall(string)
    if not matches:
        return None
    # dict.fromkeys de-duplicates while preserving insertion order
    lower_list = list(dict.fromkeys(match.lower() for match in matches))
    print(lower_list)
    return lower_list

In [None]:
# host name captured from a proofpoint urldefense rewritten link (pattern unchanged)
_EMBEDDED_URL_PROG = re.compile(r'.*urldefense.proofpoint.com[^\_]+[\_\.]+([^\_\s\/]{2,20}\.[^\_\.\s\/]{3,20}\.[^\_\s\d\/\&\-]*)[\_]?')


def parse_source_url(string):
    """Return the distinct host names captured from urldefense links in ``string``.

    Matches are lower-cased before they are de-duplicated, and the result keeps
    first-occurrence order, so the first element is the same on every run.

    Args:
        string: Text to search.

    Returns:
        A list of lower-cased captured hosts, or ``None`` when nothing matches.
    """
    matches = _EMBEDDED_URL_PROG.findall(string)
    if not matches:
        return None
    # dict.fromkeys de-duplicates while preserving insertion order
    lower_list = list(dict.fromkeys(match.lower() for match in matches))
    print(lower_list)
    return lower_list

## Combine All Functions to Be Used on Any Column

In [None]:
# parsers are tried in this order; the first one that returns a non-empty result wins
parse_functions_list = [parse_source_url,parse_from_dictionary,parse_source_email_domain]

In [None]:
def return_email_parse_details(details, parsers=None):
    """Run the parsers in order and return the first non-empty result.

    A parser that raises is reported and skipped, so the remaining parsers still
    get a chance to match.

    Args:
        details: Text handed to every parser.
        parsers: Optional sequence of one-argument callables. Defaults to the
            module-level ``parse_functions_list``.

    Returns:
        The first truthy parser result, or ``None`` when no parser produced one.
    """
    if parsers is None:
        parsers = parse_functions_list
    for attempt, func in enumerate(parsers, start=1):
        print("trying function ", attempt)
        try:
            result = func(details)
        except Exception as e:
            # best effort: report the failure and fall through to the next parser
            print(e)
            continue
        if result:
            return result
    return None

In [None]:
# run the parser chain over every alert's details text
source_parse_results = df_alert_tickets_last30days['details'].apply(return_email_parse_details)
df_alert_tickets_last30days['sourceParseResults'] = source_parse_results

In [None]:
# mark rows where no parser matched
# (assigning the result back avoids inplace fillna on a column selection, which
# may operate on a temporary copy and leave the frame unchanged)
df_alert_tickets_last30days['sourceParseResults'] = (
    df_alert_tickets_last30days['sourceParseResults'].fillna(r"['COULD NOT PARSE']")
)

### Create Root Domain Column from Details Parse Column for ValueCounts by Domain

In [None]:
def root_domain_parse(string):
    """Return the label that precedes the final domain suffix of the first entry.

    Only ``string[0]`` is inspected, e.g. ``['@mail.example.com']`` gives
    ``'example'``.

    Args:
        string: Indexable value (normally a list of parsed sources).

    Returns:
        The captured label, or ``None`` when ``string`` is empty or not
        indexable, its first entry is not a ``str``, or the pattern does not match.
    """
    try:
        first_source = string[0]
    except (TypeError, IndexError, KeyError):
        # None / NaN / empty input: nothing to parse
        return None
    if not isinstance(first_source, str):
        return None
    match = re.search(r'([^\@\.]+)[\.\@]{1}[\w\d]{2,10}$', first_source)
    return match.group(1) if match else None

In [None]:
# derive the root domain from the first parsed source of every alert
root_parse_results = df_alert_tickets_last30days['sourceParseResults'].apply(root_domain_parse)
df_alert_tickets_last30days['rootParse'] = root_parse_results

In [None]:
# second name for the same DataFrame object (no copy is made), so changes made
# through either name are visible through both
df_alert_tickets_last30days_rootParse = df_alert_tickets_last30days

In [None]:
# label rows whose root domain could not be derived
# (fillna returns a new Series; the result has to be stored or it is discarded)
df_alert_tickets_last30days_rootParse['rootParse'] = (
    df_alert_tickets_last30days_rootParse['rootParse'].fillna('COULD NOT PARSE')
)

### Export to CSV and Plot Chart

In [None]:
# export the parsed alerts; index=False matches the other CSV exports in this notebook
# and keeps the leftover row index of the filtered frame out of the file
df_alert_tickets_last30days_rootParse.to_csv(
    export_folder + 'halopsa_alerts_last30days_parsed_' + str(current_time) + '.csv',
    index=False,
)

In [None]:
# display the parsed alerts for a visual check
df_alert_tickets_last30days_rootParse

In [None]:
# one bar group per day, one bar per parsed root domain
fig, ax = plt.subplots(figsize=(80, 40))
alerts_by_day = df_alert_tickets_last30days_rootParse.sort_values(by='dayoccurred')
sns.countplot(data=alerts_by_day, x='dayoccurred', hue='rootParse', ax=ax)

In [None]:
# the original call wrote to a file literally named '.csv' in the working directory;
# export into the export folder with a descriptive, timestamped name like the other reports
df_alert_tickets_last30days.to_csv(
    export_folder + 'halopsa_alerts_last30days_' + str(current_time) + '.csv'
)

In [None]:
# display whatever `df` currently holds (the name is reused by several cells)
df

## Cut Alert Summaries into Sections by "-" Delimiter

In [None]:
# captures each run of non-hyphen characters; because [^\-]+ also matches whitespace,
# the captured text can include spaces next to the hyphens
alerts_first_cut_prog = re.compile(r'\s?([^\-]+)\s?')

In [None]:
# collects every summary segment from all alert tickets (filled by the loop cell)
alerts_summary_list = []

In [None]:
# split every alert summary on '-' and collect the segments
# (missing summaries are skipped; they cannot be searched with a regex)
for summary in df_alert_tickets_last30days['summary'].dropna():
    for element in alerts_first_cut_prog.findall(summary):
        # the pattern keeps whitespace around a segment, which would make
        # 'CPU High ' and 'CPU High' count as different cuts
        element = element.strip()
        if element:
            alerts_summary_list.append(element)

In [None]:
# one row per collected segment; the single column is labelled 0 at this point
df = pd.DataFrame(alerts_summary_list)

In [None]:
# give the single segment column a descriptive name
df.rename(columns={0: 'cuts'}, inplace=True)

In [None]:
# display the segment DataFrame for a visual check
df

In [None]:
# count how often each segment occurs, most frequent first
cut_counts = df.value_counts().sort_values(ascending=False)
# keep the ten most frequent segments and turn the index back into a column
ten_most_common = cut_counts.head(10).reset_index()
top_alert_cuts = list(ten_most_common['cuts'])

In [None]:
# display the most frequent summary segments
top_alert_cuts

In [None]:
# export one CSV per top summary segment, containing every alert whose summary includes it
total_cuts = len(top_alert_cuts)
# numbering starts at 1 so the file label reads '1of10' ... '10of10'
for index, cut in enumerate(top_alert_cuts, start=1):
    alert_cut_dict = {'index': index, 'keyword': cut}
    print(alert_cut_dict)

    # literal substring match: segments come from raw summaries and may contain
    # regex metacharacters; missing summaries count as no match
    has_cut = df_alert_tickets_last30days['summary'].str.contains(cut, regex=False, na=False)
    # copy before inserting so the new column is not written into a slice
    df = df_alert_tickets_last30days[has_cut].copy()
    df.insert(0, '[KEYWORD]', cut)
    df.to_csv(export_folder + 'top_alert_summary_cuts_' + str(index) + 'of' + str(total_cuts) + '_' + str(current_time) + '.csv', index=False)

In [None]:
# display the alert tickets frame for a visual check
df_alert_tickets_last30days