<h1> N-Able - Import / Concat / Analyze Trends </h1>

# Import Modules, Create env Variables

In [None]:
# data import and file manipulation
import os
import requests
from requests.structures import CaseInsensitiveDict
import json
import csv
import xlrd

#data conditioning
import pandas as pd
import numpy as np
import re
import datetime as dt

#data visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# UTC timestamp embedded in export filenames for reference.
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
# formats to exactly the same string.
current_time = dt.datetime.now(dt.timezone.utc).strftime('%Y_%m_%d_%H%M%S')

# git repo folder
git_folder = 'd:/git/example_infrastructure_data_dev'

# dictionary directory, derived from the repo folder so the two cannot drift apart
dictionary_dir = f'{git_folder}/dictionaries'

# export folder will contain all csv exported DataFrames for Ticket Creation
export_folder = 'd:/exports'

In [None]:
# Folder that is walked to collect the exported N-Able system audit log files.
source_dir = 'D:/project_docs/abc_nable_migration/abc_nable_exports/system_audit_logs_last_2mo'

# NOTE(review): not referenced by the visible cells, which write to export_folder — confirm it is still needed.
export_dir = 'D:/project_docs/abc_nable_migration/raw_exports'

In [None]:
# Collect the path of every file found anywhere beneath source_dir.
source_files = []
for root, dirs, files in os.walk(source_dir):
    # One "<folder>/<name>" entry per file in the folder currently visited.
    source_files.extend(f"{root}/{file}" for file in files)

In [None]:
# Display the collected file paths for inspection.
source_files

In [None]:
# Multi-line string holding a list-style listing of '.csv' entries.
# NOTE(review): the entries carry no file names — presumably a redacted copy of the file listing; confirm.
source_files_str = """
['.csv',
 '.csv',
 '.csv',
 '.csv',
 '.csv']
"""

# Create Login / Logout DataFrame

In [None]:
# Load the login export: the first 12 lines are skipped and the '#' column becomes the index.
# NOTE(review): the path is just '.csv' — presumably the real file name was redacted; confirm before running.
df_login = pd.read_csv('.csv',skiprows=12,index_col='#')

In [None]:
# Display the login DataFrame for inspection.
df_login

In [None]:
# Load the logout export: the first 12 lines are skipped and the '#' column becomes the index.
# NOTE(review): the path is just '.csv' — presumably the real file name was redacted; confirm before running.
df_logout = pd.read_csv('.csv',skiprows=12,index_col='#')

In [None]:
# Stack the login rows followed by the logout rows; ignore_index discards the '#' index for a fresh one.
df_login_activity = pd.concat([df_login,df_logout],ignore_index=True)

In [None]:
# Parse the 'Date and Time (HH:MM)' column into datetimes.
# NOTE(review): `unit` is documented for numeric epoch input — confirm it is intended for this column.
df_timefix = pd.to_datetime(df_login_activity['Date and Time (HH:MM)'],unit='ns')
# Write the parsed values back, cast to one-second resolution.
df_login_activity['Date and Time (HH:MM)'] = df_timefix.values.astype('datetime64[s]')

In [None]:
def login_success(string):
    """Classify an audit-log detail message into an activity label.

    The message is lower-cased and checked for keywords in priority order:
    'success', then 'pin', then 'password'.

    Args:
        string: Detail text of one audit-log row.

    Returns:
        'login:successful', 'login:bad pin entered',
        'login:bad password entered', or 'logout:successful' when none of
        the keywords occurs.
    """
    lowered = string.lower()
    if 'success' in lowered:
        return 'login:successful'
    if 'pin' in lowered:
        return 'login:bad pin entered'
    if 'password' in lowered:
        return 'login:bad password entered'
    # No login keyword matched, so the row is labelled as a logout.
    return 'logout:successful'

In [None]:
# Label every row by running login_success over its Details text.
df_login_activity['activityResult'] = df_login_activity['Details'].apply(login_success)

# Data Shaping

## Standardize Column Names

In [None]:
# Read the naming dictionary and pair each currentColumn entry with its
# standardColumn entry, row by row, to form the rename lookup.
df = pd.read_csv(f'{dictionary_dir}/standard_column_naming.dict')
column_rename_dict = dict(zip(df['currentColumn'], df['standardColumn']))

In [None]:
def column_names(string, rename_map=None, *, verbose=True):
    """Return the standard name for a column, or the column unchanged.

    Each key of the rename mapping is lower-cased and tried as a regular
    expression against the lower-cased column name. A key also matches when
    it equals the column name literally (case-insensitively), which covers
    keys containing regex metacharacters such as parentheses. The value of
    the first matching key is returned.

    Args:
        string: Column name to standardize.
        rename_map: Mapping of keyword -> standard column name. Defaults to
            the module-level ``column_rename_dict``.
        verbose: When true (the default), print the comparison trace.

    Returns:
        The standard name of the first matching keyword, or ``string``
        itself when nothing matches or ``string`` is not a str.
    """
    if rename_map is None:
        rename_map = column_rename_dict
    if not isinstance(string, str):
        # Non-text column labels cannot match any keyword.
        return string

    lowered = string.lower()
    for keyword, standard in rename_map.items():
        if not isinstance(keyword, str):
            # Skip blank/NaN dictionary rows instead of aborting the scan.
            continue
        pattern = keyword.lower()
        if verbose:
            print(f'comparing {pattern} with {lowered}')
        try:
            # Searching avoids treating the replacement value as a regex
            # template, which the earlier sub-and-compare approach did.
            found = re.search(pattern, lowered) is not None
        except re.error as exc:
            # An invalid pattern only disqualifies this keyword's regex
            # match; the literal comparison below still applies.
            if verbose:
                print(exc)
            found = False
        if found or pattern == lowered:
            if verbose:
                print(f'Keyword found: {keyword}')
                print(f'Replacement value: {standard}')
                print('\n')
            return standard
    return string

In [None]:
# Standardize all column names in a single rename. Building the complete
# mapping first means every original column is renamed exactly once; renaming
# one column at a time could rename a column again when its new name equals
# the original name of a column processed later.
df_login_activity.rename(
    columns={column: column_names(column) for column in df_login_activity.columns},
    inplace=True,
)

## Standardize Client Names

In [None]:
# Long-form phrase -> abbreviation; the more specific phrase is listed first
# so it is tried before the shorter one it contains.
cu_dict = {
    'Federal Credit Union': 'FCU',
    'Credit Union': 'CU',
}


def reword_creditunion(string):
    """Abbreviate credit-union wording in a client name.

    The phrases in ``cu_dict`` are tried in order; the first phrase that
    occurs in ``string`` has all of its occurrences replaced and the result
    is returned immediately, without trying the remaining phrases.

    Args:
        string: Client name.

    Returns:
        The abbreviated name, or ``string`` unchanged when no phrase occurs.
    """
    for phrase, abbreviation in cu_dict.items():
        shortened, hits = re.subn(phrase, abbreviation, string)
        if hits:
            return shortened
    return string

In [None]:
# Abbreviate credit-union wording in every clientName value.
df_login_activity['clientName'] = df_login_activity['clientName'].apply(reword_creditunion)

In [None]:
def fill_client_names(string):
    """Translate the '--' placeholder into 'All Clients'.

    Args:
        string: Client name value.

    Returns:
        'All Clients' when ``string`` equals '--'; otherwise ``string``
        itself, unchanged.
    """
    return 'All Clients' if string == '--' else string

In [None]:
# Replace the '--' placeholder in clientName with 'All Clients'.
df_login_activity['clientName'] = df_login_activity['clientName'].apply(fill_client_names)

In [None]:
# Remove the siteName column from the frame in place.
df_login_activity.drop('siteName',inplace=True,axis=1)

In [None]:
# Display the shaped login/logout frame for inspection.
df_login_activity

## Add Report Creation Date Info Column

In [None]:
# Stamp every row with the UTC time at which this report was generated.
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
# formats to exactly the same string.
report_creation_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
df_login_activity['reportCreationDate'] = report_creation_date

In [None]:
# Write the login/logout audit frame to a timestamped CSV, without the index.
df_login_activity.to_csv(
    f'{export_folder}/nable_login_logout_audit_logs_{current_time}.csv',
    index=False,
)

In [None]:
# Count the rows labelled 'login:successful' per e-mail address.
df_login_activity[df_login_activity['activityResult'] == 'login:successful']['emailAddress'].value_counts()

In [None]:
# Derive day- and month-level periods from the timestamp column.
# The earlier `.iloc[:]` only made a needless full-frame slice, and the
# lowercase 'd'/'m' aliases are replaced by the canonical uppercase period
# aliases, which produce the same periods.
df_login_activity['dayOccurred'] = df_login_activity['timestamp'].dt.to_period('D')
df_login_activity['monthOccurred'] = df_login_activity['timestamp'].dt.to_period('M')

In [None]:
# Draw on a 100 x 30 figure: successful logins per month, one bar colour per e-mail address.
fig = plt.figure(figsize=(100,30))
successful_logins = df_login_activity[df_login_activity['activityResult'] == 'login:successful']
sns.countplot(
    data=successful_logins.sort_values('clientName', ascending=True),
    x='monthOccurred',
    hue='emailAddress',
)

# Create Activity DataFrame

In [None]:
# Load the add, delete and modify change-log exports; each skips its first 11 lines and indexes on '#'.
# NOTE(review): all three paths are just '.csv' — presumably redacted file names; confirm before running.
df_add = pd.read_csv('.csv',skiprows=11,index_col='#')
df_delete = pd.read_csv('.csv',skiprows=11,index_col='#')
df_modify = pd.read_csv('.csv',skiprows=11,index_col='#')

In [None]:
# Stack the delete rows followed by the add rows under a fresh index.
df_change_log_audit = pd.concat([df_delete,df_add],ignore_index=True)

In [None]:
# Put the modify rows in front of the rows already combined, re-indexing again.
df_change_log_audit = pd.concat([df_modify,df_change_log_audit],ignore_index=True)

# Data Shaping

## Standardize Column Names

In [None]:
# Standardize all column names in a single rename. Building the complete
# mapping first means every original column is renamed exactly once; renaming
# one column at a time could rename a column again when its new name equals
# the original name of a column processed later.
df_change_log_audit.rename(
    columns={column: column_names(column) for column in df_change_log_audit.columns},
    inplace=True,
)

## Standardize Client Names

In [None]:
# Abbreviate credit-union wording in every clientName value.
df_change_log_audit['clientName'] = df_change_log_audit['clientName'].apply(reword_creditunion)

In [None]:
# Replace the '--' placeholder in clientName with 'All Clients'.
df_change_log_audit['clientName'] = df_change_log_audit['clientName'].apply(fill_client_names)

In [None]:
# Remove the siteName column from the frame in place.
df_change_log_audit.drop('siteName',inplace=True,axis=1)

In [None]:
def parse_where(string):
    """Extract the parenthesized contents of the first ``WHERE: (...)`` clause.

    Args:
        string: Audit detail text.

    Returns:
        The text between the parentheses of the first ``WHERE: (...)``
        occurrence, or '' when there is no such clause or ``string`` is not
        a str.
    """
    if not isinstance(string, str):
        # Missing cells (NaN/None) used to raise TypeError inside re.search;
        # they are treated like text without a WHERE clause.
        return ''
    match = re.search(r'WHERE\:\s\(([^\)]+)\)', string)
    return match.group(1) if match else ''

In [None]:
def actionScope(string):
    """Return the leading run of non-whitespace characters of ``string``.

    Args:
        string: Audit detail text.

    Returns:
        The first token, or ``None`` when ``string`` is empty, starts with
        whitespace, or is not a str.
    """
    if not isinstance(string, str):
        # Missing cells (NaN/None) used to raise TypeError inside re.search.
        return None
    match = re.search(r'^([^\s]+)', string)
    return match.group(1) if match else None

In [None]:
def actionType(string):
    """Return which audit action keyword a text mentions.

    The text is lower-cased and searched for 'modification', 'deletion' or
    'addition'; the keyword occurring earliest in the text wins.

    Args:
        string: Audit action text.

    Returns:
        The matched keyword in lower case, ``None`` when no keyword occurs,
        or ``string`` itself when it is not a str (e.g. NaN or None).
    """
    if not isinstance(string, str):
        # Replaces the former bare `except:`: non-text values are still
        # handed back untouched, but unrelated errors are no longer hidden.
        return string
    match = re.search(r'(modification|deletion|addition)', string.lower())
    return match.group(1) if match else None

In [None]:
# Derive auditType by running actionType over each auditAction value.
df_change_log_audit['auditType'] = df_change_log_audit['auditAction'].apply(actionType)

In [None]:
# Remove the auditAction column from the frame in place.
df_change_log_audit.drop('auditAction',inplace=True,axis=1)

In [None]:
# Display the change-log frame for inspection.
df_change_log_audit

In [None]:
# Store the contents of each details string's WHERE clause in a WHERE column.
df_change_log_audit['WHERE'] = df_change_log_audit['details'].apply(parse_where)

In [None]:
# Store the first whitespace-delimited token of each details string as actionScope.
df_change_log_audit['actionScope'] = df_change_log_audit['details'].apply(actionScope)

In [None]:
# Keep only the reporting columns, in presentation order. The explicit copy
# makes the result an independent frame, so assigning new columns to it does
# not trigger pandas' SettingWithCopyWarning.
df_change_log_audit = df_change_log_audit[
    ['emailAddress', 'clientName', 'timestamp', 'auditType', 'actionScope', 'WHERE', 'details']
].copy()

In [None]:
# Display the reordered change-log frame for inspection.
df_change_log_audit

## Add Report Creation Date Info Column

In [None]:
# Stamp every row with the UTC time at which this report was generated.
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
# formats to exactly the same string.
report_creation_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
df_change_log_audit['reportCreationDate'] = report_creation_date

In [None]:
# Write the change-log audit frame to a timestamped CSV, without the index.
df_change_log_audit.to_csv(
    f'{export_folder}/nable_change_log_audit_2months_{current_time}.csv',
    index=False,
)

In [None]:
# Export how often each (emailAddress, auditType, actionScope) combination occurs.
# value_counts() keeps the three labels in the index, so writing it with
# index=False produced a file holding only the bare counts. reset_index turns
# the labels into ordinary columns, written alongside a 'count' column.
activity_counts = (
    df_change_log_audit[['emailAddress', 'auditType', 'actionScope']]
    .value_counts()
    .reset_index(name='count')
)
activity_counts.to_csv(
    f'{export_folder}/nable_changes_audit_activity_2months_multi_index_{current_time}.csv',
    index=False,
)

In [None]:
# Show the per-combination counts ordered by the first index level (emailAddress).
df_change_log_audit[['emailAddress','auditType','actionScope']].value_counts().sort_index(level=0,ascending=True)