<h1> DattoRMM - Software Cleanup </h1>

# Import Modules

In [None]:
# Data Shaping
import pandas as pd
import re
import datetime as dt

# Data ingestion
import requests

# File Handling and Export
import json
import csv

In [None]:
# Run-wide settings used by later cells.

# UTC timestamp (YYYY_MM_DD_HHMMSS) to add to filenames for reference.
# datetime.utcnow() is deprecated; an aware "now" in UTC formats identically.
current_time = dt.datetime.now(dt.timezone.utc).strftime('%Y_%m_%d_%H%M%S')

# git repo folder
git_folder = 'd:/git/example_infrastructure_data_dev'

# export folder will contain all csv exported DataFrames for Ticket Creation
export_folder = 'd:/exports/'

In [None]:
# Load the audit CSV into df_audit.
# NOTE(review): the path is just ".csv" - looks like a placeholder or redacted filename; confirm the intended file.
df_audit = pd.read_csv(".csv")

In [None]:
df_audit

### Import ConfigParser and Create env Variables

In [None]:
# import configparser for env secrets
from configparser import ConfigParser
from requests.structures import CaseInsensitiveDict

config = ConfigParser()
env_ini_path = f'{git_folder}/config/env.ini'

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# KeyError when a section is looked up later.
if not config.read(env_ini_path):
    raise FileNotFoundError(f'Could not read config file: {env_ini_path}')

In [None]:
# import and assign secrets from env.ini

# [dattormm] section of env.ini; later cells read base_uri, api_key and api_secret from it.
dattormm_config  = config['dattormm']

# Create Datto RMM Device DataFrame

## Create auth token

In [None]:
# Request an OAuth access token using the password grant.
# call token api url
token_uri = f"{dattormm_config['base_uri']}/auth/oauth/token"


# construct header
headers = CaseInsensitiveDict()
headers['Content-Type'] = 'application/x-www-form-urlencoded'

# construct req body (API key / secret are sent as username / password)
data = CaseInsensitiveDict()
data['grant_type'] = 'password'
data['username'] = dattormm_config['api_key']
data['password'] = dattormm_config['api_secret']

# request content response
resp = requests.post(token_uri, headers=headers, data=data, auth=('public-client', 'public'))
# Surface HTTP errors (bad credentials, wrong base_uri, ...) here instead of
# failing below with a KeyError on 'access_token'.
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)

# bearer token used by every later API call
access_token = c_dict['access_token']

## Create DataFrame via API Call Iteration


In [None]:

## Create Devices DataFrame
# Fetch every page of the account devices endpoint and combine them into df_devices.
request_url = f"{dattormm_config['base_uri']}/api/v2/account/devices"

# construct header
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = ''

print(f'Request URL: {request_url}')

# Follow pageDetails.nextPageUrl until it is empty, collecting one frame per page.
device_pages = []
next_page = request_url
while next_page:
    resp = requests.get(next_page, headers=headers, data=data)
    # Stop on HTTP errors rather than failing later with a KeyError on 'devices'.
    resp.raise_for_status()
    content = resp.content.decode('utf-8')
    c_dict = json.loads(content)

    device_pages.append(pd.DataFrame(c_dict['devices']))
    next_page = c_dict['pageDetails']['nextPageUrl']

# Concatenate once (not once per page) and renumber the rows so index labels
# are unique across pages.
df_devices = pd.concat(device_pages, ignore_index=True)

# Data Shaping

## Create New Columns from Dictionary Columns

### Type | Category

In [None]:
def device_category(device):
    """Return the ``'category'`` entry of a deviceType dictionary.

    Args:
        device: The deviceType value for one device; expected to be a dict
            with a ``'category'`` key, or a missing value (None / NaN).

    Returns:
        ``device['category']`` for a dict, otherwise None.

    Raises:
        KeyError: If ``device`` is a dict without a ``'category'`` key.
    """
    # Missing cells in a pandas column can be NaN (a float) as well as None;
    # neither can be subscripted, so treat every non-dict value as missing.
    if not isinstance(device, dict):
        return None
    return device['category']

In [None]:
def device_type(device):
    """Return the ``'type'`` entry of a deviceType dictionary.

    Args:
        device: The deviceType value for one device; expected to be a dict
            with a ``'type'`` key, or a missing value (None / NaN).

    Returns:
        ``device['type']`` for a dict, otherwise None.

    Raises:
        KeyError: If ``device`` is a dict without a ``'type'`` key.
    """
    # Missing cells in a pandas column can be NaN (a float) as well as None;
    # neither can be subscripted, so treat every non-dict value as missing.
    if not isinstance(device, dict):
        return None
    return device['type']

In [None]:
# Split the nested deviceType dictionary into two flat columns.
df_devices['category'] = df_devices['deviceType'].apply(device_category)
df_devices['type'] = df_devices['deviceType'].apply(device_type)

In [None]:
# deviceType has been flattened into 'category' and 'type'; remove the original column in place.
df_devices.drop(columns='deviceType',inplace=True)

## Create Time Columns and Datetime Shaping

### Add Timezone Column from UDF

In [None]:
# Timezone
def local_timezone(udf):
    """Return the ``'udf10'`` entry of a device's udf dictionary.

    Args:
        udf: The udf value for one device; expected to be a dict with a
            ``'udf10'`` key, or a missing value (None / NaN).

    Returns:
        ``udf['udf10']`` for a dict, otherwise None.

    Raises:
        KeyError: If ``udf`` is a dict without a ``'udf10'`` key.
    """
    # Same missing-value handling as device_category / device_type: a device
    # without a udf dictionary yields None instead of raising TypeError.
    if not isinstance(udf, dict):
        return None
    return udf['udf10']

# Copy the udf10 value of each device into its own localTimezone column.
df_devices['localTimezone'] = df_devices['udf'].apply(local_timezone)

# drop udf {inplace=True} - the only udf field used here has been extracted above
df_devices.drop('udf',axis=1,inplace=True)

### Create Date Correlation Columns

In [None]:
# all date columns
parse_dates =  ['lastAuditDate','lastSeen','lastReboot','creationDate',]

### Convert Epoch to UTC

In [None]:
# Interpret lastAuditDate as epoch milliseconds; unparseable values become NaT (errors='coerce').
df_devices['lastAuditDate'] = pd.to_datetime(df_devices['lastAuditDate'],unit='ms',errors='coerce')
#df_devices['lastAuditDate'].head(5)

In [None]:
# Interpret lastSeen as epoch milliseconds; unparseable values become NaT (errors='coerce').
df_devices['lastSeen'] = pd.to_datetime(df_devices['lastSeen'],unit='ms',errors='coerce')
#df_devices['lastSeen'].head(5)

In [None]:
# Interpret creationDate as epoch milliseconds; unparseable values become NaT (errors='coerce').
df_devices['creationDate'] = pd.to_datetime(df_devices['creationDate'],unit='ms',errors='coerce')
#df_devices['creationDate'].head(5)

In [None]:
# Interpret lastReboot as epoch milliseconds; unparseable values become NaT (errors='coerce').
df_devices['lastReboot'] = pd.to_datetime(df_devices['lastReboot'],unit='ms',errors='coerce')
#df_devices['lastReboot'].head(5)

### Define and apply functions to create correlation columns

In [None]:
def no_audit_7_days(last_audit):
    """Flag a device whose last audit is more than 7 days old.

    Args:
        last_audit: Naive timestamp of the last audit, expressed in UTC (as
            produced by ``pd.to_datetime(..., unit='ms')``); may be NaT.

    Returns:
        1 if ``last_audit`` is earlier than 7 days before the current UTC
        time, otherwise 0. NaT never compares as earlier, so it yields 0.
    """
    # The epoch-derived columns are naive UTC, so the cutoff must come from
    # UTC "now" (made naive for comparison); local wall-clock time would shift
    # the threshold by the machine's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=7)
    return 1 if last_audit < cutoff else 0

In [None]:
def offline_30_days(last_seen):
    """Flag a device that has not been seen for more than 30 days.

    Args:
        last_seen: Naive timestamp of when the device was last seen,
            expressed in UTC (as produced by
            ``pd.to_datetime(..., unit='ms')``); may be NaT.

    Returns:
        1 if ``last_seen`` is earlier than 30 days before the current UTC
        time, otherwise 0. NaT never compares as earlier, so it yields 0.
    """
    # The epoch-derived columns are naive UTC, so the cutoff must come from
    # UTC "now" (made naive for comparison); local wall-clock time would shift
    # the threshold by the machine's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=30)
    return 1 if last_seen < cutoff else 0

In [None]:
def no_reboot_30_days(last_reboot):
    """Flag a device that has not rebooted for more than 30 days.

    Args:
        last_reboot: Naive timestamp of the last reboot, expressed in UTC (as
            produced by ``pd.to_datetime(..., unit='ms')``); may be NaT.

    Returns:
        1 if ``last_reboot`` is earlier than 30 days before the current UTC
        time, otherwise 0. NaT never compares as earlier, so it yields 0.
    """
    # The epoch-derived columns are naive UTC, so the cutoff must come from
    # UTC "now" (made naive for comparison); local wall-clock time would shift
    # the threshold by the machine's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=30)
    return 1 if last_reboot < cutoff else 0

In [None]:
# Create Column - noAudit7Days: result of no_audit_7_days applied to each lastAuditDate value
df_devices['noAudit7Days'] = df_devices['lastAuditDate'].apply(no_audit_7_days)

In [None]:
# Create Column - offline30Days: result of offline_30_days applied to each lastSeen value
df_devices['offline30Days'] = df_devices['lastSeen'].apply(offline_30_days)

In [None]:
# Create Column - noReboot30Days: result of no_reboot_30_days applied to each lastReboot value
df_devices['noReboot30Days'] = df_devices['lastReboot'].apply(no_reboot_30_days)

## DattoRMM DataFrame Data Standardization Shaping

### Hostname to_upper()

In [None]:
# Upper-case every hostname so the values are in one consistent case.
df_devices['hostname'] = df_devices['hostname'].str.upper()

### Replace Dtypes with Int64

In [None]:
# Select the float, bool and uint8 columns, excluding patchStatusPercent, and list their names.
convert_to_int_mask = ((df_devices.dtypes == 'float') | (df_devices.dtypes == 'bool') | (df_devices.dtypes == 'uint8')) & (df_devices.columns != 'patchStatusPercent')
convert_to_int = df_devices.dtypes[convert_to_int_mask].index.tolist()

In [None]:
# Cast the selected columns to pandas' nullable Int64 dtype.
df_devices[convert_to_int] = df_devices[convert_to_int].astype('Int64')

In [None]:
# Keep only the columns carried into the software report; every other column
# (including type, localTimezone and the flag columns created above) is dropped here.
df_devices = df_devices[['uid','siteName','hostname','intIpAddress','operatingSystem','category','domain','lastSeen','lastAuditDate']]

In [None]:
# Restrict the report to devices of a single site (the site name is redacted in this copy).
df_devices = df_devices[df_devices['siteName'] == '[REDACTED]']

# Create Software Version DataFrame

## Prepare Data for recall and shaping

## Create Software DataFrame

* Pulling all software and versions for each device produces a high volume of data. Storing each call response in a JSON document and then reading it back keeps memory usage low: objects are created and then released from memory, while the data itself is retained at rest.
* The same idea is used when loading data out of JSON into a concatenated DataFrame, after splitting the data up from the nested 'software' dictionary.
* Although this may seem counterintuitive given that each device has different software installed, this method was preferred over using a database because it can be used by anyone.
* If a database or JSON were not used, every row would raise an error, because the columns never match from device to device; the pandas `concat` function is what aligns the mismatched columns.


### Import keywords dict for removing version numbers from software names

In [None]:
# Load the regex keywords used to strip version text from software names.
version_keyword_list = []

# Build the path from git_folder (same value as the previously hard-coded
# prefix) so the repo location is configured in one place. newline='' is what
# the csv module expects for files handed to csv.reader.
with open(f'{git_folder}/dictionaries/remove_software_version.dict', newline='') as version_keywords:
    csv_reader = csv.reader(version_keywords)
    # the first row is a header, not a keyword
    next(csv_reader, None)
    # first column of each row is the pattern; blank lines parse to [] and are skipped
    version_keyword_list = [str(row[0]) for row in csv_reader if row]

### FUNCTION: Explode Software Names / Versions into Columns

In [None]:
def explode_software(software):
    """Turn one software record into a single-entry ``{name: version}`` dict.

    Args:
        software: Mapping with ``'name'`` and ``'version'`` keys.

    Returns:
        A new dict whose only key is the software name and whose value is
        the software version.
    """
    return {software['name']: software['version']}

In [None]:
def strip_version_from_title(string, keywords=None):
    """Remove version-like fragments from a software title.

    Each keyword is applied as a regular expression and every match is
    deleted; after each removal, runs of two or more whitespace characters
    are collapsed to one space and the ends are trimmed. The title is
    printed before and after processing.

    Args:
        string: The software title to clean.
        keywords: Iterable of regex patterns to remove. Defaults to the
            module-level ``version_keyword_list``.

    Returns:
        The cleaned title (may be an empty string).
    """
    if keywords is None:
        keywords = version_keyword_list

    print(f'Software before: {string}')

    for kw in keywords:
        string = re.sub(kw, '', string)
        # \s{2,} collapses a whole run at once; the previous pairwise '\s\s'
        # left e.g. three spaces as two.
        string = re.sub(r'\s{2,}', ' ', string).strip()

    # Normalise once more so the result is tidy even when no keywords are given.
    string = re.sub(r'\s{2,}', ' ', string).strip()

    print(f'Software after: {string}')
    return string

In [None]:
def software_api_req(row):
    """Fetch the software audit for one device and merge it into its row.

    Args:
        row: One df_devices row (must provide ``'uid'``); its fields are
            copied into the result.

    Returns:
        A dict holding every field of ``row`` plus one entry per installed
        software title, keyed by the version-stripped name with the reported
        version as value. Titles that strip down to an empty string are
        skipped; titles that strip to the same name overwrite one another.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # request content response
    request_url = f"{dattormm_config['base_uri']}/api/v2/audit/device/{row['uid']}/software"

    # construct header
    headers = CaseInsensitiveDict()
    headers['Authorization'] = f'Bearer {access_token}'
    headers['Content-Type'] = 'application/json'

    # construct req body
    data = ''

    print(f'\nRequest URL: {request_url}\n\n')

    resp = requests.get(request_url, headers=headers, data=data)
    # Fail with the HTTP status instead of a KeyError on 'software' below.
    resp.raise_for_status()
    c_dict = json.loads(resp.content.decode('utf-8'))

    # Start the record from the device's own columns
    object_dict = dict(row)
    print(object_dict)

    # Add one key per software title
    for software in c_dict['software']:
        # Stripping is applied twice, as in the original code; presumably the
        # second pass catches keywords exposed by the first - TODO confirm.
        software_name = strip_version_from_title(software['name'])
        software_name = strip_version_from_title(software_name)
        if software_name:
            object_dict[software_name] = software['version']

    print('*'*50)

    return object_dict

In [None]:
# Placeholder frame; replaced below once every device has been queried.
df_software = pd.DataFrame()

# One record (device fields + software versions) per device, in df_devices order.
devices_software_list = []
for _, device_row in df_devices.iterrows():
    device_record = software_api_req(device_row)
    devices_software_list.append(device_record)

# Records have differing keys; DataFrame() unions them into columns and fills gaps with NaN.
df_software = pd.DataFrame(devices_software_list)

# Shape Software DataFrame

### Set Index to device UID

In [None]:
# Use the device uid as the row index (in place); 'uid' is no longer a regular column afterwards.
df_software.set_index('uid', inplace=True)

# Create Software Counts DataFrame

In [None]:
# Device attribute columns of df_software; everything else is a software-title column.
device_columns = ['siteName','hostname','intIpAddress','operatingSystem','category','domain','lastSeen','lastAuditDate']

In [None]:
# Count, per software title, how many devices report a value for it.
software_counts_dict = {}

# Missing versions become 0 so that "not installed" can be told apart by value.
software_only = df_software.fillna(0).drop(device_columns,axis=1)

for _, device_row in software_only.iterrows():
    for title, version in device_row.items():
        if version == 0:
            # title not present on this device
            continue
        software_counts_dict[title] = software_counts_dict.get(title, 0) + 1



In [None]:
# Two-column frame: one row per software title with the number of devices counted for it.
df_counts = pd.DataFrame(software_counts_dict.items(),columns=['softwareName','count'])

# Export DataFrame to CSV

In [None]:
# Write the counts to the export folder under a timestamped name. The bare
# '.csv' path was shared with the df_software export, so this file was
# overwritten by the next cell.
df_counts.to_csv(f'{export_folder}software_counts_{current_time}.csv',index=False)

In [None]:
# Write the per-device software table to the export folder under its own
# timestamped name (the bare '.csv' path collided with the counts export).
# The index is written because it holds the device uid; index=False dropped
# the only device identifier from the file.
df_software.to_csv(f'{export_folder}software_by_device_{current_time}.csv',index=True)