<h1> DattoRMM - Software  </h1>

### Import Core Modules

In [None]:
# data import and file manipulation
import requests
import json

# response parsing
from lxml import etree
from bs4 import BeautifulSoup
from requests_html import AsyncHTMLSession

#data conditioning
import pandas as pd
import numpy as np
import re
import datetime as dt

#data visualization
[REDACTED]/.pyplot as plt
import seaborn as sns

### Create report_time var, git location, export location

In [None]:
# UTC timestamp (YYYY_MM_DD_HHMMSS) added to filenames for reference.
# datetime.utcnow() is deprecated since Python 3.12; formatting an aware
# "now" in UTC produces exactly the same string.
current_time = dt.datetime.now(dt.timezone.utc).strftime('%Y_%m_%d_%H%M%S')

# git repo folder
git_folder = 'd:/git/example_infrastructure_data_dev'

# export folder will contain all csv exported DataFrames for Ticket Creation
export_folder = 'd:/exports/'

### Import ConfigParser and Create env Variables

In [None]:
# import configparser for env secrets
from configparser import ConfigParser

# Parse the INI file stored under the git checkout.
# NOTE(review): ConfigParser.read() skips files it cannot open instead of
# raising, so a wrong path only shows up later as a KeyError on the section
# lookup - confirm the path exists on the machine running the notebook.
config = ConfigParser()
config.read(f'{git_folder}/config/env.ini')
# case-insensitive mapping used by later cells to build HTTP headers and form bodies
from requests.structures import CaseInsensitiveDict

In [None]:
# import and assign secrets from env.ini

# 'dattormm' section: later cells read base_uri, api_key and api_secret from it
dattormm_config  = config['dattormm']
# NOTE(review): mongodb_config and database are not referenced by any other
# cell visible in this notebook - confirm whether they are still needed.
mongodb_config = config['mongodb']

database = 'seed_data'

### Create Software Report Sites and Software Filter Dictionary

In [None]:
# Load the report dictionary (a CSV file) and keep the non-empty entries of the
# site and software columns as plain Python lists.
df = pd.read_csv('d:/git/example_infrastructure_data_dev/dictionaries/datto_rmm_software_management_report.dict')
software_mgmt = {
    'siteName': list(df['siteName'].dropna()),
    'softwareInstalled': list(df['softwareInstalled'].dropna()),
}

# Create Datto RMM Device DataFrame

## Create auth token

In [None]:
# call token api url
token_uri = f"{dattormm_config['base_uri']}/auth/oauth/token"


# construct header
headers = CaseInsensitiveDict()
headers['Content-Type'] = 'application/x-www-form-urlencoded'

# construct req body (password grant: the API key / secret are sent as
# username / password)
data = CaseInsensitiveDict()
data['grant_type'] = 'password'
data['username'] = dattormm_config['api_key']
data['password'] = dattormm_config['api_secret']

# request content response
# A timeout keeps the notebook from hanging on an unresponsive endpoint, and
# raise_for_status() turns a rejected login into an immediate HTTP error
# instead of a later KeyError on 'access_token'.
resp = requests.post(
    token_uri,
    headers=headers,
    data=data,
    auth=('public-client', 'public'),
    timeout=60,
)
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)

# bearer token used by every later API call
access_token = c_dict['access_token']

## Create DataFrame via API Call Iteration


In [None]:

## Create Devices DataFrame
# request content response
request_url = f"{dattormm_config['base_uri']}/api/v2/account/devices"

# construct header
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

print(f'Request URL: {request_url}')

# Walk the paginated device listing: every response carries the URL of the
# next page in pageDetails.nextPageUrl, which is falsy on the last page.
# Pages are collected in a list and concatenated once, which avoids re-copying
# the growing frame on every iteration.
device_pages = []
next_page = request_url
while next_page:
    resp = requests.get(next_page, headers=headers, timeout=60)
    # fail fast on an HTTP error instead of a KeyError on 'devices'
    resp.raise_for_status()
    content = resp.content.decode('utf-8')
    c_dict = json.loads(content)

    device_pages.append(pd.DataFrame(c_dict['devices']))
    next_page = c_dict['pageDetails']['nextPageUrl']

# ignore_index=True gives every device a unique row label; keeping the
# per-page labels would repeat 0..n once per page.
df_devices = pd.concat(device_pages, ignore_index=True)

# Data Shaping

## Create New Columns from Dictionary Columns

### Type | Category

In [None]:
def device_category(device):
    """Return the ``category`` entry of a device's ``deviceType`` mapping.

    Args:
        device: The ``deviceType`` value of one device row: a dict with a
            ``category`` key, or a missing value.

    Returns:
        The category, or ``None`` when ``device`` is not a dict (``None`` or
        the float NaN pandas uses for missing cells).

    Raises:
        KeyError: If ``device`` is a dict without a ``category`` key.
    """
    # NaN is not subscriptable, so every non-dict value is treated as
    # "no device type" rather than only an explicit None.
    if not isinstance(device, dict):
        return None
    return device['category']

In [None]:
def device_type(device):
    """Return the ``type`` entry of a device's ``deviceType`` mapping.

    Args:
        device: The ``deviceType`` value of one device row: a dict with a
            ``type`` key, or a missing value.

    Returns:
        The type, or ``None`` when ``device`` is not a dict (``None`` or the
        float NaN pandas uses for missing cells).

    Raises:
        KeyError: If ``device`` is a dict without a ``type`` key.
    """
    # NaN is not subscriptable, so every non-dict value is treated as
    # "no device type" rather than only an explicit None.
    if not isinstance(device, dict):
        return None
    return device['type']

In [None]:
# flatten the nested deviceType dict into two plain columns
df_devices['category'] = df_devices['deviceType'].apply(device_category)
df_devices['type'] = df_devices['deviceType'].apply(device_type)

In [None]:
# the nested deviceType column is dropped in place once it is no longer needed
df_devices.drop(columns='deviceType',inplace=True)

## Create Time Columns and Timedate Shaping

### Add Timezone Column from UDF

In [None]:
# Timezone
def local_timezone(udf):
    """Return the ``udf10`` entry of a device's ``udf`` mapping.

    Args:
        udf: The ``udf`` value of one device row: a dict with a ``udf10``
            key, or a missing value.

    Returns:
        The ``udf10`` value, or ``None`` when ``udf`` is not a dict
        (``None`` or NaN), matching how the device-type helpers treat
        missing input.

    Raises:
        KeyError: If ``udf`` is a dict without a ``udf10`` key.
    """
    if not isinstance(udf, dict):
        return None
    return udf['udf10']

# expose udf10 of each device's udf mapping as the localTimezone column
df_devices['localTimezone'] = df_devices['udf'].apply(local_timezone)

# the raw udf mapping is no longer needed; drop it in place
df_devices.drop('udf',axis=1,inplace=True)

### Create Date Correlation Columns

In [None]:
# all date columns
# NOTE(review): this list is not referenced by any other visible cell; the
# conversion cells name each column explicitly.
parse_dates =  ['lastAuditDate','lastSeen','lastReboot','creationDate',]

### Convert Epoch to UTC

In [None]:
# interpret the values as epoch milliseconds (unit='ms'); values that cannot
# be converted become NaT (errors='coerce')
df_devices['lastAuditDate'] = pd.to_datetime(df_devices['lastAuditDate'],unit='ms',errors='coerce')
#df_devices['lastAuditDate'].head(5)

In [None]:
# interpret the values as epoch milliseconds (unit='ms'); values that cannot
# be converted become NaT (errors='coerce')
df_devices['lastSeen'] = pd.to_datetime(df_devices['lastSeen'],unit='ms',errors='coerce')
#df_devices['lastSeen'].head(5)

In [None]:
# interpret the values as epoch milliseconds (unit='ms'); values that cannot
# be converted become NaT (errors='coerce')
df_devices['creationDate'] = pd.to_datetime(df_devices['creationDate'],unit='ms',errors='coerce')
#df_devices['creationDate'].head(5)

In [None]:
# interpret the values as epoch milliseconds (unit='ms'); values that cannot
# be converted become NaT (errors='coerce')
df_devices['lastReboot'] = pd.to_datetime(df_devices['lastReboot'],unit='ms',errors='coerce')
#df_devices['lastReboot'].head(5)

### Define and apply functions to create correlation columns

In [None]:
def no_audit_7_days(last_audit):
    """Flag a device whose last audit is more than 7 days old.

    Args:
        last_audit: Timezone-naive UTC timestamp of the last audit (the
            notebook derives it from epoch milliseconds), or ``NaT``.

    Returns:
        ``1`` when ``last_audit`` is earlier than seven days before the
        current UTC time, otherwise ``0``. ``NaT`` compares as false and so
        yields ``0``.
    """
    # The column holds naive UTC values, so "now" must be naive UTC as well;
    # datetime.now() returns local wall-clock time and would shift the cutoff
    # by the machine's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=7)
    return 1 if last_audit < cutoff else 0

In [None]:
def offline_30_days(last_seen):
    """Flag a device that has not been seen for more than 30 days.

    Args:
        last_seen: Timezone-naive UTC timestamp of the last contact (the
            notebook derives it from epoch milliseconds), or ``NaT``.

    Returns:
        ``1`` when ``last_seen`` is earlier than thirty days before the
        current UTC time, otherwise ``0``. ``NaT`` compares as false and so
        yields ``0``.
    """
    # The column holds naive UTC values, so "now" must be naive UTC as well;
    # datetime.now() returns local wall-clock time and would shift the cutoff
    # by the machine's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=30)
    return 1 if last_seen < cutoff else 0

In [None]:
def no_reboot_30_days(last_reboot):
    """Flag a device that has not rebooted for more than 30 days.

    Args:
        last_reboot: Timezone-naive UTC timestamp of the last reboot (the
            notebook derives it from epoch milliseconds), or ``NaT``.

    Returns:
        ``1`` when ``last_reboot`` is earlier than thirty days before the
        current UTC time, otherwise ``0``. ``NaT`` compares as false and so
        yields ``0``.
    """
    # The column holds naive UTC values, so "now" must be naive UTC as well;
    # datetime.now() returns local wall-clock time and would shift the cutoff
    # by the machine's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=30)
    return 1 if last_reboot < cutoff else 0

In [None]:
# Create Column - Devices Last Audit > 7 days
# (1 = last audit older than 7 days, 0 = otherwise)
df_devices['noAudit7Days'] = df_devices['lastAuditDate'].apply(no_audit_7_days)

In [None]:
# Create Column - Devices Offline 30 Days
# (1 = last seen more than 30 days ago, 0 = otherwise)
df_devices['offline30Days'] = df_devices['lastSeen'].apply(offline_30_days)

In [None]:
# Create Column - Last Reboot Extended Duration
# (1 = last reboot more than 30 days ago, 0 = otherwise)
# NOTE(review): noReboot30Days is not among the columns kept by the later
# column selection on df_devices - confirm whether it should be reported.
df_devices['noReboot30Days'] = df_devices['lastReboot'].apply(no_reboot_30_days)

## DattoRMM DataFrame Data Standardization Shaping

### Hostname to_upper()

In [None]:
# normalise hostnames to upper case
df_devices['hostname'] = df_devices['hostname'].str.upper()

### Replace Dtypes with Int64

In [None]:
# Boolean mask over the columns: True where the dtype is float, bool or uint8,
# except for patchStatusPercent, which is always excluded.
convert_to_int_mask = ((df_devices.dtypes == 'float') | (df_devices.dtypes == 'bool') | (df_devices.dtypes == 'uint8')) & (df_devices.columns != 'patchStatusPercent')
# names of the columns selected by the mask
convert_to_int = df_devices.dtypes[convert_to_int_mask].index.tolist()

In [None]:
# cast the selected columns to the 'Int64' dtype
# NOTE(review): presumably chosen because it is pandas' nullable integer type,
# so missing values survive the cast - confirm no selected float column holds
# fractional values.
df_devices[convert_to_int] = df_devices[convert_to_int].astype('Int64')

In [None]:
# keep only the device columns used further on, in this order
df_devices = df_devices[[
    'uid',
    'siteName',
    'hostname',
    'intIpAddress',
    'operatingSystem',
    'category',
    'domain',
    'lastSeen',
    'lastReboot',
    'lastAuditDate',
    'localTimezone',
    'noAudit7Days',
    'offline30Days',
    'portalUrl',
    'softwareStatus',
]]

## Filter Devices by siteName in 'datto_rmm_software_management_report.dict'

In [None]:
# Load the report dictionary (a CSV file) with its siteName, softwareInstalled
# and column columns.
df = pd.read_csv('d:/git/example_infrastructure_data_dev/dictionaries/datto_rmm_software_management_report.dict')

# software_standard_filter pairs softwareInstalled[i] with column[i] by
# position, so the two lists must stay aligned. Dropping missing values from
# each column separately would shift one list against the other whenever a
# row has only one of the two cells filled; rows are dropped as pairs instead.
software_pairs = df[['softwareInstalled', 'column']].dropna()

software_mgmt = {
    'siteName': list(df['siteName'].dropna()),
    'softwareInstalled': list(software_pairs['softwareInstalled']),
    'column': list(software_pairs['column']),
}

# Create Software Version DataFrame

## Prepare Data for recall and shaping

## Create Software DataFrame

* Because pulling every software title and version for each device produces a high volume of data,
    each call response is stored in a JSON document and then read back; this keeps memory usage low, because objects are created and then destroyed in memory while the data is retained at rest.
* The same idea is used when reading the data out of JSON into a concatenated DataFrame after splitting it out of the nested 'software' dictionary.
* Although this may seem counterintuitive, because each device has different software installed, this method was preferred over using a database because it can be used by anyone.
* If a database or JSON were not used, every row would raise an error, because the columns never match without the alignment performed by the pandas concatenate function 'concat'.


### FUNCTION: Explode Software Names / Versions into Columns

In [None]:
def explode_software(software):
    """Turn one software record into a single-entry ``{name: version}`` dict.

    Args:
        software: Mapping with at least the keys ``name`` and ``version``.

    Returns:
        A new dict whose only key is the software name and whose value is the
        software version.
    """
    return {software['name']: software['version']}

In [None]:
def software_standard_filter(string):
    """Map an installed-software name to its report column name.

    The keywords in ``software_mgmt['softwareInstalled']`` are tried in
    order, case-insensitively, as regular expressions against ``string``.
    The first hit returns the entry at the same position in
    ``software_mgmt['column']``.

    Args:
        string: Software name as reported for a device.

    Returns:
        The report column name for the first matching keyword, or ``None``
        when nothing matches or ``string`` is not a ``str``.
    """
    if not isinstance(string, str):
        return None

    haystack = string.lower()
    keyword_columns = zip(software_mgmt['softwareInstalled'], software_mgmt['column'])
    for keyword, column in keyword_columns:
        needle = str(keyword).lower()
        try:
            matched = re.search(needle, haystack) is not None
        except re.error:
            # A keyword such as "notepad++ (" is not a valid pattern. Fall
            # back to a plain substring test and keep going, so one bad
            # keyword cannot abort the search for every later one.
            matched = needle in haystack
        if matched:
            print(f'Keyword found: {keyword}')
            return column
    return None

In [None]:
def software_api_req(row):
    """Fetch the software audit of one device and merge it into its row.

    Args:
        row: One device row; must provide ``uid``. All of its fields are
            copied into the result.

    Returns:
        A dict with every field of ``row`` plus, for each audited software
        whose name matches a keyword in ``software_standard_filter``, an
        entry ``{report column: installed version}``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # request content response
    request_url = f"{dattormm_config['base_uri']}/api/v2/audit/device/{row['uid']}/software"

    # construct header
    headers = CaseInsensitiveDict()
    headers['Authorization'] = f'Bearer {access_token}'
    headers['Content-Type'] = 'application/json'

    print(f'\nRequest URL: {request_url}\n\n')

    # Create DB Object for Entry
    object_dict = dict(row)
    print(object_dict)

    # Follow pageDetails.nextPageUrl when the response carries one, the same
    # way the device listing is paged, so software beyond the first page is
    # not lost. A response without pageDetails ends the loop after one pass.
    next_page = request_url
    while next_page:
        resp = requests.get(next_page, headers=headers, timeout=60)
        # fail with the HTTP error rather than a KeyError on the error body
        resp.raise_for_status()
        c_dict = json.loads(resp.content.decode('utf-8'))

        # Explode and Shape Software Dict List Elements
        # (a missing or null 'software' list is treated as "nothing installed")
        for software in c_dict.get('software') or []:
            for name, version in explode_software(software).items():
                software_name = software_standard_filter(name)
                if software_name:
                    object_dict[software_name] = version

        next_page = (c_dict.get('pageDetails') or {}).get('nextPageUrl')

    print('*'*50)

    return object_dict

In [None]:
# start from an empty frame; it is replaced once all devices are processed
df_software = pd.DataFrame()

# one API round-trip per device row, in row order; each call returns the row's
# fields plus any matched software versions
devices_software_list = [
    software_api_req(device_row) for _, device_row in df_devices.iterrows()
]

df_software = pd.DataFrame(devices_software_list)

# Shape Software DataFrame

### Set Index to device UID

In [None]:
# use the device uid as the row label (uid stops being a regular column)
df_software.set_index('uid', inplace=True)

### FillNA

In [None]:
# Replace every missing value with the integer placeholder 2; the is*Current
# helpers test for 2 and the final export maps 2 back to NaN.
# Note that this fills missing values in all columns, not only software ones.
df_software.fillna(2,inplace=True)

## Create Boolean columns based on Compliance Version

### Adobe Air

In [None]:
request_url = f'https://airsdk.harman.com/download'

asession = AsyncHTMLSession()
r = await asession.get(request_url)
await r.html.arender()
resp=r.html.raw_html

soup = BeautifulSoup(resp)
dom = etree.HTML(str(soup))

result_list = []

for node in dom.xpath('//div'):
    text = ''.join(node.itertext()).strip()
    result = re.findall(r'Download\sAdobe\s(\d+\.\d+)',text)
    if result:
        for r in result:
            result_list.append(r)


latest_adobe_air_version = result_list[0]
df_software['latestAdobeAir'] = latest_adobe_air_version
print(f'Latest Adobe Air Version: {latest_adobe_air_version}')

In [None]:
def is_adobe_air_current(currentVersion):
    """Classify an installed Adobe AIR version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_adobe_air_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_adobe_air_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['isAdobeAirCurrent'] = df_software['adobeAir'].apply(is_adobe_air_current)

### 7-Zip

In [None]:
request_url = 'https://www.7-zip.org/download.html'
resp = requests.get(request_url, timeout=60)
# stop on an HTTP error instead of scraping an error page
resp.raise_for_status()
# Name the parser explicitly: lxml is what BeautifulSoup picks on its own when
# it is installed, and naming it avoids the "no parser specified" warning.
soup = BeautifulSoup(resp.content, 'lxml')
dom = etree.HTML(str(soup))

# collect the first "7-Zip <major.minor>" version of every <b> element
result_list = []
for node in dom.xpath('//b'):
    text = ''.join(node.itertext()).strip()
    result = re.findall(r'7-Zip\s(\d+\.\d+)', text)
    if result:
        result_list.append(result[0])

# A changed page layout should fail with a clear message, not an IndexError.
if not result_list:
    raise ValueError(f'No 7-Zip version found on {request_url}')

# NOTE(review): the '.00.0' suffix presumably pads the site's major.minor
# version to the four-part form reported for installed software - confirm.
latest_7zip_version = result_list[0] + '.00.0'
df_software['latest7Zip'] = latest_7zip_version
print(f'Latest 7-Zip Version: {latest_7zip_version}')

In [None]:
def is_7zip_current(currentVersion):
    """Classify an installed 7-Zip version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_7zip_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_7zip_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['is7ZipCurrent'] = df_software['7zip'].apply(is_7zip_current)

### Adobe DC Reader

In [None]:
request_url = "https://helpx.adobe.com/acrobat/release-note/release-notes-acrobat-reader.html"
resp = requests.get(request_url, timeout=60)
# stop on an HTTP error instead of scraping an error page
resp.raise_for_status()

# Name the parser explicitly: lxml is what BeautifulSoup picks on its own when
# it is installed, and naming it avoids the "no parser specified" warning.
soup = BeautifulSoup(resp.content, 'lxml')
# versions appear on the page as "(major.minor.patch)"
result = re.findall(r'\((\d+\.\d+\.\d+)\)', str(soup))

# A changed page layout should fail with a clear message, not an IndexError.
if not result:
    raise ValueError(f'No Acrobat Reader version found on {request_url}')

# the first match in document order is taken as the latest version
latest_adobe_dc_reader_version = result[0]
df_software['latestAdobeDCReader'] = latest_adobe_dc_reader_version
print(f'Latest Acrobat Adobe DC Reader Version: {latest_adobe_dc_reader_version}')

In [None]:
def is_adobe_dc_reader_current(currentVersion):
    """Classify an installed Acrobat Reader DC version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_adobe_dc_reader_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_adobe_dc_reader_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['isAdobeDCReaderCurrent'] = df_software['adobeAcrobatReaderDC'].apply(is_adobe_dc_reader_current)

### Google Chrome

In [None]:
# path parameters of the Chrome version-history API request
product = 'chrome'
platform = 'win64'
channel = 'stable'
version = 'all'

request_url = f"https://versionhistory.googleapis.com/v1/{product}/platforms/{platform}/channels/{channel}/versions/{version}/releases"
resp = requests.get(request_url, timeout=60)
# stop on an HTTP error instead of searching an error body
resp.raise_for_status()
# Search the decoded body; str(resp.content) would search the repr of the raw
# bytes (b'...' with escaped newlines) instead of the text itself.
results = re.findall(r'\"version\"\:\s\"(\d+\.\d+\.\d+\.\d+)\"', resp.text)

# A changed response format should fail with a clear message, not an IndexError.
if not results:
    raise ValueError(f'No Chrome version found in the response of {request_url}')

# NOTE(review): the first listed release is assumed to be the newest - confirm
# the ordering of the API response.
latest_chrome_version = results[0]
df_software['latestChrome'] = latest_chrome_version
print(f'Latest Chrome Version: {latest_chrome_version}')

In [None]:
def is_chrome_current(currentVersion):
    """Classify an installed Chrome version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_chrome_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_chrome_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['isChromeCurrent'] = df_software['googleChrome'].apply(is_chrome_current)

### Mozilla Firefox

In [None]:
request_url = f"[REDACTED]/.json"
resp = requests.get(request_url, timeout=60)
# stop on an HTTP error instead of searching an error body
resp.raise_for_status()

# The body is JSON text, so it is searched directly; no HTML parsing needed.
# The version pattern accepts two or more dot-separated numbers, because a
# three-part-only pattern fails on a release such as "121.0".
result = re.findall(r'"LATEST_FIREFOX_VERSION":\s*"(\d+(?:\.\d+)+)"', resp.text)

# A changed response format should fail with a clear message, not an IndexError.
if not result:
    raise ValueError(f'No LATEST_FIREFOX_VERSION entry found in the response of {request_url}')

latest_firefox_version = result[0]
df_software['latestfirefox'] = latest_firefox_version
print(f'Latest FireFox Version: {latest_firefox_version}')

In [None]:
def is_firefox_current(currentVersion):
    """Classify an installed Firefox version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_firefox_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_firefox_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['isFirefoxCurrent'] = df_software['mozillaFirefox'].apply(is_firefox_current)

### MS Teams

In [None]:
request_url = "https://docs.microsoft.com/en-us/officeupdates/teams-app-versioning"
resp = requests.get(request_url, timeout=60)
# stop on an HTTP error instead of scraping an error page
resp.raise_for_status()
# Name the parser explicitly: lxml is what BeautifulSoup picks on its own when
# it is installed, and naming it avoids the "no parser specified" warning.
soup = BeautifulSoup(resp.content, 'lxml')

dom = etree.HTML(str(soup))

# third cell of the first row of the fourth table in the article body
version_cells = dom.xpath('/html/body/div[2]/div/section/div/div[1]/main/div[3]/table[4]/tbody/tr[1]/td[3]')

# The absolute XPath breaks as soon as the page layout changes. Without this
# check an empty result would leave `text` holding whatever an earlier cell
# assigned, and a wrong "latest version" would be reported silently.
if not version_cells:
    raise ValueError(f'Teams version cell not found on {request_url}')
text = ''.join(version_cells[-1].itertext()).strip()

latest_teams_version = text
df_software['latestTeams'] = latest_teams_version
print(f'Latest Teams Version: {latest_teams_version}')

In [None]:
def is_teams_current(currentVersion):
    """Classify an installed Teams version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_teams_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_teams_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['isMSTeamsCurrent'] = df_software['microsoftTeams'].apply(is_teams_current)

### MS Office 365

In [None]:
request_url = "https://docs.microsoft.com/en-us/officeupdates/update-history-microsoft365-apps-by-date"
resp = requests.get(request_url, timeout=60)
# stop on an HTTP error instead of scraping an error page
resp.raise_for_status()
# Name the parser explicitly: lxml is what BeautifulSoup picks on its own when
# it is installed, and naming it avoids the "no parser specified" warning.
soup = BeautifulSoup(resp.content, 'lxml')

dom = etree.HTML(str(soup))

# third cell of the first row of the first table in the article body
version_cells = dom.xpath('/html/body/div[2]/div/section/div/div[1]/main/div[3]/table[1]/tbody/tr[1]/td[3]')

# The absolute XPath breaks as soon as the page layout changes. Without this
# check an empty result would leave `text` holding whatever an earlier cell
# assigned, and a wrong "latest version" would be reported silently.
if not version_cells:
    raise ValueError(f'Office 365 version cell not found on {request_url}')
text = ''.join(version_cells[-1].itertext()).strip()

latest_office_365_version = text
df_software['latestOffice365'] = latest_office_365_version
print(f'Latest Office 365 Version: {latest_office_365_version}')

In [None]:
def is_ms_office_current(currentVersion):
    """Classify an installed Office 365 version against the latest one.

    Args:
        currentVersion: Installed version, or the placeholder ``2``.

    Returns:
        ``2`` when ``currentVersion`` equals the placeholder ``2``, ``1``
        when it equals ``latest_office_365_version``, otherwise ``0``.
    """
    if currentVersion == 2:
        return 2
    return 1 if currentVersion == latest_office_365_version else 0

In [None]:
# per device: 1 = equals the latest version, 0 = differs, 2 = placeholder (no version recorded)
df_software['isMSOfficeCurrent'] = df_software['microsoftOffice365'].apply(is_ms_office_current)

In [None]:
# Columns of the final report, in output order: device fields first, then per
# product the installed version, the latest version and the is-current flag
# (products without a latest-version lookup only list the installed version).
software_report_cols = [
    'siteName', 'hostname', 'intIpAddress', 'operatingSystem', 'category','domain',
    'lastSeen', 'lastReboot', 'lastAuditDate', 'localTimezone',
    'noAudit7Days', 'offline30Days',
    'portalUrl', 'softwareStatus',
    '7zip','latest7Zip', 'is7ZipCurrent',
    'adobeAir', 'latestAdobeAir', 'isAdobeAirCurrent',
    'adobeAcrobatReaderDC', 'latestAdobeDCReader', 'isAdobeDCReaderCurrent',
    'googleChrome', 'latestChrome',  'isChromeCurrent',
    'mozillaFirefox', 'latestfirefox', 'isFirefoxCurrent',
    'mozillaThunderbird',
    'microsoftOffice365', 'latestOffice365', 'isMSOfficeCurrent',
    'microsoftTeams', 'latestTeams', 'isMSTeamsCurrent',
    'fileZillaClient',
    'java',
    'citrixWorkspace',
]

In [None]:
# Put the report columns in report order. A software column only exists when
# at least one device matched its keyword, so plain [] selection raises
# KeyError for a product nobody has installed. reindex() adds such columns as
# all-NaN, which is how a missing value is exported anyway.
df_software = df_software.reindex(columns=software_report_cols)

In [None]:
# Map the placeholder 2 back to a real missing value before writing the CSV.
# np.nan is used because the np.NAN alias was removed in NumPy 2.0.
df_software.replace({2: np.nan}).to_csv('[REDACTED]/.csv')