<h1> DattoRMM - Software Management Report </h1>

# Import Modules, Create env Variables

In [None]:
# Data Science Modules
import pandas as pd
import numpy as np

# API Call modules
import requests

# Variable Secrets modules
from requests.structures import CaseInsensitiveDict
from configparser import ConfigParser

# Data Shaping Modules
import re
import datetime as dt

# File and DB Manipulation Modules
import json
import csv
import xlrd

# Create a connection using MongoClient. You can import MongoClient or use pymongo.MongoClient
from pymongo import MongoClient
import pymongo

# Data Visualization Modules
[REDACTED]/.pyplot as plt
import seaborn as sns

### Set Export Folder and Import Secrets

In [None]:
# add current timestamp to filename for reference
# (a timezone-aware "now" replaces the deprecated naive dt.datetime.utcnow();
# the formatted UTC string is identical)
current_time = dt.datetime.now(dt.timezone.utc).strftime('%Y_%m_%d_%H%M%S')

# git repo folder
env_file = 'd:/git/example_infrastructure_data_dev'

# export folder will contain all csv exported DataFrames for Ticket Creation
export_folder = 'd:/exports/'

# import configparser for env secrets
# NOTE: ConfigParser.read() silently skips files it cannot open, so a missing
# env.ini only surfaces later as a KeyError on the first section lookup.
config = ConfigParser()
config.read('d:/config/env.ini')

### Software Management Service Automation Subscribers

# Create MongoDB Connection Object

In [None]:
# Read the MongoDB credentials and host from the 'mongodb' config section.
mongo_config = config['mongodb']
username = mongo_config['username']
password = mongo_config['password']
connection_ip = mongo_config['connection_ip']
# Database names; not referenced by the connection created in this cell.
seed_db = 'seed_data'
prod_db = 'software_inventory'

# Provide the mongodb atlas url to connect python to mongodb using pymongo
# NOTE(review): the credentialed URI below is disabled and refers to a
# `database` name that is not defined in this cell; the active connection
# string points at a local instance and carries no credentials.
#CONNECTION_STRING = f"mongodb://{username}:{password}@{connection_ip}/{database}"
CONNECTION_STRING = 'mongodb://localhost:27017'

# Client object built from the active connection string.
client = MongoClient(CONNECTION_STRING)

# DataFrame Creation from API
## Prepare for DataFrame Creation

In [None]:
# import and assign secrets from env.ini
# (all values come from the 'dattormm' section of the parsed config)
datto_config = config['dattormm']
base_uri = datto_config['base_uri']
api_key = datto_config['api_key']
api_secret = datto_config['api_secret']

# call token api url
# (token endpoint path appended to the configured base URI)
token_uri = f'{base_uri}/auth/oauth/token'

### Create Access Token


In [None]:
# construct header
headers = CaseInsensitiveDict()
headers['Content-Type'] = 'application/x-www-form-urlencoded'

# construct req body (password grant: API key as username, API secret as password)
data = CaseInsensitiveDict()
data['grant_type'] = 'password'
data['username'] = api_key
data['password'] = api_secret

# request content response
# timeout: without one, requests waits indefinitely on an unresponsive host
resp = requests.post(
    token_uri,
    headers=headers,
    data=data,
    auth=('public-client', 'public'),
    timeout=60,
)
# Fail here with the HTTP status rather than with a confusing KeyError on
# 'access_token' when the server answers with an error payload.
resp.raise_for_status()
content = resp.content.decode('utf-8')
c_dict = json.loads(content)

# Bearer token used by the API calls that follow
access_token = c_dict['access_token']

## Create Devices DataFrame

In [None]:
# request content response
request_url = f'{base_uri}/api/v2/account/devices'

# construct header
headers = CaseInsensitiveDict()
headers['Authorization'] = f'Bearer {access_token}'
headers['Content-Type'] = 'application/json'

# construct req body
data = ''

print(f'Request URL: {request_url}')

# Fetch the first page and every following page with the same code path.
# Pages are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop re-copies all previously fetched rows per page.
device_pages = []
next_page = request_url
while next_page:
    resp = requests.get(next_page, headers=headers, data=data, timeout=60)
    # stop on an HTTP error instead of failing later on a missing 'devices' key
    resp.raise_for_status()
    content = resp.content.decode('utf-8')
    c_dict = json.loads(content)

    device_pages.append(pd.DataFrame(c_dict['devices']))
    # a falsy nextPageUrl marks the last page
    next_page = c_dict['pageDetails']['nextPageUrl']

# iterate and combine remaining pages
df_devices = pd.concat(device_pages, ignore_index=False)

# Data Shaping

## Create New Columns from Dictionary Columns

### Set Index to device UID

In [None]:
# Use the device 'uid' column as the row index (modifies df_devices in place).
df_devices.set_index('uid',inplace=True)

### Type | Category

In [None]:
def device_category(device):
    """Return the ``category`` entry of a device's ``deviceType`` mapping.

    Args:
        device: The ``deviceType`` value of one device, or a missing value.

    Returns:
        ``device['category']``, or ``None`` when *device* is ``None`` or a
        float (pandas represents absent values in object columns as NaN).

    Raises:
        KeyError: If *device* is a mapping without a ``'category'`` key.
    """
    # Identity check instead of `== None`; floats cover pandas' NaN marker,
    # which would otherwise fail on the subscript below.
    if device is None or isinstance(device, float):
        return None
    return device['category']

In [None]:
def device_type(device):
    """Return the ``type`` entry of a device's ``deviceType`` mapping.

    Args:
        device: The ``deviceType`` value of one device, or a missing value.

    Returns:
        ``device['type']``, or ``None`` when *device* is ``None`` or a float
        (pandas represents absent values in object columns as NaN).

    Raises:
        KeyError: If *device* is a mapping without a ``'type'`` key.
    """
    # Identity check instead of `== None`; floats cover pandas' NaN marker,
    # which would otherwise fail on the subscript below.
    if device is None or isinstance(device, float):
        return None
    return device['type']

In [None]:
# Split the nested 'deviceType' value into two flat columns.
df_devices['category'] = df_devices['deviceType'].apply(device_category)
df_devices['type'] = df_devices['deviceType'].apply(device_type)

# Rename 'type' values to split devices into (2) : 'computer' or 'server'
# (currently disabled)
#df_devices['type'].replace({'Desktop':'computer','Laptop':'computer','Server':'server'},inplace=True)

In [None]:
# Remove the nested source column now that 'category' and 'type' exist.
df_devices.drop(columns='deviceType',inplace=True)

### Patch Management Breakdown
 patchStatus | patchesApprovedPending | patchesNotApproved | patchesInstalled

In [None]:
def _patch_field(patch_management, field):
    """Return ``patch_management[field]``, or None when the mapping is missing.

    A missing mapping is ``None`` or a float (pandas' NaN marker); any other
    value is subscripted as-is, so an absent key still raises KeyError.
    """
    if patch_management is None or isinstance(patch_management, float):
        return None
    return patch_management[field]


# patchStatus
def patch_status(patch_managment):
    """Return the 'patchStatus' entry of one device's patchManagement value."""
    # (parameter spelling kept as-is so existing keyword callers still work)
    return _patch_field(patch_managment, 'patchStatus')

df_devices['patchStatus'] = df_devices['patchManagement'].apply(patch_status)

# patchesApprovedPending
def patches_approved_pending(patch_management):
    """Return the 'patchesApprovedPending' entry of a patchManagement value."""
    return _patch_field(patch_management, 'patchesApprovedPending')

df_devices['patchesApprovedPending'] = df_devices['patchManagement'].apply(patches_approved_pending)

# patchesNotApproved
def patches_not_approved(patch_managment):
    """Return the 'patchesNotApproved' entry of a patchManagement value."""
    # (parameter spelling kept as-is so existing keyword callers still work)
    return _patch_field(patch_managment, 'patchesNotApproved')

df_devices['patchesNotApproved'] = df_devices['patchManagement'].apply(patches_not_approved)

# patchesInstalled
def patches_installed(patch_management):
    """Return the 'patchesInstalled' entry of a patchManagement value."""
    return _patch_field(patch_management, 'patchesInstalled')

df_devices['patchesInstalled'] = df_devices['patchManagement'].apply(patches_installed)


# drop patchManagement {inplace=True}
df_devices.drop('patchManagement',axis=1,inplace=True)

### Patch Percent Patched Calculated Column
(Compliance Percentage = 100 - (Patches Approved Pending / (Patches Approved Pending + Patches Installed)) * 100)

In [None]:
# Share of approved patches that are already installed, in percent:
#   100 - pending / (pending + installed) * 100, rounded to two decimals.
_pending = df_devices['patchesApprovedPending']
_approved_total = _pending + df_devices['patchesInstalled']
_pending_percent = (_pending / _approved_total) * 100
df_devices['patchStatusPercent'] = (100 - _pending_percent).round(2)

### Sophos AV

In [None]:
def _antivirus_field(antivirus, field):
    """Return ``antivirus[field]``, or None when the mapping is missing.

    A missing mapping is ``None`` or a float (pandas' NaN marker); any other
    value is subscripted as-is, so an absent key still raises KeyError.
    """
    if antivirus is None or isinstance(antivirus, float):
        return None
    return antivirus[field]


# antivirusProduct
def antivirusProduct(antivirus):
    """Return the 'antivirusProduct' entry of one device's antivirus value."""
    return _antivirus_field(antivirus, 'antivirusProduct')

df_devices['antivirusProduct'] = df_devices['antivirus'].apply(antivirusProduct)

# antivirusStatus
def antivirusStatus(antivirus):
    """Return the 'antivirusStatus' entry of one device's antivirus value."""
    return _antivirus_field(antivirus, 'antivirusStatus')

df_devices['antivirusStatus'] = df_devices['antivirus'].apply(antivirusStatus)

# drop the nested source column now that both flat columns exist
df_devices.drop('antivirus',axis=1,inplace=True)

## Create Time Columns and Timedate Shaping

### Add Timezone Column from UDF

In [None]:
# Timezone
def local_timezone(udf):
    """Return the 'udf10' entry of one device's ``udf`` value.

    Returns None when *udf* is ``None`` or a float (pandas' NaN marker), in
    line with the other nested-column extractors in this notebook.
    NOTE(review): presumably user-defined field 10 holds the device's time
    zone, as the target column name suggests - confirm against the RMM setup.
    """
    if udf is None or isinstance(udf, float):
        return None
    return udf['udf10']

df_devices['localTimezone'] = df_devices['udf'].apply(local_timezone)

# drop udf {inplace=True}
df_devices.drop('udf',axis=1,inplace=True)

### Create Date Correlation Columns

In [None]:
# all date columns
# NOTE(review): this list is not referenced by the conversion cells visible in
# this notebook, which name each column explicitly - confirm it is still needed.
parse_dates =  ['lastAuditDate','lastSeen','lastReboot','creationDate',]

### Convert Epoch to UTC

In [None]:
# Interpret 'lastAuditDate' as epoch milliseconds; values that cannot be
# converted become NaT (errors='coerce').
df_devices['lastAuditDate'] = pd.to_datetime(df_devices['lastAuditDate'],unit='ms',errors='coerce')
#df_devices['lastAuditDate'].head(5)

In [None]:
# Interpret 'lastSeen' as epoch milliseconds; values that cannot be converted
# become NaT (errors='coerce').
df_devices['lastSeen'] = pd.to_datetime(df_devices['lastSeen'],unit='ms',errors='coerce')
#df_devices['lastSeen'].head(5)

In [None]:
# Interpret 'creationDate' as epoch milliseconds; values that cannot be
# converted become NaT (errors='coerce').
df_devices['creationDate'] = pd.to_datetime(df_devices['creationDate'],unit='ms',errors='coerce')
#df_devices['creationDate'].head(5)

In [None]:
# Interpret 'lastReboot' as epoch milliseconds; values that cannot be
# converted become NaT (errors='coerce').
df_devices['lastReboot'] = pd.to_datetime(df_devices['lastReboot'],unit='ms',errors='coerce')
#df_devices['lastReboot'].head(5)

### Define and apply functions to create correlation columns

In [None]:
def no_audit_7_days(last_audit):
    """Flag a device whose last audit is more than 7 days old.

    Args:
        last_audit: Timezone-naive UTC timestamp of the last audit (the form
            produced by ``pd.to_datetime(..., unit='ms')``), or ``NaT``.

    Returns:
        1 if *last_audit* is earlier than 7 days before the current UTC time,
        otherwise 0 (including ``NaT``, for which the comparison is False).
    """
    # Epoch-derived timestamps are UTC, so the cutoff has to be UTC as well;
    # local-time dt.datetime.now() shifted it by the host's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=7)
    return 1 if last_audit < cutoff else 0

In [None]:
def offline_30_days(last_seen):
    """Flag a device that has not been seen for more than 30 days.

    Args:
        last_seen: Timezone-naive UTC timestamp of the last contact (the form
            produced by ``pd.to_datetime(..., unit='ms')``), or ``NaT``.

    Returns:
        1 if *last_seen* is earlier than 30 days before the current UTC time,
        otherwise 0 (including ``NaT``, for which the comparison is False).
    """
    # Epoch-derived timestamps are UTC, so the cutoff has to be UTC as well;
    # local-time dt.datetime.now() shifted it by the host's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=30)
    return 1 if last_seen < cutoff else 0

In [None]:
def no_reboot_30_days(last_reboot):
    """Flag a device whose last reboot is more than 30 days old.

    Args:
        last_reboot: Timezone-naive UTC timestamp of the last reboot (the form
            produced by ``pd.to_datetime(..., unit='ms')``), or ``NaT``.

    Returns:
        1 if *last_reboot* is earlier than 30 days before the current UTC
        time, otherwise 0 (including ``NaT``, for which the comparison is
        False).
    """
    # Epoch-derived timestamps are UTC, so the cutoff has to be UTC as well;
    # local-time dt.datetime.now() shifted it by the host's UTC offset.
    utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    cutoff = utc_now - dt.timedelta(days=30)
    return 1 if last_reboot < cutoff else 0

In [None]:
# Create Column - Devices Last Audit > 7 days
# (1 when no_audit_7_days flags the row's 'lastAuditDate', otherwise 0)
df_devices['noAudit7Days'] = df_devices['lastAuditDate'].apply(no_audit_7_days)

In [None]:
# Create Column - Devices Offline 30 Days
# (1 when offline_30_days flags the row's 'lastSeen', otherwise 0)
df_devices['offline30Days'] = df_devices['lastSeen'].apply(offline_30_days)

In [None]:
# Create Column - Last Reboot Extended Duration and Online without Reboot Extended Duration
# (1 when no_reboot_30_days flags the row's 'lastReboot', otherwise 0)
df_devices['noReboot30Days'] = df_devices['lastReboot'].apply(no_reboot_30_days)

In [None]:
# Create DF copy for reference
# .copy() is required: a plain assignment only binds a second name to the same
# DataFrame, so the in-place column edits that follow (hostname upper-casing,
# Int64 casts) would also change the "raw" reference.
df_raw_data = df_devices.copy()

## DattoRMM DataFrame Data Standardization Shaping

### Hostname to_upper()

In [None]:
# Upper-case every hostname so the values are spelled consistently.
df_devices['hostname'] = df_devices['hostname'].str.upper()

### Replace Dtypes with Int64

In [None]:
# Select the columns whose dtype is float, bool or uint8, leaving out
# 'patchStatusPercent'; the resulting list of column names is what gets cast
# to Int64 in the next cell.
_column_dtypes = df_devices.dtypes
_has_castable_dtype = (
    (_column_dtypes == 'float')
    | (_column_dtypes == 'bool')
    | (_column_dtypes == 'uint8')
)
_is_not_percent = df_devices.columns != 'patchStatusPercent'
convert_to_int_mask = _has_castable_dtype & _is_not_percent
convert_to_int = _column_dtypes[convert_to_int_mask].index.tolist()

In [None]:
# Cast the selected columns to pandas' 'Int64' dtype.
df_devices[convert_to_int] = df_devices[convert_to_int].astype('Int64')

## Add 'patchStatus' Dummy Columns

In [None]:
# One indicator column per distinct 'patchStatus' value, prefixed 'patchStatus_'.
df_patch_status = pd.get_dummies(df_devices['patchStatus'],prefix='patchStatus')
# errors='ignore': the 'NoPolicy' indicator only exists when at least one
# device reports that status; dropping it unconditionally raised KeyError.
df_patch_status.drop('patchStatus_NoPolicy',axis=1, inplace=True, errors='ignore')
# Attach the indicators by index, then remove the original categorical column.
df_devices = df_devices.join(df_patch_status)
df_devices.drop('patchStatus',axis=1,inplace=True)

### Add Report Creation Date Info Column

In [None]:
# UTC creation time of this report, stored on every row.
# (a timezone-aware "now" replaces the deprecated naive dt.datetime.utcnow();
# the formatted string is identical)
report_creation_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
df_devices['reportCreationDate'] = report_creation_date

# Create Software Version DataFrame

## Prepare Data for recall and shaping

### Software Management Reports

In [None]:
# Build the software-management view: the configured column subset of
# df_devices, stamped with the run's timestamp, with the index moved back
# into a regular column.
# NOTE(review): `software_management` is not defined in the visible part of
# this notebook - confirm where it comes from.
# DataFrame.assign() returns a new frame; the original call discarded that
# result, so 'reportCreationDate' was never set. Chaining also avoids an
# in-place reset_index() on a column selection of df_devices.
df_software_management = (
    df_devices[software_management['columns']]
    .assign(reportCreationDate=current_time)
    .reset_index()
)

### Create Software Dataframe to hold all concatenated dataframes from JSON ingestion

In [None]:
# Start with an empty frame; the per-device loop below concatenates onto it.
df_software = pd.DataFrame()

## Create Software DataFrame

* Pulling every software title and version for every device produces a high volume of data, so each API response is stored in a JSON document and then read back. This keeps memory usage low: objects are created and then destroyed in memory, while the data itself is retained at rest on disk.
* The same idea is used when reading the data back out of JSON into a concatenated DataFrame, after the nested 'software' dictionary has been split up.
* Although this may seem counterintuitive given that each device has different software installed, this method was preferred over using a database because it can be used by anyone.
* If a database or JSON were not used, every row would produce an error, because the columns never match from one device to the next unless they are aligned by the pandas `concat` function.


### FUNCTION: Explode Software Names / Versions into Columns

In [None]:
def explode_software(software):
    """Record one software entry in the module-level ``software_info`` dict.

    Stores ``software['version']`` under the key ``software['name']``. The
    target dict is looked up as a global, so it must exist when this is
    called. Returns ``None``.
    """
    version = software['version']
    software_info.update({software['name']: version})

In [None]:
# For each selected device: request its software audit, round-trip the
# response through a JSON file, turn the software list into one wide row
# (one column per software name) and concatenate that row onto df_software.
# NOTE(review): the [:5] slice restricts the run to the first five rows -
# looks like a debugging limit; confirm before a full run.
for index, row in df_software_management[:5].iterrows():

    # request content response
    request_url = f'{base_uri}/api/v2/audit/device/{row["uid"]}/software'

    # construct header
    headers = CaseInsensitiveDict()
    headers['Authorization'] = f'Bearer {access_token}'
    headers['Content-Type'] = 'application/json'

    # construct req body
    data = ''

    print(f'Request URL: {request_url}')

    # NOTE(review): the HTTP status is not checked; an error response would
    # surface as a JSON or KeyError further down.
    resp = requests.get(request_url, headers=headers, data=data)
    content = resp.content.decode('utf-8')
    c_dict = json.loads(content)

    # Pair the device UID with its software list and write both to disk.
    software_dict = {}
    software_dict['deviceUid'] = row["uid"]
    software_dict['software'] = c_dict['software']
    with open('[REDACTED]/.json','w') as file:
        #print(software_dict['deviceUid'])
        file.write(json.dumps(software_dict))
    # Read the same file back into a DataFrame.
    # NOTE(review): 'deviceUid' is not one of the orient values documented for
    # pd.read_json - confirm this call behaves as intended.
    df = pd.read_json('[REDACTED]/.json',orient='deviceUid')

    # software_info is the global dict that explode_software() fills with one
    # name -> version entry per software item.
    software_info = {}
    software_info['deviceUid'] = df['deviceUid'][0]
    df['software'].apply(explode_software)

    # One-row frame for this device, indexed by its UID.
    df_software_explode = pd.DataFrame(software_info, index=[0]).set_index('deviceUid')
    # NOTE(review): df_software_management is concatenated in again on every
    # iteration, so its rows are repeated once per processed device - confirm
    # this is intended.
    df_device_software = pd.concat([df_software,df_software_management],ignore_index=False)
    df_software = pd.concat([df_device_software,df_software_explode],ignore_index=False)

In [None]:
# Write the combined software frame to the export folder, without the index.
df_software.to_csv(export_folder + '[REDACTED]/.csv',index=False)

# Initial Metrics and CSV Creation

## All Fields

In [None]:
# Export every column of df_devices; the file name carries the run timestamp.
df_devices.to_csv(export_folder + 'all_fields_' + str(current_time) + '[REDACTED]/.csv')

### Empty 'Last Audit' Field

In [None]:
# Export the devices whose 'lastAuditDate' is missing. isna() tests for missing
# values directly, instead of filling a datetime column with a string sentinel
# and comparing against it.
df_devices[df_devices['lastAuditDate'].isna()].to_csv(export_folder + 'last_audit_isnull_' + str(current_time) + '[REDACTED]/.csv')

### No Audit > 7 days

In [None]:
# Export the devices flagged with noAudit7Days == 1.
df_devices[df_devices['noAudit7Days'] == 1].to_csv(export_folder + 'no_audit_7days_' + str(current_time) + '[REDACTED]/.csv')

### No Audit Since Last Year

In [None]:
# Export the devices whose last audit falls in the previous calendar year (UTC).
# (a timezone-aware "now" replaces the deprecated naive dt.datetime.utcnow();
# the year value is identical)
# NOTE(review): the equality test selects last year only, so devices last
# audited two or more years ago are not included, and the file name says
# 'current_yr' - confirm the intended selection.
df_devices[df_devices['lastAuditDate'].dt.year == dt.datetime.now(dt.timezone.utc).year - 1].to_csv(export_folder + 'no_audit_current_yr_' + str(current_time) + '[REDACTED]/.csv')