# Welcome to the Detectify labs tutorial.

## Helpful Tips

Bash commands can be run using an exclamation point.

In [1]:
! whoami

root


In [2]:
! ls

allitems.csv  CVE_Counts_By_Year.html  sample_data


In [None]:
! pip3 install boto3

In [3]:
! echo 'Brevity in Motion learning notebook.' > tmp.txt

In [4]:
! cat tmp.txt

Brevity in Motion learning notebook.


In [None]:
! curl 'https://www.brevityinmotion.com/learning/'

In [None]:
! export AccessKeyId = 'ASIAXXXXXXXXXXXXXXXX'
! export SecretAccessKey = 'XXXXXXXXXXXXXXXXXXXX'
! export SessionToken = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

! aws sts get-caller-identity

# Markdown example

This is standard text.

``` This is example code. ```

* This is a list item

## Loading additional notebooks

Once you begin to develop a collection of notebooks, I like to separate them based on different functions into the following three types:
* A configuration notebook which installs the relevant packages, configures global variables, and imports the necessary libraries.
* A functions notebook which loads the underlying functions that are commonly called and may be shared across projects.
* A project specific notebook that imports the other two notebooks, which is similar to a code library import command.

In [None]:
def prepareNotebook():
    %run ./brevity-configuration.ipynb
    %run ./brevity-functions.ipynb
    return 'Dependencies successfully loaded.'

loadDependencies = prepareNotebook()
print(loadDependencies)

# Developing the automation
Automating the CVE analysis and metrics.

#### Step 0: Pre-staging the automation.

In [5]:
import pandas as pd
import re
import datetime

#### Step 1: Navigate to the MITRE website and download the latest csv file of CVE.

In [6]:
# Download the csv file from Mitre.
# Setup the download URL as a variable.
wgetcvedownload = 'https://cve.mitre.org/data/downloads/allitems.csv'
# The use of brackets can pass Python variables into bash commands.
! wget {wgetcvedownload}

--2022-06-26 04:52:05--  https://cve.mitre.org/data/downloads/allitems.csv
Resolving cve.mitre.org (cve.mitre.org)... 198.49.146.233, 192.52.194.135
Connecting to cve.mitre.org (cve.mitre.org)|198.49.146.233|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 157019129 (150M) [text/csv]
Saving to: ‘allitems.csv.1’


2022-06-26 04:52:15 (15.9 MB/s) - ‘allitems.csv.1’ saved [157019129/157019129]



#### Step 2: Open the CVE file into software such as Microsoft Excel.

In [6]:
# The Python Pandas library is able to import csv files into memory and the format is similar to an Excel spreadsheet.
# Let's import the library to make sure it is installed.
import pandas as pd

In [7]:
# The following command will load the file downloaded in Step 1 and then display the first 5 lines of the file to visually inspect whether or not it loaded properly into the DataFrame.
dfCVE = pd.read_csv('allitems.csv', sep=',', skiprows=[0,1,3,4,5,6,7,8,9], header=0, encoding = "ISO-8859-1")
# The :5 declares displayal of 5 rows.
dfCVE[:5]

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,Name,Status,Description,References,Phase,Votes,Comments
0,CVE-1999-0001,Candidate,ip_input.c in BSD-derived TCP/IP implementatio...,BUGTRAQ:19981223 Re: CERT Advisory CA-98.13 - ...,Modified (20051217),"MODIFY(1) Frech | NOOP(2) Northcutt, W...",Christey> A Bugtraq posting indicates that the...
1,CVE-1999-0002,Entry,Buffer overflow in NFS mountd gives root acces...,BID:121 | URL:http://www.securityfocus.com...,,,
2,CVE-1999-0003,Entry,Execute commands as root via buffer overflow i...,BID:122 | URL:http://www.securityfocus.com...,,,
3,CVE-1999-0004,Candidate,"MIME buffer overflow in email clients, e.g. So...",CERT:CA-98.10.mime_buffer_overflows | MS:M...,Modified (19990621),"ACCEPT(8) Baker, Cole, Collins, Dik, Landfi...","Frech> Extremely minor, but I believe e-mail i..."
4,CVE-1999-0005,Entry,Arbitrary command execution via IMAP buffer ov...,BID:130 | URL:http://www.securityfocus.com...,,,


In [8]:
print(len(dfCVE))

239896


#### Step 3: Filter on the columns and rows to ensure the relevant data records are available.

In [9]:
# Function to clean the names - there is probably a better way of doing all of this.
def normalize_columns(Phase): 
    Phase = str(Phase)
    if re.search('(\()(\d\d\d\d\d\d\d\d)', Phase): 
        pos = re.search('(\()(\d\d\d\d\d\d\d\d)', Phase)
        pos = pos.group(2)
        date_str = pos
        format_str = '%Y%m%d'
        pos = datetime.datetime.strptime(date_str, format_str)      
        return pos
    else: 
        # if cleanup needed return the same name 
        pos = "19000101"
        date_str = pos
        format_str = '%Y%m%d'
        pos = datetime.datetime.strptime(date_str, format_str) 
        return 'null'

# Add a normalized CVE date column
dfCVE['CVEDate'] = dfCVE['Phase'].apply(normalize_columns)
# Drop the null date values
dfCVE.drop(dfCVE[dfCVE.CVEDate == 'null'].index, inplace=True)
#dfCVE.drop(dfCVE[dfCVE.Status == 'Candidate'].index, inplace=True)
#dfCVE.drop(dfCVE[dfCVE.Description  'Reserved'].index, inplace=True)
dfCVE = dfCVE[~dfCVE.Description.str.contains("RESERVED")]
dfCVE[:5]

Unnamed: 0,Name,Status,Description,References,Phase,Votes,Comments,CVEDate
0,CVE-1999-0001,Candidate,ip_input.c in BSD-derived TCP/IP implementatio...,BUGTRAQ:19981223 Re: CERT Advisory CA-98.13 - ...,Modified (20051217),"MODIFY(1) Frech | NOOP(2) Northcutt, W...",Christey> A Bugtraq posting indicates that the...,2005-12-17 00:00:00
3,CVE-1999-0004,Candidate,"MIME buffer overflow in email clients, e.g. So...",CERT:CA-98.10.mime_buffer_overflows | MS:M...,Modified (19990621),"ACCEPT(8) Baker, Cole, Collins, Dik, Landfi...","Frech> Extremely minor, but I believe e-mail i...",1999-06-21 00:00:00
14,CVE-1999-0015,Candidate,Teardrop IP denial of service.,CERT:CA-97.28.Teardrop_Land | OVAL:oval:or...,Modified (20090302),ACCEPT(1) Wall | MODIFY(1) Frech | ...,Frech> XF: teardrop-mod | Christey> Not su...,2009-03-02 00:00:00
19,CVE-1999-0020,Candidate,** REJECT ** DO NOT USE THIS CANDIDATE NUMBER...,,Modified (20050204),"MODIFY(1) Frech | NOOP(4) Levy, Northc...",Frech> XF:lpr-bo | Christey> DUPE CVE-1999...,2005-02-04 00:00:00
29,CVE-1999-0030,Candidate,root privileges via buffer overflow in xlock c...,AUSCERT:AA-97.24.IRIX.xlock.buffer.overflow.vu...,Proposed (19990623),"ACCEPT(3) Levy, Ozancin, Prosser | NOO...",Frech> XF:xlock-bo (also add) | As per xlo...,1999-06-23 00:00:00


#### Step 4: Add the necessary calculations to the records.

In [13]:
## How many per year
# Convert the CVEDate field to a date time format
dfCVE['CVEDate'] = pd.to_datetime(dfCVE['CVEDate'], errors='coerce')
# Retrieve the year field add aggregate the CVEs per year and count them
CVEYear = dfCVE['CVEDate'].groupby([dfCVE.CVEDate.dt.year]).agg('count').to_frame('counts')
# Convert results to dataframe
dfCVEYear = pd.DataFrame(CVEYear)
dfCVEYear = dfCVEYear.reset_index()
dfCVEYear.columns = ['Year', 'Count'] # change column names
dfCVEYear

Unnamed: 0,Year,Count
0,1999,249
1,2000,351
2,2001,696
3,2002,838
4,2003,1004
5,2004,1161
6,2005,7142
7,2006,7028
8,2007,7538
9,2008,5894


In [14]:
# Which months are busiest for CVE - Could indicate more patching
CVEMonth = dfCVE['CVEDate'].groupby([dfCVE.CVEDate.dt.month]).agg('count').to_frame('counts')
# Convert results to dataframe
dfCVEMonth = pd.DataFrame(CVEMonth)
dfCVEMonth = dfCVEMonth.reset_index()
dfCVEMonth.columns = ['Month', 'Count'] # change column names
dfCVEMonth

Unnamed: 0,Month,Count
0,1,23235
1,2,13732
2,3,15744
3,4,12954
4,5,12889
5,6,14559
6,7,11645
7,8,16136
8,9,12958
9,10,12867


#### Step 5: Create a chart or graph to represent the data.


In [17]:
# Add the imports - these can be moved to the top into the import section for cleanup
import numpy as np
from bokeh.io import output_notebook, show, output_file, save
from bokeh.plotting import figure

# Confirm that Bokeh is working
output_notebook()
dfCVEYearMod = dfCVEYear.iloc[1:]
p = figure(plot_width=400, plot_height=400)
p.circle(dfCVEYearMod['Year'],dfCVEYearMod['Count'], size=10, line_color="navy", fill_color="red", fill_alpha=0.5)

show(p) # show the results

#### Step 6: Save and share the reports.

In [18]:
# Generate a HTML file of the embedded chart
output_file(filename="CVE_Counts_By_Year.html", title="CVE Counts by Year")
save(p)

'/content/CVE_Counts_By_Year.html'