# Below is the first line of code we will execute. Press Shift + Enter to run it!

In [None]:
# First code cell of the notebook: prints a fixed greeting to the cell output.
print("Hello World!")

# Analyzing HTTP Using Stacking Techniques

## Hypothesis: 
During the recon phase, attackers will scan with enumeration tools that leave unique characteristics in the user-agent field of HTTP logs. <br>
References:<br>
https://github.com/ThreatHuntingProject/ThreatHunting/blob/master/hunts/http_user_agent_analysis.md <br>
https://posts.specterops.io/threat-hunting-with-jupyter-notebooks-part-1-your-first-notebook-9a99a781fde7 <br>
https://dgunter.com/2017/09/17/threat-hunting-with-python-prologue-and-basic-http-hunting/ <br>

## Step 1: Take the http.log file, parse out the fields of importance, and stack/count on those desired fields to discover anomalies 

### Import Python libraries for statistical analysis

In [53]:
import csv
import json
from collections import OrderedDict
from datetime import datetime, timedelta

import matplotlib.pylab as plot
import numpy as np

### Read in the http.log file, split lines, remove comments

In [54]:
# Read data from http bro logs and split the file by newlines.
# A file that ends with a newline leaves one empty string at the end of the list.
with open("http.log", 'r') as infile:
    file_data = infile.read().split('\n')

# Remove comment lines (those starting with "#") and blank lines.
# Testing the line's truthiness instead of indexing line[0] avoids the
# IndexError the empty trailing element used to raise; blank lines carry no
# record, so dropping them does not lose data.
http_data = [line for line in file_data if line and not line.startswith("#")]

In [None]:
# Stack user agents: tally how many records carry each user-agent value
user_agents = {}
for record in http_data:
    fields = record.split('\t')
    # Skip rows that have 12 or fewer tab-separated fields
    if len(fields) <= 12:
        continue
    agent = fields[11]
    user_agents[agent] = user_agents.get(agent, 0) + 1

# Order by count, most frequent first, for long tail analysis and print out
by_count_desc = sorted(user_agents.items(), key=lambda pair: pair[1], reverse=True)
sortedUser_agents = OrderedDict(by_count_desc)
print(json.dumps(sortedUser_agents, indent=2))

### What stands out in the user-agent strings? Let's search for the one that looks suspicious

In [None]:
# Let's search for the nmap user agent
suspicious_user_agents = ['Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)']

# Maps client -> {server: number of records carrying a suspicious user agent}
nmap_scanned_hosts = {}
for record in http_data:
    fields = record.split('\t')
    # Only rows with more than 12 tab-separated fields are considered
    if len(fields) <= 12:
        continue

    client, server, user_agent = fields[2], fields[4], fields[11]

    # Ignore anything that is not a positive hit for the user-agent string
    if user_agent not in suspicious_user_agents:
        continue

    # Create the per-client mapping the first time the client is seen, then
    # bump the per-server counter (starting from zero when the server is new)
    hits_by_server = nmap_scanned_hosts.setdefault(client, {})
    hits_by_server[server] = hits_by_server.get(server, 0) + 1

print(json.dumps(nmap_scanned_hosts, indent=2))

### Does the above output prove or disprove the hypothesis?

### Grab information about the hosts in question and start an investigation/triage. Output pertinent information to CSV for the handoff.

In [59]:
# Build suspicious hosts dict: client -> sorted list of the servers recorded
# for that client in nmap_scanned_hosts
suspicious_hosts = {
    client: sorted(nmap_scanned_hosts[client])
    for client in sorted(nmap_scanned_hosts)
}

# Write CSV file out for display/distribution in excel
with open('suspicious_http_records.csv', 'w') as outfile:
    # Set column headers
    outfile.write("ts,uid,id.orig_h,id.orig_p,id.resp_h,id.resp_p,trans_depth,method,host,uri,referrer,user_agent,request_body_len,response_body_len,status_code,status_msg,info_code,info_msg,filename,tags,username,password,proxied,orig_fuids,orig_mime_types,resp_fuids,resp_mime_types\n")

    # QUOTE_ALL keeps every field wrapped in double quotes, as the previous
    # hand-built rows were, but the csv module also doubles any embedded '"'
    # so a quote inside a field can no longer break the column layout.
    # lineterminator="\n" keeps row endings consistent with the header line.
    # NOTE(review): field values are written verbatim; if the file is opened in
    # a spreadsheet, values starting with '=', '+', '-' or '@' may be treated
    # as formulas - consider neutralising them before wider distribution.
    writer = csv.writer(outfile, quoting=csv.QUOTE_ALL, lineterminator="\n")

    for line in http_data:
        # Split once and reuse the fields for both the filter and the output row
        fields = line.split('\t')
        if len(fields) > 12:
            client = fields[2]
            server = fields[4]
            # Keep only records between a flagged client and one of its servers
            if server in suspicious_hosts.get(client, ()):
                writer.writerow(fields)
