In [1]:
import pandas as pd
import csv
import os

In [6]:
# Generate http.csv from http.log in order to use pandas package for analysis
#
# Every tab-separated log in `path` that has a column list in `dic` is rewritten
# next to it as <name>.csv: one header row taken from `dic`, then one CSV row
# per log line. Fields that are exactly "-" are written as empty cells.

dic = {"http.log":['ts', 'uid', 'source_ip', 'source_port', 'destination_ip', 'destination_port', 
            'trans_depth', 'method', 'host', 'uri', 'referrer', 'user_agent', 'request_body_len', 
            'response_body_len', 'status_code', 'status_msg', 'info_code', 'info_msg', 'filename', 
            'tags', 'username', 'password', 'proxied', 'orig_fuids', 'orig_mime_types', 'resp_fuids', 
            'resp_mime_types']}
path = "./logs-processing"

for filename in os.listdir(path):
    if filename not in dic:
        continue

    log_path = os.path.join(path, filename)
    # Swap only the extension; str.replace("log", "csv") would also rewrite
    # any other "log" substring that happens to be in the file name.
    csv_path = os.path.splitext(log_path)[0] + ".csv"

    # Open the input first so a missing/unreadable log does not leave an
    # empty CSV behind, and stream line by line instead of loading the whole
    # log (and every parsed row) into memory.
    with open(log_path, encoding = "utf8") as file, \
            open(csv_path, 'w', encoding = 'utf-8', newline = '') as csvfile:
        w = csv.writer(csvfile, dialect = 'excel')
        w.writerow(dic[filename])
        for line in file:
            # Strip only the line terminator: the final record is kept even
            # when the log does not end with a newline.
            cells = ["" if item == "-" else item
                     for item in line.rstrip('\n').split('\t')]
            w.writerow(cells)

In [7]:
# Get availability of column values
#
# Loads http.csv and builds a per-column summary table (dtype, non-null count,
# number of distinct values, number of missing values).

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns and the
# DtypeWarning that the default chunked parsing can raise on large files.
df = pd.read_csv("./logs-processing/http.csv", low_memory=False)

# DataFrame with columns
# NOTE(review): not used by the report below and skips the first column;
# kept unchanged in case a later cell reads it.
columns = pd.DataFrame(list(df.columns.values[1:]))

# DataFrame with data types
data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])

# DataFrame with Count (non-null values per column)
data_count = pd.DataFrame(df.count(), columns=['Count'])

# DataFrame with unique values (distinct non-null values per column),
# computed in one call rather than filling the frame row by row
unique_value_counts = pd.DataFrame(df.nunique(), columns=['Unique Values'])

# DataFrame with missing values
missing_data_counts = pd.DataFrame(df.isnull().sum(), columns = ['Missing Values'])

# All four frames are indexed by column name, so plain joins line them up
http_digestion_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)
print('HTTP Digestion Report')

http_digestion_report

  exec(code_obj, self.user_global_ns, self.user_ns)


HTTP Digestion Report


Unnamed: 0,Data Type,Count,Unique Values,Missing Values
ts,float64,2048442,668595,0
uid,object,2048442,479204,0
source_ip,object,2048442,71,0
source_port,int64,2048442,37686,0
destination_ip,object,2048442,88,0
destination_port,int64,2048442,8,0
trans_depth,int64,2048442,1207,0
method,object,2047566,143,876
host,object,2042003,315,6439
uri,object,2047566,1591739,876


In [11]:
# Sanity checkpoint: eyeball the first five parsed rows

df.head(n=5)

Unnamed: 0,ts,uid,source_ip,source_port,destination_ip,destination_port,trans_depth,method,host,uri,...,info_msg,filename,tags,username,password,proxied,orig_fuids,orig_mime_types,resp_fuids,resp_mime_types
0,1331901000.0,CHEt7z3AzG4gyCNgci,192.168.202.79,50465,192.168.229.251,80,1,HEAD,192.168.229.251,/DEASLog02.nsf,...,,,(empty),,,,,,,
1,1331901000.0,CKnDAp2ohlvN6rpiXl,192.168.202.79,50467,192.168.229.251,80,1,HEAD,192.168.229.251,/DEASLog03.nsf,...,,,(empty),,,,,,,
2,1331901000.0,CNTrjn42F3LB58MZH6,192.168.202.79,50469,192.168.229.251,80,1,HEAD,192.168.229.251,/DEASLog04.nsf,...,,,(empty),,,,,,,
3,1331901000.0,C1D7mK1PlzKEnEyG03,192.168.202.79,50471,192.168.229.251,80,1,HEAD,192.168.229.251,/DEASLog05.nsf,...,,,(empty),,,,,,,
4,1331901000.0,CGF1bVMyl9ALKI32l,192.168.202.79,50473,192.168.229.251,80,1,HEAD,192.168.229.251,/DEASLog.nsf,...,,,(empty),,,,,,,


In [15]:
# List the distinct source IP addresses seen in the HTTP log, then their count

source_ips = df.source_ip.unique()

print('Unique source IP addresses:')
print(source_ips)

print('\nNo. of unique source IP addresses: ', len(source_ips))

Unique source IP addresses:
['192.168.202.79' '192.168.202.76' '192.168.203.61' '192.168.203.62'
 '192.168.204.70' '192.168.202.73' '192.168.202.90' '192.168.202.102'
 '192.168.202.96' '192.168.202.110' '192.168.202.87' '192.168.202.83'
 '192.168.203.45' '192.168.28.100' '192.168.202.112' '192.168.24.100'
 '192.168.26.100' '192.168.27.100' '192.168.204.45' '192.168.202.103'
 '192.168.203.64' '192.168.202.94' '192.168.202.115' '192.168.202.95'
 '192.168.202.101' '192.168.202.100' '192.168.202.107' '192.168.202.80'
 '192.168.202.108' '192.168.202.118' '192.168.203.63' '192.168.202.122'
 '192.168.202.65' '192.168.25.100' '192.168.202.97'
 '2001:dbb:c18:204:20c:29ff:fe4e:9e86' '192.168.26.254' '192.168.202.125'
 '2001:dbb:c18:202:20c:29ff:fe93:571e' '192.168.202.109' '192.168.202.4'
 '192.168.23.254' '192.168.202.88' '192.168.202.98' '192.168.23.100'
 '192.168.22.100' '192.168.202.133' '192.168.202.91' '192.168.202.68'
 '192.168.204.60' '192.168.202.135' '192.168.203.66' '192.168.202.138'


In [10]:
# To check how many times each individual source IP accessed the HTTP server
#
# Prints, for every source IP in order of first appearance, the number of log
# rows it produced and that number as a percentage of all rows.

# One vectorised pass over the column; the previous version re-scanned the
# whole frame row by row (df.apply with axis=1) once per unique IP.
access_counts = df.source_ip.value_counts()
total_rows = df.shape[0]

for ip in df.source_ip.unique():
    # unique() may yield NaN while value_counts() drops it; fall back to 0,
    # which is what an equality comparison against NaN would have counted.
    numofaccess = int(access_counts.get(ip, 0))
    print("IP", str(ip), "status:", str(numofaccess), "("+"{:.4f}".format(numofaccess/total_rows*100), "%)")

IP 192.168.202.79 status: 232259 (11.3383 %)
IP 192.168.202.76 status: 4497 (0.2195 %)
IP 192.168.203.61 status: 161 (0.0079 %)
IP 192.168.203.62 status: 223 (0.0109 %)
IP 192.168.204.70 status: 1211 (0.0591 %)
IP 192.168.202.73 status: 3 (0.0001 %)
IP 192.168.202.90 status: 1750 (0.0854 %)
IP 192.168.202.102 status: 212234 (10.3608 %)
IP 192.168.202.96 status: 10487 (0.5120 %)
IP 192.168.202.110 status: 169126 (8.2563 %)
IP 192.168.202.87 status: 1040 (0.0508 %)
IP 192.168.202.83 status: 35 (0.0017 %)
IP 192.168.203.45 status: 1429 (0.0698 %)
IP 192.168.28.100 status: 207 (0.0101 %)
IP 192.168.202.112 status: 5631 (0.2749 %)
IP 192.168.24.100 status: 6 (0.0003 %)
IP 192.168.26.100 status: 20 (0.0010 %)
IP 192.168.27.100 status: 10 (0.0005 %)
IP 192.168.204.45 status: 1888 (0.0922 %)
IP 192.168.202.103 status: 2785 (0.1360 %)
IP 192.168.203.64 status: 393 (0.0192 %)
IP 192.168.202.94 status: 544 (0.0266 %)
IP 192.168.202.115 status: 34 (0.0017 %)
IP 192.168.202.95 status: 418 (0.0204 %

In [26]:
# Sanity checkpoint: first 20 rows logged for one specific source IP

df[df.source_ip == '192.168.203.63'].head(20)

Unnamed: 0,ts,uid,source_ip,source_port,destination_ip,destination_port,trans_depth,method,host,uri,...,info_msg,filename,tags,username,password,proxied,orig_fuids,orig_mime_types,resp_fuids,resp_mime_types
149907,1331914000.0,CI58Leqoe4pMvfAq6,192.168.203.63,57792,192.168.23.103,80,1,GET,192.168.23.103,/,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FNisMJIalf7Oftg6b,text/html
149913,1331914000.0,CvJhVw4ZlXrJ3jkRLc,192.168.203.63,57793,192.168.23.103,80,1,GET,192.168.23.103,/favicon.ico,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FVMarc1oItBH1icJ0g,application/xml
149914,1331914000.0,CI58Leqoe4pMvfAq6,192.168.203.63,57792,192.168.23.103,80,2,GET,192.168.23.103,/interface/login/login_frame.php?site=default,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FOzbtL2267lq4gki25,text/html
149915,1331914000.0,CI58Leqoe4pMvfAq6,192.168.203.63,57792,192.168.23.103,80,3,GET,192.168.23.103,/interface/themes/style_oemr.css,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FbGWmg2WeRO0VQ6Nf5,text/x-asm
149916,1331914000.0,CTLXpR3xbXm0ykjBef,192.168.203.63,57794,192.168.23.103,80,1,GET,192.168.23.103,/interface/login/filler.php,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FgnSaD2sTTMjgcGhm8,text/html
149917,1331914000.0,Cobl2S3PcJsfIoC624,192.168.203.63,57795,192.168.23.103,80,1,GET,192.168.23.103,/interface/login/login_title.php,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,F76Pn24DyLOa0mg1Hj,text/html
149918,1331914000.0,CI58Leqoe4pMvfAq6,192.168.203.63,57792,192.168.23.103,80,4,GET,192.168.23.103,/interface/pic/logo.gif,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FkysY033bnspTTbTyc,image/gif
149919,1331914000.0,CmHcK54WqXcwVQEWZ2,192.168.203.63,57796,192.168.23.103,80,1,GET,192.168.23.103,/interface/login/login.php,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FZIQ6H1aKj5r3gC6Qb,text/html
149920,1331914000.0,CTLXpR3xbXm0ykjBef,192.168.203.63,57794,192.168.23.103,80,2,GET,192.168.23.103,/library/js/jquery-1.4.3.min.js,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,FqqLBW2hk2wPVcpF4e,text/plain
149921,1331914000.0,Cobl2S3PcJsfIoC624,192.168.203.63,57795,192.168.23.103,80,2,GET,192.168.23.103,/sites/default/images/login_logo.gif,...,,,(empty),,,PROXY-CONNECTION -> keep-alive,,,Ftye0641Aw804Timjf,image/gif


In [34]:
# To check request/response pairs by each unique source IP to understand request intention
# (there is no domain knowledge available to decide up front which IP is performing
# reconnaissance activities against an enterprise web server).
#
# This action may provide clues to decide which IP addresses are suspicious.

# (label, column) pairs printed for every IP, in this order. The labels are
# printed verbatim; the leading newline on the first one separates it from
# the "IP:" line.
# Columns currently left out of the report: host, uri, referrer,
# request_body_len, response_body_len.
report_fields = [
    ('\nMethod: ', 'method'),
    ('User Agent: ', 'user_agent'),
    ('Status Code: ', 'status_code'),
    ('Status Message: ', 'status_msg'),
    ('Info Code: ', 'info_code'),
    ('Info Message: ', 'info_msg'),
    ('Proxied: ', 'proxied'),
]

for ip in df.source_ip.unique():

    # Plain boolean mask instead of a query string built from repr(ip.split(' ')):
    # that only worked because a list literal is treated as a membership test,
    # and it raised on non-string values (e.g. NaN) or values containing quotes.
    df_temp = df[df.source_ip == ip]

    print("IP: ", str(ip))
    for label, column in report_fields:
        print(label, df_temp[column].unique())
    print('\n\n')

IP:  192.168.202.79

Method:  ['HEAD' 'GET' 'POST' 'OPTIONS' 'TRACE' 'PROPFIND' nan 'CONNECT' 'PUT'
 'TRACK' 'DEBUG' 'get' 'INDEX' 'SEARCH' 'DELETE']
User Agent:  ['Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)'
 nan
 'Mozilla/5.0 (X11; Linux i686; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
 ... 'Mozilla/4.0 (compatible; MSIE 5.0; Windows NT;)' 'Python-urllib/2.6'
 'cadaver/0.23.3 neon/0.29.0']
Status Code:  [404. 401. 200. 403. 500. 400.  nan 503. 405. 501. 206. 302. 303. 505.
 304. 301. 201. 207. 204.]
Status Message:  ['Not Found' 'Unauthorized' 'OK' 'Forbidden' 'Internal Server Error'
 'Bad Request' nan 'Service Unavailable' 'NOT FOUND' 'Access denied'
 'METHOD NOT ALLOWED' 'Not Implemented' 'Partial Content' 'Found'
 'Not Allowed' '<empty>' 'See Other' 'Authorization Required'
 'HTTP Version Not Supported' 'NOT MODIFIED' 'Moved Permanently'
 'Method Not Allowed' 'Method Not Implemented' 'Moved Temporarily'
 'Not Modified' 'Created' 'Multi-Status' 'N

IP:  192.168.203.45

Method:  ['GET' nan 'OPTIONS' 'HEAD' 'CONNECT' 'POST' 'PUT']
User Agent:  [nan
 'Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)'
 'Mozilla/5.0 (X11; Linux i686 on x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1'
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)']
Status Code:  [200. 503. 400. 302. 403.  nan 404. 303. 500. 304. 301. 405. 401. 201.]
Status Message:  ['OK' 'Service Unavailable' 'Bad Request' 'Found' 'Forbidden' '<empty>'
 nan 'Not Found' 'NOT FOUND' 'See Other' 'Internal Server Error'
 'Not Modified' 'Moved Permanently' 'FOUND' 'Method Not Allowed'
 'Authorization Required' 'Created']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan 'VIA -> SIP/2.0/TCP nm;branch=foo']



IP:  192.168.28.100

Method:  ['POST']
User Agent:  ['Mozilla/5.0 (Windows NT 6.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1']
Status Code:  [200.  nan]
Status Message:  ['OK' nan]
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan]



IP:  192.1

IP:  192.168.203.63

Method:  ['GET' 'POST' 'HEAD']
User Agent:  ['Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.11) Gecko/20101013 Ubuntu/9.04 (jaunty) Firefox/3.6.11'
 'DirBuster-0.12 (http://www.owasp.org/index.php/Category:OWASP_DirBuster_Project)'
 'w3af.sourceforge.net']
Status Code:  [200. 404. 302.  nan 304. 401. 405. 400.]
Status Message:  ['OK' 'Not Found' 'Found' 'NOT FOUND' nan 'NOT MODIFIED' 'UNAUTHORIZED'
 'METHOD NOT ALLOWED' 'Bad Request']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  ['PROXY-CONNECTION -> keep-alive' nan]



IP:  192.168.202.122

Method:  ['GET' 'POST']
User Agent:  ['Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20100101 Firefox/11.0'
 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 3.0.04506.30)']
Status Code:  [200. 404. 302. 304. 401.  nan]
Status Message:  ['OK' 'NOT FOUND' 'FOUND' 'NOT MODIFIED' 'Not Found' 'UNAUTHORIZED' nan
 'Found']
Info Code:  [ nan 100

IP:  192.168.202.109

Method:  ['GET' 'POST']
User Agent:  ['Mozilla/5.0 (X11; Linux i686 on x86_64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2']
Status Code:  [200. 302. 304. 403. 404. 401.]
Status Message:  ['OK' 'Found' 'Not Modified' 'Forbidden' 'Not Found'
 'Authorization Required']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan]



IP:  192.168.202.4

Method:  ['GET' nan 'OPTIONS' 'HEAD' 'CONNECT' 'PROPFIND']
User Agent:  [nan
 'Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)'
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)']
Status Code:  [200. 302. 403. 400.  nan 503. 404. 303. 500. 405.]
Status Message:  ['OK' 'Found' 'Forbidden' '<empty>' nan 'Bad Request'
 'Service Unavailable' 'Not Found' 'See Other' 'NOT FOUND'
 'Internal Server Error' 'Method Not Allowed']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan 'VIA -> SIP/2.0/TCP nm;branch=foo']



IP:  192.168.23.254

Method:  ['GET']
User Agent:  ['Mozilla/5.0 (X11; Linux i686; rv

Status Message:  ['OK']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan]



IP:  192.168.28.253

Method:  ['GET']
User Agent:  ['Wget/1.10.2 (Red Hat modified)']
Status Code:  [200.]
Status Message:  ['OK']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan]



IP:  192.168.202.222

Method:  ['GET' 'POST']
User Agent:  ['Mozilla/5.0 (X11; Linux i686; rv:5.0.1) Gecko/20100101 Firefox/5.0.1']
Status Code:  [200. 404.]
Status Message:  ['OK' 'NOT FOUND' 'Not Found']
Info Code:  [nan]
Info Message:  [nan]
Proxied:  [nan]



IP:  192.168.202.136

Method:  ['GET' 'PUT' 'POST' 'OPTIONS' 'HEAD' 'CONNECT']
User Agent:  [nan 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
 'Mozilla/5.0 (X11; Linux i686; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
 'Wget/1.12 (linux-gnu)'
 'Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)']
Status Code:  [200. 503. 405. 401. 500. 201.  nan 302. 404. 403.]
Status Message:  ['OK' 'Service Unavailable' 'Method Not Allowed' '