In [1]:
import sys
import findspark
findspark.init()
import pyspark
from pyspark import SparkContext, SparkConf
import os

In [2]:

conf = SparkConf().setAppName("Read Log files").setMaster("local[*]")
sc = SparkContext(conf = conf)


In [3]:
# A regular expression pattern to extract fields from the log line
#APACHE_ACCESS_LOG_PATTERN = '^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+)\s*(\S*)" (\d{3}) (\S+)'

APACHE_ACCESS_LOG_PATTERN = '^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (.*?) (\S+)" (\d{3}) (\S+) "(.*?)" "(.*?)"$'


In [4]:
import re
import datetime

from pyspark.sql import Row

month_map = {'Jan': 1, 'Feb': 2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, 'Jul':7,
    'Aug':8,  'Sep': 9, 'Oct':10, 'Nov': 11, 'Dec': 12}

def parse_apache_time(s):
    """ Convert Apache time format into a Python datetime object
    Args:
        s (str): date and time in Apache time format
    Returns:
        datetime: datetime object (ignore timezone for now)
    """
    return datetime.datetime(int(s[7:11]),
                             month_map[s[3:6]],
                             int(s[0:2]),
                             int(s[12:14]),
                             int(s[15:17]),
                             int(s[18:20]))


def parseApacheLogLine(logline):
    """ Parse a line in the Apache Common Log format
    Args:
        logline (str): a line of text in the Apache Common Log format
    Returns:
        tuple: either a dictionary containing the parts of the Apache Access Log and 1,
               or the original invalid log line and 0
    """
    match = re.search(APACHE_ACCESS_LOG_PATTERN, logline)
    if match is None:
        return (logline, 0)
    size_field = match.group(9)
    if size_field == '-':
        size = 0
    else:
        size = match.group(9)
    return (Row(
        host          = match.group(1),
        client_identd = match.group(2),
        user_id       = match.group(3),
        date_time     = parse_apache_time(match.group(4)),
        method        = match.group(5),
        endpoint      = match.group(6),
        protocol      = match.group(7),
        response_code = int(match.group(8)),
        content_size  = size,
        referrer     = match.group(10),
        userAgent    = match.group(11)
    ), 1)



In [5]:
logFile = "apache_logs.txt"

def parseLogs():
    """ Read and parse log file """
    parsed_logs = (sc.textFile(logFile).map(parseApacheLogLine).cache())

    access_logs = (parsed_logs
                   .filter(lambda s: s[1] == 1)
                   .map(lambda s: s[0])
                   .cache())

    failed_logs = (parsed_logs
                   .filter(lambda s: s[1] == 0)
                   .map(lambda s: s[0]))
    failed_logs_count = failed_logs.count()
    if failed_logs_count > 0:
        print ('Number of invalid logline: %d' % failed_logs.count())
        for line in failed_logs.take(20):
            print ('Invalid logline: %s' % line)

    print ('Read %d lines, successfully parsed %d lines, failed to parse %d lines' % (parsed_logs.count(), access_logs.count(), failed_logs.count()))
    return parsed_logs, access_logs, failed_logs


parsed_logs, access_logs, failed_logs = parseLogs()




Number of invalid logline: 2
Invalid logline: 83.149.9.216 - - [17/May/2015:10:05:03 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"
Invalid logline: 46.118.127.106 - - [20/May/2015:12:05:17 +0000] "GET /scripts/grok-py-test/configlib.py HTTP/1.1" 200 235 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html
Read 10000 lines, successfully parsed 9998 lines, failed to parse 2 lines


In [6]:
print(access_logs.take(2))

[Row(host='83.149.9.216', client_identd='-', user_id='-', date_time=datetime.datetime(2015, 5, 17, 10, 5, 43), method='GET', endpoint='/presentations/logstash-monitorama-2013/images/kibana-dashboard3.png', protocol='HTTP/1.1', response_code=200, content_size='171717', referrer='http://semicomplete.com/presentations/logstash-monitorama-2013/', userAgent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36'), Row(host='83.149.9.216', client_identd='-', user_id='-', date_time=datetime.datetime(2015, 5, 17, 10, 5, 47), method='GET', endpoint='/presentations/logstash-monitorama-2013/plugin/highlight/highlight.js', protocol='HTTP/1.1', response_code=200, content_size='26185', referrer='http://semicomplete.com/presentations/logstash-monitorama-2013/', userAgent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36')]


In [29]:
access_logs.groupBy(lambda word: word.host)
print(access_logs.take(2))

[Row(host='83.149.9.216', client_identd='-', user_id='-', date_time=datetime.datetime(2015, 5, 17, 10, 5, 43), method='GET', endpoint='/presentations/logstash-monitorama-2013/images/kibana-dashboard3.png', protocol='HTTP/1.1', response_code=200, content_size='171717', referrer='http://semicomplete.com/presentations/logstash-monitorama-2013/', userAgent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36'), Row(host='83.149.9.216', client_identd='-', user_id='-', date_time=datetime.datetime(2015, 5, 17, 10, 5, 47), method='GET', endpoint='/presentations/logstash-monitorama-2013/plugin/highlight/highlight.js', protocol='HTTP/1.1', response_code=200, content_size='26185', referrer='http://semicomplete.com/presentations/logstash-monitorama-2013/', userAgent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36')]


In [43]:
#total session time

times_date = access_logs.map(lambda s: s.date_time)
bSorted = times_date.sortBy(lambda a: a)

first_time = bSorted.first()
totalCount = bSorted.count()
last_time = bSorted.collect()[totalCount - 1 ]
total_session_time = last_time - first_time

print("Total session time: ", total_session_time)

Total session time:  3 days, 11:00:59


In [15]:
#total session time

g_rdd = access_logs.map(lambda s: (s.host, s.date_time))
r_rdd = g_rdd.groupBy(lambda w: w[0])

#print(r_rdd.take(10))
print(r_rdd.collect()[0][1])
for t in r_rdd.collect()[0][1]:
    print(t)

<pyspark.resultiterable.ResultIterable object at 0x0000020395135280>
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 43))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 47))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 12))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 7))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 34))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 57))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 50))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 24))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 50))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 46))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 11))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 19))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 33))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5))
('83.149.9.216', datetime.datetime(2015, 5, 17, 10, 5, 25))
('83.149.9.216', datetime.datetime(2

In [19]:
for t in access_logs.collect():
        print(t[0], t[1],t[2],t[3])

83.149.9.216 - - 2015-05-17 10:05:43
83.149.9.216 - - 2015-05-17 10:05:47
83.149.9.216 - - 2015-05-17 10:05:12
83.149.9.216 - - 2015-05-17 10:05:07
83.149.9.216 - - 2015-05-17 10:05:34
83.149.9.216 - - 2015-05-17 10:05:57
83.149.9.216 - - 2015-05-17 10:05:50
83.149.9.216 - - 2015-05-17 10:05:24
83.149.9.216 - - 2015-05-17 10:05:50
83.149.9.216 - - 2015-05-17 10:05:46
83.149.9.216 - - 2015-05-17 10:05:11
83.149.9.216 - - 2015-05-17 10:05:19
83.149.9.216 - - 2015-05-17 10:05:33
83.149.9.216 - - 2015-05-17 10:05:00
83.149.9.216 - - 2015-05-17 10:05:25
83.149.9.216 - - 2015-05-17 10:05:59
83.149.9.216 - - 2015-05-17 10:05:30
83.149.9.216 - - 2015-05-17 10:05:53
83.149.9.216 - - 2015-05-17 10:05:24
83.149.9.216 - - 2015-05-17 10:05:54
83.149.9.216 - - 2015-05-17 10:05:33
83.149.9.216 - - 2015-05-17 10:05:56
24.236.252.67 - - 2015-05-17 10:05:40
93.114.45.13 - - 2015-05-17 10:05:14
93.114.45.13 - - 2015-05-17 10:05:04
93.114.45.13 - - 2015-05-17 10:05:45
93.114.45.13 - - 2015-05-17 10:05:14


65.55.213.74 - - 2015-05-17 14:05:53
65.55.213.74 - - 2015-05-17 14:05:09
65.55.213.74 - - 2015-05-17 14:05:05
81.169.149.220 - - 2015-05-17 14:05:13
65.55.213.74 - - 2015-05-17 14:05:25
65.55.213.79 - - 2015-05-17 14:05:21
65.55.213.79 - - 2015-05-17 14:05:05
65.55.213.79 - - 2015-05-17 14:05:55
65.55.213.79 - - 2015-05-17 14:05:04
65.55.213.79 - - 2015-05-17 14:05:10
65.55.213.79 - - 2015-05-17 14:05:42
65.55.213.73 - - 2015-05-17 14:05:25
65.55.213.73 - - 2015-05-17 14:05:58
65.55.213.73 - - 2015-05-17 14:05:06
65.55.213.73 - - 2015-05-17 14:05:03
65.55.213.73 - - 2015-05-17 14:05:25
65.55.213.73 - - 2015-05-17 14:05:31
65.55.213.73 - - 2015-05-17 14:05:26
65.55.213.73 - - 2015-05-17 14:05:34
65.55.213.73 - - 2015-05-17 14:05:33
65.55.213.73 - - 2015-05-17 14:05:11
65.55.213.73 - - 2015-05-17 14:05:50
65.55.213.73 - - 2015-05-17 14:05:44
65.55.213.73 - - 2015-05-17 14:05:28
65.55.213.73 - - 2015-05-17 14:05:39
65.55.213.73 - - 2015-05-17 14:05:38
65.55.213.73 - - 2015-05-17 14:05:05

116.125.143.84 - - 2015-05-17 16:05:22
116.125.143.84 - - 2015-05-17 16:05:35
100.43.83.137 - - 2015-05-17 16:05:53
54.219.224.86 - - 2015-05-17 16:05:59
108.171.116.194 - - 2015-05-17 16:05:17
99.188.185.40 - - 2015-05-17 16:05:10
108.171.116.194 - - 2015-05-17 16:05:04
108.171.116.194 - - 2015-05-17 16:05:37
108.171.116.194 - - 2015-05-17 16:05:02
108.171.116.194 - - 2015-05-17 16:05:47
108.171.116.194 - - 2015-05-17 16:05:18
108.171.116.194 - - 2015-05-17 16:05:08
108.171.116.194 - - 2015-05-17 16:05:53
108.171.116.194 - - 2015-05-17 16:05:55
108.171.116.194 - - 2015-05-17 16:05:31
202.65.130.3 - - 2015-05-17 16:05:23
46.105.14.53 - - 2015-05-17 16:05:07
68.180.224.225 - - 2015-05-17 16:05:59
201.244.101.132 - - 2015-05-17 16:05:27
193.238.231.119 - - 2015-05-17 16:05:28
197.206.232.180 - - 2015-05-17 16:05:34
180.76.6.53 - - 2015-05-17 16:05:29
207.241.237.226 - - 2015-05-17 16:05:49
109.87.146.211 - - 2015-05-17 16:05:19
166.82.99.140 - - 2015-05-17 16:05:24
216.14.102.16 - - 2015

66.249.73.135 - - 2015-05-17 19:05:56
62.161.94.37 - - 2015-05-17 19:05:19
66.249.73.135 - - 2015-05-17 19:05:25
46.105.14.53 - - 2015-05-17 19:05:48
67.220.144.164 - - 2015-05-17 19:05:34
67.220.144.164 - - 2015-05-17 19:05:22
98.248.53.169 - - 2015-05-17 19:05:26
98.248.53.169 - - 2015-05-17 19:05:24
98.248.53.169 - - 2015-05-17 19:05:21
98.248.53.169 - - 2015-05-17 19:05:30
98.248.53.169 - - 2015-05-17 19:05:38
93.152.153.38 - - 2015-05-17 19:05:17
93.152.153.38 - - 2015-05-17 19:05:49
93.152.153.38 - - 2015-05-17 19:05:40
93.152.153.38 - - 2015-05-17 19:05:31
93.152.153.38 - - 2015-05-17 19:05:56
146.1.1.2 - - 2015-05-17 19:05:41
146.1.1.2 - - 2015-05-17 19:05:31
54.196.174.134 - - 2015-05-17 19:05:09
50.16.19.13 - - 2015-05-17 19:05:04
208.93.0.48 - - 2015-05-17 19:05:40
208.91.156.11 - - 2015-05-17 19:05:00
146.1.1.2 - - 2015-05-17 19:05:45
146.1.1.2 - - 2015-05-17 19:05:08
146.1.1.2 - - 2015-05-17 19:05:21
146.1.1.2 - - 2015-05-17 19:05:39
88.196.179.78 - - 2015-05-17 19:05:17
6

49.204.238.249 - - 2015-05-17 21:05:21
49.204.238.249 - - 2015-05-17 21:05:38
80.237.234.150 - - 2015-05-17 21:05:31
46.105.14.53 - - 2015-05-17 21:05:10
209.85.238.199 - - 2015-05-17 21:05:30
74.125.176.148 - - 2015-05-17 21:05:18
182.118.20.213 - - 2015-05-17 21:05:31
100.43.83.137 - - 2015-05-17 21:05:57
66.249.73.185 - - 2015-05-17 21:05:01
208.115.111.72 - - 2015-05-17 21:05:06
99.252.100.83 - - 2015-05-17 21:05:10
99.252.100.83 - - 2015-05-17 21:05:48
99.252.100.83 - - 2015-05-17 21:05:31
99.252.100.83 - - 2015-05-17 21:05:05
99.252.100.83 - - 2015-05-17 21:05:38
99.252.100.83 - - 2015-05-17 21:05:28
99.252.100.83 - - 2015-05-17 21:05:00
99.252.100.83 - - 2015-05-17 21:05:31
99.252.100.83 - - 2015-05-17 21:05:16
99.252.100.83 - - 2015-05-17 21:05:04
99.252.100.83 - - 2015-05-17 21:05:28
99.252.100.83 - - 2015-05-17 21:05:25
99.252.100.83 - - 2015-05-17 21:05:39
99.252.100.83 - - 2015-05-17 21:05:25
99.252.100.83 - - 2015-05-17 21:05:20
99.252.100.83 - - 2015-05-17 21:05:06
99.252

79.191.201.108 - - 2015-05-17 23:05:26
88.196.179.78 - - 2015-05-17 23:05:41
74.125.176.144 - - 2015-05-17 23:05:58
66.249.73.135 - - 2015-05-17 23:05:17
46.105.14.53 - - 2015-05-17 23:05:33
50.152.223.37 - - 2015-05-17 23:05:30
184.60.23.120 - - 2015-05-17 23:05:52
184.60.23.120 - - 2015-05-17 23:05:38
184.60.23.120 - - 2015-05-17 23:05:00
184.60.23.120 - - 2015-05-17 23:05:13
184.60.23.120 - - 2015-05-17 23:05:07
184.60.23.120 - - 2015-05-17 23:05:04
207.241.237.227 - - 2015-05-17 23:05:40
180.76.6.142 - - 2015-05-17 23:05:20
180.76.5.20 - - 2015-05-17 23:05:01
50.139.66.106 - - 2015-05-17 23:05:53
84.58.19.108 - - 2015-05-17 23:05:27
84.58.19.108 - - 2015-05-17 23:05:28
50.139.66.106 - - 2015-05-17 23:05:27
2.137.32.153 - - 2015-05-17 23:05:01
2.137.32.153 - - 2015-05-17 23:05:19
2.137.32.153 - - 2015-05-17 23:05:28
2.137.32.153 - - 2015-05-17 23:05:09
2.137.32.153 - - 2015-05-17 23:05:24
2.137.32.153 - - 2015-05-17 23:05:38
207.241.237.102 - - 2015-05-17 23:05:35
207.241.237.221 - 

83.115.137.249 - - 2015-05-18 02:05:58
100.43.83.137 - - 2015-05-18 02:05:13
83.115.137.249 - - 2015-05-18 02:05:03
23.20.83.155 - - 2015-05-18 02:05:48
23.20.83.155 - - 2015-05-18 02:05:15
207.241.237.220 - - 2015-05-18 02:05:27
66.249.73.135 - - 2015-05-18 02:05:47
207.241.237.223 - - 2015-05-18 02:05:08
207.241.237.102 - - 2015-05-18 02:05:09
197.235.139.144 - - 2015-05-18 02:05:37
207.241.237.101 - - 2015-05-18 02:05:01
107.170.40.199 - - 2015-05-18 02:05:13
74.125.176.150 - - 2015-05-18 02:05:44
46.105.14.53 - - 2015-05-18 02:05:15
207.241.237.221 - - 2015-05-18 02:05:27
66.249.73.135 - - 2015-05-18 02:05:38
109.189.132.223 - - 2015-05-18 02:05:23
207.241.237.103 - - 2015-05-18 02:05:19
109.189.132.223 - - 2015-05-18 02:05:35
109.189.132.223 - - 2015-05-18 02:05:39
109.189.132.223 - - 2015-05-18 02:05:51
109.189.132.223 - - 2015-05-18 02:05:41
109.189.132.223 - - 2015-05-18 02:05:37
207.241.237.220 - - 2015-05-18 02:05:21
207.241.237.103 - - 2015-05-18 02:05:59
207.241.237.220 - -

114.69.226.80 - - 2015-05-18 04:05:02
114.69.226.80 - - 2015-05-18 04:05:29
200.50.183.132 - - 2015-05-18 04:05:41
95.234.214.194 - - 2015-05-18 04:05:40
216.14.102.16 - - 2015-05-18 04:05:26
46.28.105.80 - - 2015-05-18 04:05:46
86.130.160.107 - - 2015-05-18 04:05:57
86.130.160.107 - - 2015-05-18 04:05:16
180.76.5.74 - - 2015-05-18 04:05:59
96.127.213.6 - - 2015-05-18 04:05:53
96.127.213.6 - - 2015-05-18 04:05:06
187.60.96.7 - - 2015-05-18 04:05:55
187.60.96.7 - - 2015-05-18 04:05:27
180.76.5.214 - - 2015-05-18 04:05:54
180.76.6.53 - - 2015-05-18 04:05:33
209.85.238.199 - - 2015-05-18 04:05:22
94.228.34.233 - - 2015-05-18 04:05:04
46.105.14.53 - - 2015-05-18 04:05:55
66.249.73.135 - - 2015-05-18 04:05:20
198.148.112.117 - - 2015-05-18 04:05:51
198.148.112.117 - - 2015-05-18 04:05:11
62.245.76.5 - - 2015-05-18 04:05:24
62.245.76.5 - - 2015-05-18 04:05:50
50.16.19.13 - - 2015-05-18 04:05:22
46.105.14.53 - - 2015-05-18 04:05:55
91.236.75.25 - - 2015-05-18 04:05:17
82.38.225.83 - - 2015-05

108.171.116.194 - - 2015-05-18 06:05:41
108.171.116.194 - - 2015-05-18 06:05:21
108.171.116.194 - - 2015-05-18 06:05:42
108.171.116.194 - - 2015-05-18 06:05:11
108.171.116.194 - - 2015-05-18 06:05:45
108.171.116.194 - - 2015-05-18 06:05:12
108.171.116.194 - - 2015-05-18 06:05:23
108.171.116.194 - - 2015-05-18 06:05:44
108.171.116.194 - - 2015-05-18 06:05:51
66.249.73.135 - - 2015-05-18 06:05:27
50.16.19.13 - - 2015-05-18 06:05:46
209.85.238.199 - - 2015-05-18 06:05:01
78.97.239.35 - - 2015-05-18 06:05:00
78.97.239.35 - - 2015-05-18 06:05:49
78.97.239.35 - - 2015-05-18 06:05:17
78.97.239.35 - - 2015-05-18 06:05:49
78.97.239.35 - - 2015-05-18 06:05:30
78.97.239.35 - - 2015-05-18 06:05:32
108.71.169.76 - - 2015-05-18 06:05:18
46.105.14.53 - - 2015-05-18 06:05:38
107.170.9.55 - - 2015-05-18 07:05:02
189.60.115.148 - - 2015-05-18 07:05:41
98.252.226.135 - - 2015-05-18 07:05:10
98.252.226.135 - - 2015-05-18 07:05:43
98.252.226.135 - - 2015-05-18 07:05:41
98.252.226.135 - - 2015-05-18 07:05:4

75.97.9.59 - - 2015-05-18 09:05:36
75.97.9.59 - - 2015-05-18 09:05:05
75.97.9.59 - - 2015-05-18 09:05:30
75.97.9.59 - - 2015-05-18 09:05:32
75.97.9.59 - - 2015-05-18 09:05:54
75.97.9.59 - - 2015-05-18 09:05:09
75.97.9.59 - - 2015-05-18 09:05:28
75.97.9.59 - - 2015-05-18 09:05:13
75.97.9.59 - - 2015-05-18 09:05:29
75.97.9.59 - - 2015-05-18 09:05:30
75.97.9.59 - - 2015-05-18 09:05:34
75.97.9.59 - - 2015-05-18 09:05:25
66.249.73.135 - - 2015-05-18 09:05:18
75.97.9.59 - - 2015-05-18 09:05:34
75.97.9.59 - - 2015-05-18 09:05:53
75.97.9.59 - - 2015-05-18 09:05:55
75.97.9.59 - - 2015-05-18 09:05:31
75.97.9.59 - - 2015-05-18 09:05:15
75.97.9.59 - - 2015-05-18 09:05:54
75.97.9.59 - - 2015-05-18 09:05:58
75.97.9.59 - - 2015-05-18 09:05:00
75.97.9.59 - - 2015-05-18 09:05:45
75.97.9.59 - - 2015-05-18 09:05:26
75.97.9.59 - - 2015-05-18 09:05:19
75.97.9.59 - - 2015-05-18 09:05:17
75.97.9.59 - - 2015-05-18 09:05:36
75.97.9.59 - - 2015-05-18 09:05:45
75.97.9.59 - - 2015-05-18 09:05:55
75.97.9.59 - - 20

68.4.202.231 - - 2015-05-18 11:05:04
46.118.158.25 - - 2015-05-18 11:05:19
178.255.215.71 - - 2015-05-18 11:05:29
209.85.238.199 - - 2015-05-18 11:05:59
180.76.6.142 - - 2015-05-18 11:05:01
66.249.73.185 - - 2015-05-18 11:05:07
46.105.14.53 - - 2015-05-18 11:05:29
66.249.73.135 - - 2015-05-18 11:05:02
66.249.73.135 - - 2015-05-18 11:05:07
173.55.80.151 - - 2015-05-18 11:05:53
66.249.73.135 - - 2015-05-18 11:05:03
66.249.73.135 - - 2015-05-18 11:05:26
66.249.73.135 - - 2015-05-18 11:05:46
68.180.224.225 - - 2015-05-18 11:05:28
173.55.80.151 - - 2015-05-18 11:05:59
66.249.73.135 - - 2015-05-18 11:05:43
46.105.14.53 - - 2015-05-18 11:05:48
180.76.6.213 - - 2015-05-18 11:05:32
66.249.73.185 - - 2015-05-18 11:05:45
85.246.85.57 - - 2015-05-18 11:05:10
100.43.83.137 - - 2015-05-18 11:05:51
50.16.19.13 - - 2015-05-18 11:05:06
100.43.83.137 - - 2015-05-18 11:05:51
100.43.83.137 - - 2015-05-18 11:05:27
100.43.83.137 - - 2015-05-18 11:05:31
100.43.83.137 - - 2015-05-18 11:05:20
181.141.250.44 - 

66.249.73.135 - - 2015-05-18 13:05:06
120.202.255.147 - - 2015-05-18 13:05:55
46.105.14.53 - - 2015-05-18 13:05:52
189.170.95.135 - - 2015-05-18 13:05:23
189.170.95.135 - - 2015-05-18 13:05:55
189.170.95.135 - - 2015-05-18 13:05:26
189.170.95.135 - - 2015-05-18 13:05:42
189.170.95.135 - - 2015-05-18 13:05:24
189.170.95.135 - - 2015-05-18 13:05:13
203.41.198.36 - - 2015-05-18 13:05:39
203.41.198.36 - - 2015-05-18 13:05:45
203.41.198.36 - - 2015-05-18 13:05:22
203.41.198.36 - - 2015-05-18 13:05:11
203.41.198.36 - - 2015-05-18 13:05:53
203.41.198.36 - - 2015-05-18 13:05:54
66.249.73.135 - - 2015-05-18 13:05:26
24.147.91.100 - - 2015-05-18 13:05:21
24.147.91.100 - - 2015-05-18 13:05:51
24.147.91.100 - - 2015-05-18 13:05:44
24.147.91.100 - - 2015-05-18 13:05:07
24.147.91.100 - - 2015-05-18 13:05:45
24.147.91.100 - - 2015-05-18 13:05:07
46.105.14.53 - - 2015-05-18 13:05:49
180.76.6.156 - - 2015-05-18 13:05:24
66.249.73.135 - - 2015-05-18 13:05:58
176.92.75.62 - - 2015-05-18 13:05:29
176.92.7

210.13.83.18 - - 2015-05-18 15:05:55
210.13.83.18 - - 2015-05-18 15:05:13
210.13.83.18 - - 2015-05-18 15:05:21
210.13.83.18 - - 2015-05-18 15:05:06
210.13.83.18 - - 2015-05-18 15:05:07
210.13.83.18 - - 2015-05-18 15:05:36
210.13.83.18 - - 2015-05-18 15:05:47
210.13.83.18 - - 2015-05-18 15:05:16
210.13.83.18 - - 2015-05-18 15:05:09
210.13.83.18 - - 2015-05-18 15:05:39
210.13.83.18 - - 2015-05-18 15:05:28
210.13.83.18 - - 2015-05-18 15:05:00
210.13.83.18 - - 2015-05-18 15:05:03
210.13.83.18 - - 2015-05-18 15:05:17
210.13.83.18 - - 2015-05-18 15:05:23
210.13.83.18 - - 2015-05-18 15:05:46
210.13.83.18 - - 2015-05-18 15:05:21
210.13.83.18 - - 2015-05-18 15:05:37
210.13.83.18 - - 2015-05-18 15:05:04
210.13.83.18 - - 2015-05-18 15:05:35
210.13.83.18 - - 2015-05-18 15:05:52
210.13.83.18 - - 2015-05-18 15:05:28
210.13.83.18 - - 2015-05-18 15:05:47
210.13.83.18 - - 2015-05-18 15:05:17
210.13.83.18 - - 2015-05-18 15:05:15
54.246.137.243 - - 2015-05-18 15:05:08
210.13.83.18 - - 2015-05-18 15:05:38

70.83.251.183 - - 2015-05-18 17:05:37
70.83.251.183 - - 2015-05-18 17:05:52
66.249.73.135 - - 2015-05-18 17:05:33
180.76.5.118 - - 2015-05-18 17:05:51
46.105.14.53 - - 2015-05-18 17:05:52
70.83.251.183 - - 2015-05-18 17:05:53
70.83.251.183 - - 2015-05-18 17:05:56
70.83.251.183 - - 2015-05-18 17:05:04
70.83.251.183 - - 2015-05-18 17:05:52
70.83.251.183 - - 2015-05-18 17:05:35
70.109.45.64 - - 2015-05-18 17:05:50
70.109.45.64 - - 2015-05-18 17:05:20
70.109.45.64 - - 2015-05-18 17:05:30
70.109.45.64 - - 2015-05-18 17:05:51
70.109.45.64 - - 2015-05-18 17:05:29
70.109.45.64 - - 2015-05-18 18:05:13
50.16.19.13 - - 2015-05-18 18:05:30
212.90.148.107 - - 2015-05-18 18:05:57
199.59.148.210 - - 2015-05-18 18:05:26
199.59.148.209 - - 2015-05-18 18:05:59
50.97.97.54 - - 2015-05-18 18:05:28
209.85.238.199 - - 2015-05-18 18:05:15
209.85.238.199 - - 2015-05-18 18:05:55
66.249.73.135 - - 2015-05-18 18:05:10
203.82.66.233 - - 2015-05-18 18:05:55
66.249.73.135 - - 2015-05-18 18:05:17
46.105.14.53 - - 20

128.61.92.96 - - 2015-05-18 20:05:04
128.61.92.96 - - 2015-05-18 20:05:52
128.61.92.96 - - 2015-05-18 20:05:09
27.251.237.154 - - 2015-05-18 20:05:23
27.251.237.154 - - 2015-05-18 20:05:25
27.251.237.154 - - 2015-05-18 20:05:54
27.251.237.154 - - 2015-05-18 20:05:04
27.251.237.154 - - 2015-05-18 20:05:53
209.85.238.199 - - 2015-05-18 20:05:51
59.163.27.11 - - 2015-05-18 20:05:22
143.233.204.28 - - 2015-05-18 20:05:07
143.233.204.28 - - 2015-05-18 20:05:41
143.233.204.28 - - 2015-05-18 20:05:39
46.105.14.53 - - 2015-05-18 20:05:28
50.16.19.13 - - 2015-05-18 20:05:35
66.249.73.135 - - 2015-05-18 20:05:08
66.249.73.185 - - 2015-05-18 20:05:09
203.173.241.145 - - 2015-05-18 20:05:04
68.180.224.225 - - 2015-05-18 20:05:47
91.207.8.61 - - 2015-05-18 20:05:37
91.207.8.61 - - 2015-05-18 20:05:54
31.208.44.206 - - 2015-05-18 20:05:29
66.249.73.135 - - 2015-05-18 20:05:43
208.115.111.72 - - 2015-05-18 20:05:35
37.59.47.177 - - 2015-05-18 20:05:00
46.105.14.53 - - 2015-05-18 20:05:32
110.143.13.2

185.4.253.67 - - 2015-05-18 23:05:12
106.51.250.126 - - 2015-05-18 23:05:21
185.4.253.67 - - 2015-05-18 23:05:50
185.4.253.67 - - 2015-05-18 23:05:39
185.4.253.67 - - 2015-05-18 23:05:48
185.4.253.67 - - 2015-05-18 23:05:49
106.51.250.126 - - 2015-05-18 23:05:12
106.51.250.126 - - 2015-05-18 23:05:04
106.51.250.126 - - 2015-05-18 23:05:40
106.51.250.126 - - 2015-05-18 23:05:10
185.4.253.67 - - 2015-05-18 23:05:07
185.4.253.67 - - 2015-05-18 23:05:53
185.4.253.67 - - 2015-05-18 23:05:26
185.4.253.67 - - 2015-05-18 23:05:13
106.51.250.126 - - 2015-05-18 23:05:47
66.249.73.135 - - 2015-05-18 23:05:58
77.1.77.21 - - 2015-05-18 23:05:07
77.1.77.21 - - 2015-05-18 23:05:57
77.1.77.21 - - 2015-05-18 23:05:46
77.1.77.21 - - 2015-05-18 23:05:45
77.1.77.21 - - 2015-05-18 23:05:33
77.1.77.21 - - 2015-05-18 23:05:28
194.249.247.164 - - 2015-05-18 23:05:14
194.249.247.164 - - 2015-05-18 23:05:08
194.249.247.164 - - 2015-05-18 23:05:02
194.249.247.164 - - 2015-05-18 23:05:41
194.249.247.164 - - 2015-

89.238.251.138 - - 2015-05-19 01:05:20
195.14.72.29 - - 2015-05-19 01:05:41
66.249.73.135 - - 2015-05-19 01:05:29
59.90.241.113 - - 2015-05-19 01:05:18
59.90.241.113 - - 2015-05-19 01:05:13
59.90.241.113 - - 2015-05-19 01:05:46
66.249.73.135 - - 2015-05-19 01:05:02
198.46.149.143 - - 2015-05-19 01:05:00
198.46.149.143 - - 2015-05-19 01:05:08
110.184.146.254 - - 2015-05-19 01:05:35
110.184.146.254 - - 2015-05-19 01:05:58
110.184.146.254 - - 2015-05-19 01:05:45
110.184.146.254 - - 2015-05-19 01:05:58
115.112.233.75 - - 2015-05-19 01:05:10
110.184.146.254 - - 2015-05-19 01:05:01
115.112.233.75 - - 2015-05-19 01:05:06
115.112.233.75 - - 2015-05-19 01:05:53
115.112.233.75 - - 2015-05-19 01:05:02
115.112.233.75 - - 2015-05-19 01:05:35
115.112.233.75 - - 2015-05-19 01:05:46
110.184.146.254 - - 2015-05-19 01:05:14
74.80.168.244 - - 2015-05-19 01:05:30
107.170.40.204 - - 2015-05-19 01:05:31
74.80.168.244 - - 2015-05-19 01:05:02
209.85.238.199 - - 2015-05-19 01:05:09
110.184.146.254 - - 2015-05-

41.74.172.23 - - 2015-05-19 04:05:38
41.74.172.23 - - 2015-05-19 04:05:26
41.74.172.23 - - 2015-05-19 04:05:01
24.11.96.184 - - 2015-05-19 04:05:49
46.105.14.53 - - 2015-05-19 04:05:27
106.187.34.32 - - 2015-05-19 04:05:33
106.187.34.32 - - 2015-05-19 04:05:03
106.187.34.32 - - 2015-05-19 04:05:33
106.187.34.32 - - 2015-05-19 04:05:04
106.187.34.32 - - 2015-05-19 04:05:53
106.187.34.32 - - 2015-05-19 04:05:56
188.205.61.177 - - 2015-05-19 04:05:59
188.205.61.177 - - 2015-05-19 04:05:26
188.205.61.177 - - 2015-05-19 04:05:36
188.205.61.177 - - 2015-05-19 04:05:22
188.205.61.177 - - 2015-05-19 04:05:27
188.205.61.177 - - 2015-05-19 04:05:56
193.105.0.2 - - 2015-05-19 04:05:19
95.128.40.244 - - 2015-05-19 04:05:09
50.16.19.13 - - 2015-05-19 04:05:38
74.125.40.23 - - 2015-05-19 04:05:48
198.46.149.143 - - 2015-05-19 04:05:58
198.46.149.143 - - 2015-05-19 04:05:37
66.249.73.135 - - 2015-05-19 04:05:43
97.74.24.112 - - 2015-05-19 04:05:19
121.244.49.42 - - 2015-05-19 04:05:36
68.180.224.225 

85.248.108.8 - - 2015-05-19 06:05:49
85.248.108.8 - - 2015-05-19 06:05:20
85.248.108.8 - - 2015-05-19 06:05:18
85.248.108.8 - - 2015-05-19 06:05:17
85.248.108.8 - - 2015-05-19 06:05:28
85.248.108.8 - - 2015-05-19 06:05:26
118.186.69.62 - - 2015-05-19 06:05:54
118.186.69.62 - - 2015-05-19 06:05:23
74.125.19.82 - - 2015-05-19 06:05:27
173.252.74.113 - - 2015-05-19 06:05:28
173.252.74.119 - - 2015-05-19 06:05:44
173.252.74.112 - - 2015-05-19 06:05:23
173.252.77.116 - - 2015-05-19 06:05:31
122.57.173.238 - - 2015-05-19 06:05:51
162.210.196.130 - - 2015-05-19 06:05:28
162.210.196.130 - - 2015-05-19 06:05:16
66.249.73.135 - - 2015-05-19 06:05:43
107.170.40.198 - - 2015-05-19 06:05:44
46.105.14.53 - - 2015-05-19 06:05:24
89.216.30.194 - - 2015-05-19 06:05:53
89.216.30.194 - - 2015-05-19 06:05:55
89.216.30.194 - - 2015-05-19 06:05:09
89.216.30.194 - - 2015-05-19 06:05:20
89.216.30.194 - - 2015-05-19 06:05:10
89.216.30.194 - - 2015-05-19 06:05:23
89.216.30.194 - - 2015-05-19 06:05:00
62.24.122.

93.17.51.134 - - 2015-05-19 08:05:09
93.17.51.134 - - 2015-05-19 08:05:24
93.17.51.134 - - 2015-05-19 08:05:10
93.17.51.134 - - 2015-05-19 08:05:19
93.17.51.134 - - 2015-05-19 08:05:58
93.17.51.134 - - 2015-05-19 08:05:02
93.17.51.134 - - 2015-05-19 08:05:06
93.17.51.134 - - 2015-05-19 08:05:24
93.17.51.134 - - 2015-05-19 08:05:01
93.17.51.134 - - 2015-05-19 08:05:48
93.17.51.134 - - 2015-05-19 08:05:54
93.17.51.134 - - 2015-05-19 08:05:38
93.17.51.134 - - 2015-05-19 08:05:23
93.17.51.134 - - 2015-05-19 08:05:07
93.17.51.134 - - 2015-05-19 08:05:52
93.17.51.134 - - 2015-05-19 08:05:07
93.17.51.134 - - 2015-05-19 08:05:36
93.17.51.134 - - 2015-05-19 08:05:51
93.17.51.134 - - 2015-05-19 08:05:15
93.17.51.134 - - 2015-05-19 08:05:34
93.17.51.134 - - 2015-05-19 08:05:04
93.17.51.134 - - 2015-05-19 08:05:24
93.17.51.134 - - 2015-05-19 08:05:11
93.17.51.134 - - 2015-05-19 08:05:58
93.17.51.134 - - 2015-05-19 08:05:54
93.17.51.134 - - 2015-05-19 08:05:38
93.17.51.134 - - 2015-05-19 08:05:21
9

85.254.143.114 - - 2015-05-19 10:05:47
85.254.143.114 - - 2015-05-19 10:05:02
66.249.73.135 - - 2015-05-19 10:05:52
89.191.52.88 - - 2015-05-19 11:05:41
201.242.142.135 - - 2015-05-19 11:05:10
68.180.224.225 - - 2015-05-19 11:05:20
186.93.30.106 - - 2015-05-19 11:05:15
78.173.140.106 - - 2015-05-19 11:05:57
182.68.184.78 - - 2015-05-19 11:05:42
1.22.35.226 - - 2015-05-19 11:05:46
182.68.184.78 - - 2015-05-19 11:05:26
1.22.35.226 - - 2015-05-19 11:05:07
1.22.35.226 - - 2015-05-19 11:05:36
186.93.16.149 - - 2015-05-19 11:05:57
208.91.156.11 - - 2015-05-19 11:05:16
1.22.35.226 - - 2015-05-19 11:05:49
1.22.35.226 - - 2015-05-19 11:05:27
1.22.35.226 - - 2015-05-19 11:05:43
195.14.103.53 - - 2015-05-19 11:05:07
195.14.103.53 - - 2015-05-19 11:05:15
195.14.103.53 - - 2015-05-19 11:05:52
195.14.103.53 - - 2015-05-19 11:05:23
195.14.103.53 - - 2015-05-19 11:05:56
195.14.103.53 - - 2015-05-19 11:05:17
195.14.103.53 - - 2015-05-19 11:05:42
212.234.218.51 - - 2015-05-19 11:05:24
46.105.14.53 - - 2

130.237.218.86 - - 2015-05-19 13:05:33
130.237.218.86 - - 2015-05-19 13:05:13
130.237.218.86 - - 2015-05-19 13:05:30
130.237.218.86 - - 2015-05-19 13:05:41
130.237.218.86 - - 2015-05-19 13:05:21
130.237.218.86 - - 2015-05-19 13:05:22
130.237.218.86 - - 2015-05-19 13:05:01
130.237.218.86 - - 2015-05-19 13:05:19
130.237.218.86 - - 2015-05-19 13:05:50
130.237.218.86 - - 2015-05-19 13:05:09
130.237.218.86 - - 2015-05-19 13:05:01
130.237.218.86 - - 2015-05-19 13:05:50
130.237.218.86 - - 2015-05-19 13:05:46
130.237.218.86 - - 2015-05-19 13:05:59
130.237.218.86 - - 2015-05-19 13:05:23
130.237.218.86 - - 2015-05-19 13:05:50
130.237.218.86 - - 2015-05-19 13:05:30
130.237.218.86 - - 2015-05-19 13:05:03
130.237.218.86 - - 2015-05-19 13:05:37
130.237.218.86 - - 2015-05-19 13:05:24
130.237.218.86 - - 2015-05-19 13:05:43
130.237.218.86 - - 2015-05-19 13:05:34
130.237.218.86 - - 2015-05-19 13:05:15
141.89.226.149 - - 2015-05-19 13:05:29
141.89.226.149 - - 2015-05-19 13:05:03
63.223.125.175 - - 2015-0

150.101.171.78 - - 2015-05-19 15:05:17
150.101.171.78 - - 2015-05-19 15:05:36
150.101.171.78 - - 2015-05-19 15:05:18
150.101.171.78 - - 2015-05-19 15:05:24
66.249.73.135 - - 2015-05-19 15:05:14
208.91.156.11 - - 2015-05-19 15:05:40
74.125.176.147 - - 2015-05-19 15:05:26
46.105.14.53 - - 2015-05-19 15:05:29
209.85.238.199 - - 2015-05-19 15:05:59
110.159.23.90 - - 2015-05-19 15:05:46
110.159.23.90 - - 2015-05-19 15:05:22
110.159.23.90 - - 2015-05-19 15:05:31
110.159.23.90 - - 2015-05-19 15:05:59
110.159.23.90 - - 2015-05-19 15:05:30
110.159.23.90 - - 2015-05-19 15:05:05
110.159.23.90 - - 2015-05-19 15:05:04
110.159.23.90 - - 2015-05-19 15:05:13
66.249.73.135 - - 2015-05-19 15:05:58
66.249.73.135 - - 2015-05-19 15:05:51
78.6.176.46 - - 2015-05-19 15:05:17
78.6.176.46 - - 2015-05-19 15:05:50
78.6.176.46 - - 2015-05-19 15:05:03
78.6.176.46 - - 2015-05-19 15:05:43
78.6.176.46 - - 2015-05-19 15:05:13
78.6.176.46 - - 2015-05-19 15:05:17
180.76.6.51 - - 2015-05-19 15:05:27
194.149.247.24 - - 20

77.241.193.88 - - 2015-05-19 18:05:58
77.241.193.88 - - 2015-05-19 18:05:13
77.241.193.88 - - 2015-05-19 18:05:55
98.216.194.189 - - 2015-05-19 18:05:02
193.77.158.208 - - 2015-05-19 18:05:17
193.77.158.208 - - 2015-05-19 18:05:26
193.77.158.208 - - 2015-05-19 18:05:42
193.77.158.208 - - 2015-05-19 18:05:29
193.77.158.208 - - 2015-05-19 18:05:33
193.77.158.208 - - 2015-05-19 18:05:03
178.222.167.128 - - 2015-05-19 18:05:36
178.222.167.128 - - 2015-05-19 18:05:29
46.105.14.53 - - 2015-05-19 18:05:06
66.249.73.135 - - 2015-05-19 18:05:44
213.87.123.186 - - 2015-05-19 18:05:41
89.212.62.88 - - 2015-05-19 18:05:51
89.212.62.88 - - 2015-05-19 18:05:55
89.212.62.88 - - 2015-05-19 18:05:30
89.212.62.88 - - 2015-05-19 18:05:09
89.212.62.88 - - 2015-05-19 18:05:47
89.212.62.88 - - 2015-05-19 18:05:55
79.101.87.86 - - 2015-05-19 18:05:29
79.101.87.86 - - 2015-05-19 18:05:34
79.101.87.86 - - 2015-05-19 18:05:17
66.249.73.185 - - 2015-05-19 18:05:49
66.249.73.185 - - 2015-05-19 18:05:54
79.101.87.

69.167.130.5 - - 2015-05-19 20:05:08
69.167.130.5 - - 2015-05-19 20:05:15
69.167.130.5 - - 2015-05-19 20:05:30
69.167.130.5 - - 2015-05-19 20:05:39
69.167.130.5 - - 2015-05-19 20:05:26
69.167.130.5 - - 2015-05-19 20:05:57
69.167.130.5 - - 2015-05-19 20:05:14
193.24.88.211 - - 2015-05-19 20:05:09
193.24.88.211 - - 2015-05-19 20:05:43
193.24.88.211 - - 2015-05-19 20:05:45
193.24.88.211 - - 2015-05-19 20:05:20
193.24.88.211 - - 2015-05-19 20:05:23
146.1.1.2 - - 2015-05-19 20:05:24
193.24.88.211 - - 2015-05-19 20:05:13
180.76.6.50 - - 2015-05-19 20:05:48
146.1.1.2 - - 2015-05-19 20:05:49
68.180.224.225 - - 2015-05-19 20:05:56
193.24.88.211 - - 2015-05-19 20:05:30
74.125.19.208 - - 2015-05-19 20:05:49
107.170.40.204 - - 2015-05-19 20:05:07
213.124.13.242 - - 2015-05-19 20:05:57
213.124.13.242 - - 2015-05-19 20:05:08
66.249.73.135 - - 2015-05-19 20:05:24
83.31.36.250 - - 2015-05-19 20:05:40
83.31.36.250 - - 2015-05-19 20:05:37
105.224.234.235 - - 2015-05-19 20:05:40
209.85.238.199 - - 2015-0

213.193.72.155 - - 2015-05-19 22:05:22
213.193.72.155 - - 2015-05-19 22:05:44
213.193.72.155 - - 2015-05-19 22:05:35
107.170.40.198 - - 2015-05-19 22:05:30
213.193.72.155 - - 2015-05-19 22:05:11
139.184.30.132 - - 2015-05-19 22:05:23
46.105.14.53 - - 2015-05-19 22:05:57
66.249.83.239 - - 2015-05-19 22:05:03
66.249.83.239 - - 2015-05-19 22:05:16
68.180.224.225 - - 2015-05-19 22:05:50
208.91.156.11 - - 2015-05-19 22:05:30
217.195.202.13 - - 2015-05-19 22:05:03
5.235.199.240 - - 2015-05-19 22:05:32
130.237.218.86 - - 2015-05-19 22:05:18
130.237.218.86 - - 2015-05-19 22:05:55
130.237.218.86 - - 2015-05-19 22:05:41
130.237.218.86 - - 2015-05-19 22:05:40
130.237.218.86 - - 2015-05-19 22:05:54
130.237.218.86 - - 2015-05-19 22:05:29
130.237.218.86 - - 2015-05-19 22:05:44
130.237.218.86 - - 2015-05-19 22:05:00
130.237.218.86 - - 2015-05-19 22:05:54
130.237.218.86 - - 2015-05-19 22:05:35
130.237.218.86 - - 2015-05-19 22:05:49
130.237.218.86 - - 2015-05-19 22:05:35
130.237.218.86 - - 2015-05-19 2

130.237.218.86 - - 2015-05-20 01:05:40
130.237.218.86 - - 2015-05-20 01:05:20
130.237.218.86 - - 2015-05-20 01:05:12
130.237.218.86 - - 2015-05-20 01:05:35
130.237.218.86 - - 2015-05-20 01:05:06
130.237.218.86 - - 2015-05-20 01:05:47
130.237.218.86 - - 2015-05-20 01:05:48
130.237.218.86 - - 2015-05-20 01:05:03
130.237.218.86 - - 2015-05-20 01:05:36
130.237.218.86 - - 2015-05-20 01:05:28
130.237.218.86 - - 2015-05-20 01:05:10
130.237.218.86 - - 2015-05-20 01:05:53
130.237.218.86 - - 2015-05-20 01:05:07
130.237.218.86 - - 2015-05-20 01:05:27
130.237.218.86 - - 2015-05-20 01:05:52
130.237.218.86 - - 2015-05-20 01:05:07
130.237.218.86 - - 2015-05-20 01:05:49
130.237.218.86 - - 2015-05-20 01:05:44
130.237.218.86 - - 2015-05-20 01:05:08
130.237.218.86 - - 2015-05-20 01:05:23
130.237.218.86 - - 2015-05-20 01:05:19
130.237.218.86 - - 2015-05-20 01:05:35
130.237.218.86 - - 2015-05-20 01:05:52
130.237.218.86 - - 2015-05-20 01:05:27
130.237.218.86 - - 2015-05-20 01:05:13
130.237.218.86 - - 2015-0

83.206.120.18 - - 2015-05-20 03:05:03
152.23.4.70 - - 2015-05-20 03:05:53
152.23.4.70 - - 2015-05-20 03:05:13
128.118.108.67 - - 2015-05-20 03:05:52
83.61.80.53 - - 2015-05-20 03:05:39
83.61.80.53 - - 2015-05-20 03:05:40
83.61.80.53 - - 2015-05-20 03:05:19
83.61.80.53 - - 2015-05-20 03:05:28
83.61.80.53 - - 2015-05-20 03:05:45
83.61.80.53 - - 2015-05-20 03:05:10
178.118.111.33 - - 2015-05-20 03:05:56
178.118.111.33 - - 2015-05-20 03:05:19
178.118.111.33 - - 2015-05-20 03:05:56
178.118.111.33 - - 2015-05-20 03:05:21
178.118.111.33 - - 2015-05-20 03:05:04
178.118.111.33 - - 2015-05-20 03:05:08
66.249.73.185 - - 2015-05-20 03:05:43
91.231.179.253 - - 2015-05-20 03:05:08
91.231.179.253 - - 2015-05-20 03:05:39
91.231.179.253 - - 2015-05-20 03:05:57
91.231.179.253 - - 2015-05-20 03:05:03
91.231.179.253 - - 2015-05-20 03:05:52
91.231.179.253 - - 2015-05-20 03:05:40
208.93.0.48 - - 2015-05-20 03:05:52
46.105.14.53 - - 2015-05-20 03:05:06
50.16.19.13 - - 2015-05-20 03:05:58
50.2.225.202 - - 201

68.40.205.212 - - 2015-05-20 06:05:18
74.125.19.80 - - 2015-05-20 06:05:31
198.46.149.143 - - 2015-05-20 06:05:13
198.46.149.143 - - 2015-05-20 06:05:55
50.16.19.13 - - 2015-05-20 06:05:02
180.76.5.98 - - 2015-05-20 06:05:40
46.105.14.53 - - 2015-05-20 06:05:08
14.18.25.243 - - 2015-05-20 06:05:12
14.18.25.243 - - 2015-05-20 06:05:31
14.18.25.243 - - 2015-05-20 06:05:28
14.18.25.243 - - 2015-05-20 06:05:46
14.18.25.243 - - 2015-05-20 06:05:21
14.18.25.243 - - 2015-05-20 06:05:11
66.249.73.135 - - 2015-05-20 06:05:46
209.17.114.78 - - 2015-05-20 06:05:28
209.17.114.78 - - 2015-05-20 06:05:46
209.17.114.78 - - 2015-05-20 06:05:23
209.17.114.78 - - 2015-05-20 06:05:05
178.255.215.71 - - 2015-05-20 06:05:11
109.109.46.88 - - 2015-05-20 06:05:26
157.55.33.19 - - 2015-05-20 06:05:37
192.254.138.148 - - 2015-05-20 06:05:20
46.105.14.53 - - 2015-05-20 06:05:10
79.35.84.239 - - 2015-05-20 06:05:32
79.35.84.239 - - 2015-05-20 06:05:56
79.35.84.239 - - 2015-05-20 06:05:31
79.35.84.239 - - 2015-05

134.158.231.20 - - 2015-05-20 08:05:18
46.105.14.53 - - 2015-05-20 08:05:20
134.158.231.20 - - 2015-05-20 08:05:24
37.11.0.8 - - 2015-05-20 08:05:19
193.252.118.175 - - 2015-05-20 08:05:58
50.16.19.13 - - 2015-05-20 08:05:43
183.221.90.177 - - 2015-05-20 08:05:14
183.221.90.177 - - 2015-05-20 08:05:49
183.221.90.177 - - 2015-05-20 08:05:30
183.221.90.177 - - 2015-05-20 08:05:20
183.221.90.177 - - 2015-05-20 08:05:28
183.221.90.177 - - 2015-05-20 08:05:57
193.252.118.175 - - 2015-05-20 08:05:41
157.55.32.190 - - 2015-05-20 08:05:48
211.125.92.233 - - 2015-05-20 08:05:30
87.109.78.50 - - 2015-05-20 08:05:18
197.36.142.40 - - 2015-05-20 08:05:41
197.36.142.40 - - 2015-05-20 08:05:25
197.36.142.40 - - 2015-05-20 08:05:41
197.36.142.40 - - 2015-05-20 08:05:12
197.36.142.40 - - 2015-05-20 08:05:36
197.36.142.40 - - 2015-05-20 08:05:13
74.125.19.81 - - 2015-05-20 08:05:06
120.202.255.147 - - 2015-05-20 08:05:41
46.105.14.53 - - 2015-05-20 08:05:27
178.137.5.235 - - 2015-05-20 08:05:59
193.33.

109.231.204.82 - - 2015-05-20 10:05:27
209.85.238.199 - - 2015-05-20 10:05:35
46.105.14.53 - - 2015-05-20 10:05:30
80.130.134.87 - - 2015-05-20 10:05:10
80.130.134.87 - - 2015-05-20 10:05:23
200.10.161.5 - - 2015-05-20 10:05:22
200.10.161.5 - - 2015-05-20 10:05:59
92.236.181.94 - - 2015-05-20 10:05:58
92.236.181.94 - - 2015-05-20 10:05:05
92.236.181.94 - - 2015-05-20 10:05:47
92.236.181.94 - - 2015-05-20 10:05:44
92.236.181.94 - - 2015-05-20 10:05:56
92.236.181.94 - - 2015-05-20 10:05:16
65.19.138.33 - - 2015-05-20 10:05:17
66.249.73.135 - - 2015-05-20 10:05:42
50.16.19.13 - - 2015-05-20 10:05:58
209.17.114.78 - - 2015-05-20 11:05:08
24.0.194.37 - - 2015-05-20 11:05:00
180.76.5.189 - - 2015-05-20 11:05:03
46.105.14.53 - - 2015-05-20 11:05:50
24.0.194.37 - - 2015-05-20 11:05:15
24.0.194.37 - - 2015-05-20 11:05:19
24.0.194.37 - - 2015-05-20 11:05:00
24.0.194.37 - - 2015-05-20 11:05:16
24.0.194.37 - - 2015-05-20 11:05:01
24.0.194.37 - - 2015-05-20 11:05:12
24.0.194.37 - - 2015-05-20 11:05

189.11.65.66 - - 2015-05-20 13:05:16
189.11.65.66 - - 2015-05-20 13:05:19
189.11.65.66 - - 2015-05-20 13:05:42
189.11.65.66 - - 2015-05-20 13:05:19
80.160.68.134 - - 2015-05-20 13:05:27
66.249.73.135 - - 2015-05-20 13:05:04
188.192.27.241 - - 2015-05-20 13:05:02
208.115.111.72 - - 2015-05-20 13:05:18
157.55.33.15 - - 2015-05-20 13:05:32
184.151.222.45 - - 2015-05-20 13:05:57
46.105.14.53 - - 2015-05-20 13:05:28
66.249.73.135 - - 2015-05-20 13:05:19
173.213.99.1 - - 2015-05-20 13:05:47
173.213.99.1 - - 2015-05-20 13:05:42
180.76.5.22 - - 2015-05-20 13:05:58
208.91.156.11 - - 2015-05-20 13:05:55
50.16.19.13 - - 2015-05-20 13:05:51
198.27.64.9 - - 2015-05-20 13:05:48
187.45.193.158 - - 2015-05-20 13:05:36
68.180.224.225 - - 2015-05-20 13:05:34
198.27.64.9 - - 2015-05-20 13:05:16
128.118.108.67 - - 2015-05-20 13:05:01
128.179.155.97 - - 2015-05-20 13:05:41
66.249.73.135 - - 2015-05-20 13:05:18
68.14.231.140 - - 2015-05-20 13:05:50
128.179.155.97 - - 2015-05-20 13:05:07
216.172.140.128 - - 

107.170.9.55 - - 2015-05-20 15:05:05
74.125.40.20 - - 2015-05-20 15:05:48
49.206.120.190 - - 2015-05-20 15:05:57
49.206.120.190 - - 2015-05-20 15:05:32
49.206.120.190 - - 2015-05-20 15:05:21
49.206.120.190 - - 2015-05-20 15:05:10
49.206.120.190 - - 2015-05-20 15:05:37
82.60.18.23 - - 2015-05-20 15:05:31
82.60.18.23 - - 2015-05-20 15:05:32
5.10.83.65 - - 2015-05-20 15:05:27
46.105.14.53 - - 2015-05-20 15:05:43
24.190.3.5 - - 2015-05-20 15:05:57
74.76.53.142 - - 2015-05-20 15:05:21
66.249.73.135 - - 2015-05-20 15:05:10
68.180.224.225 - - 2015-05-20 15:05:16
68.180.224.225 - - 2015-05-20 15:05:49
173.162.242.85 - - 2015-05-20 15:05:02
68.180.224.225 - - 2015-05-20 15:05:32
128.118.108.67 - - 2015-05-20 15:05:15
119.224.20.139 - - 2015-05-20 15:05:38
74.76.53.142 - - 2015-05-20 15:05:57
74.76.53.142 - - 2015-05-20 15:05:01
74.76.53.142 - - 2015-05-20 15:05:55
74.76.53.142 - - 2015-05-20 15:05:15
74.76.53.142 - - 2015-05-20 15:05:53
66.249.73.135 - - 2015-05-20 15:05:47
68.180.224.225 - - 2

141.142.223.45 - - 2015-05-20 18:05:02
5.10.83.23 - - 2015-05-20 18:05:00
182.253.73.95 - - 2015-05-20 18:05:37
182.253.73.95 - - 2015-05-20 18:05:06
78.47.134.185 - - 2015-05-20 18:05:41
66.249.73.135 - - 2015-05-20 18:05:12
46.105.14.53 - - 2015-05-20 18:05:09
50.16.19.13 - - 2015-05-20 18:05:38
5.10.83.82 - - 2015-05-20 18:05:15
92.100.97.83 - - 2015-05-20 18:05:02
92.100.97.83 - - 2015-05-20 18:05:54
23.229.67.14 - - 2015-05-20 18:05:05
99.146.78.102 - - 2015-05-20 18:05:40
23.229.67.14 - - 2015-05-20 18:05:13
190.107.140.178 - - 2015-05-20 18:05:49
5.10.83.23 - - 2015-05-20 18:05:00
66.249.73.135 - - 2015-05-20 18:05:46
208.43.243.244 - - 2015-05-20 18:05:40
208.43.243.244 - - 2015-05-20 18:05:36
208.43.243.244 - - 2015-05-20 18:05:28
208.43.243.244 - - 2015-05-20 18:05:34
15.219.153.83 - - 2015-05-20 18:05:11
15.219.153.83 - - 2015-05-20 18:05:29
15.219.153.83 - - 2015-05-20 18:05:13
180.76.5.73 - - 2015-05-20 18:05:07
208.43.243.244 - - 2015-05-20 18:05:59
216.151.137.35 - - 201

120.136.4.243 - - 2015-05-20 21:05:13
66.249.73.185 - - 2015-05-20 21:05:45
195.194.187.106 - - 2015-05-20 21:05:22
66.249.73.185 - - 2015-05-20 21:05:09
66.249.73.185 - - 2015-05-20 21:05:32
176.31.39.30 - - 2015-05-20 21:05:42
176.31.39.30 - - 2015-05-20 21:05:17
116.199.211.249 - - 2015-05-20 21:05:16
46.105.14.53 - - 2015-05-20 21:05:39
5.10.83.53 - - 2015-05-20 21:05:07
66.249.73.135 - - 2015-05-20 21:05:59
92.115.179.247 - - 2015-05-20 21:05:46
92.115.179.247 - - 2015-05-20 21:05:31
92.115.179.247 - - 2015-05-20 21:05:35
92.115.179.247 - - 2015-05-20 21:05:01
92.115.179.247 - - 2015-05-20 21:05:34
92.115.179.247 - - 2015-05-20 21:05:35
5.10.83.53 - - 2015-05-20 21:05:59
46.119.114.245 - - 2015-05-20 21:05:28
173.231.106.34 - - 2015-05-20 21:05:19
66.169.220.99 - - 2015-05-20 21:05:03
66.249.73.135 - - 2015-05-20 21:05:37
50.16.19.13 - - 2015-05-20 21:05:43
5.10.83.21 - - 2015-05-20 21:05:36
208.91.156.11 - - 2015-05-20 21:05:05
66.249.73.135 - - 2015-05-20 21:05:18
66.249.73.135 

In [32]:
parsed_logs2.groupBy(lambda word: word[0])
parsed_logs2.take(2)

['83.149.9.216 - - [17/May/2015:10:05:03 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"',
 '83.149.9.216 - - [17/May/2015:10:05:43 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1" 200 171717 "http://semicomplete.com/presentations/logstash-monitorama-2013/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"']

In [33]:
parsed_logs2 = (sc.textFile(logFile))
parsed_logs2.take(2)

['83.149.9.216 - - [17/May/2015:10:05:03 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"',
 '83.149.9.216 - - [17/May/2015:10:05:43 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1" 200 171717 "http://semicomplete.com/presentations/logstash-monitorama-2013/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"']

In [8]:
parsed_logs2.take(1)[0]

'83.149.9.216 - - [17/May/2015:10:05:03 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "http://semicomplete.com/presentations/logstash-monitorama-2013/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'

In [22]:
not200 = access_logs.filter(lambda log: log.responseCode != 200)


AttributeError: 'PipelinedRDD' object has no attribute 'show'

In [21]:
parsedGroupBy.take(1)

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 15.0 failed 1 times, most recent failure: Lost task 0.0 in stage 15.0 (TID 25, host.docker.internal, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\worker.py", line 605, in main
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\worker.py", line 595, in process
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 2596, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 2596, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 425, in func
    return f(iterator)
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 2030, in combine
    merger.mergeValues(iterator)
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\shuffle.py", line 238, in mergeValues
    for k, v in iterator:
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\util.py", line 107, in wrapper
    return f(*args, **kwargs)
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 811, in <lambda>
    return self.map(lambda x: (f(x), x)).groupByKey(numPartitions, partitionFunc)
TypeError: 'str' object is not callable

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:503)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:638)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:621)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1209)
	at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1215)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:132)
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2059)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2008)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2007)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2007)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:973)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:973)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:973)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2239)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2188)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2177)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:775)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2099)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2120)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2139)
	at org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:154)
	at org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\worker.py", line 605, in main
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\worker.py", line 595, in process
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 2596, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 2596, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 425, in func
    return f(iterator)
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 2030, in combine
    merger.mergeValues(iterator)
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\shuffle.py", line 238, in mergeValues
    for k, v in iterator:
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\util.py", line 107, in wrapper
    return f(*args, **kwargs)
  File "D:\spark\spark-3.0.1-bin-hadoop2.7\python\pyspark\rdd.py", line 811, in <lambda>
    return self.map(lambda x: (f(x), x)).groupByKey(numPartitions, partitionFunc)
TypeError: 'str' object is not callable

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:503)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:638)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:621)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1209)
	at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1215)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:132)
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	... 1 more


Total session time:  3 days, 11:00:59


In [11]:
logFile = "apache_logs.txt"
file = sc.textFile(logFile)

In [13]:
print(file.first())

83.149.9.216 - - [17/May/2015:10:05:03 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "http://semicomplete.com/presentations/logstash-monitorama-2013/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"


In [3]:
# A regular expression pattern to extract fields from the log line
APACHE_ACCESS_LOG_PATTERN = '^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+)\s*(\S*)" (\d{3}) (\S+)'

In [4]:
import re
import datetime

from pyspark.sql import Row

month_map = {'Jan': 1, 'Feb': 2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, 'Jul':7,
    'Aug':8,  'Sep': 9, 'Oct':10, 'Nov': 11, 'Dec': 12}

def parse_apache_time(s):
    """ Convert Apache time format into a Python datetime object
    Args:
        s (str): date and time in Apache time format
    Returns:
        datetime: datetime object (ignore timezone for now)
    """
    return datetime.datetime(int(s[7:11]),
                             month_map[s[3:6]],
                             int(s[0:2]),
                             int(s[12:14]),
                             int(s[15:17]),
                             int(s[18:20]))


def parseApacheLogLine(logline):
    """ Parse a line in the Apache Common Log format
    Args:
        logline (str): a line of text in the Apache Common Log format
    Returns:
        tuple: either a dictionary containing the parts of the Apache Access Log and 1,
               or the original invalid log line and 0
    """
    match = re.search(APACHE_ACCESS_LOG_PATTERN, logline)
    if match is None:
        return (logline, 0)
    size_field = match.group(9)
    if size_field == '-':
        size = 0
    else:
        size = match.group(9)
    return (Row(
        host          = match.group(1),
        client_identd = match.group(2),
        user_id       = match.group(3),
        date_time     = parse_apache_time(match.group(4)),
        method        = match.group(5),
        endpoint      = match.group(6),
        protocol      = match.group(7),
        response_code = int(match.group(8)),
        content_size  = size
    ), 1)



In [11]:
logFile = "apache_logs.txt"
parsed_logs = (sc.textFile(logFile).map(parseApacheLogLine).cache())
access_logs = (parsed_logs.filter(lambda s: s[1] == 1).map(lambda s: s[0]).cache())
failed_logs = (parsed_logs.filter(lambda s: s[1] == 0).map(lambda s: s[0]))
failed_logs_count = failed_logs.count()

if failed_logs_count > 0:
    print ('Number of invalid logline: %d' % failed_logs.count())
    for line in failed_logs.take(20):
        print ('Invalid logline: %s' % line)

print ('Read %d lines, successfully parsed %d lines, failed to parse %d lines' % (parsed_logs.count(), access_logs.count(), failed_logs.count()))


Read 10000 lines, successfully parsed 10000 lines, failed to parse 0 lines


In [10]:
print(failed_logs.count())

0


In [18]:
logFile = "apache_logs.txt"

def parseLogs():
    """ Read and parse log file """
    parsed_logs = (sc.textFile(logFile).map(parseApacheLogLine).cache())

    access_logs = (parsed_logs
                   .filter(lambda s: s[1] == 1)
                   .map(lambda s: s[0])
                   .cache())

    failed_logs = (parsed_logs
                   .filter(lambda s: s[1] == 0)
                   .map(lambda s: s[0]))
    failed_logs_count = failed_logs.count()
    if failed_logs_count > 0:
        print ('Number of invalid logline: %d' % failed_logs.count())
        for line in failed_logs.take(20):
            print ('Invalid logline: %s' % line)

    print ('Read %d lines, successfully parsed %d lines, failed to parse %d lines' % (parsed_logs.count(), access_logs.count(), failed_logs.count()))
    return parsed_logs, access_logs, failed_logs


parsed_logs, access_logs, failed_logs = parseLogs()




Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 2.0 failed 1 times, most recent failure: Lost task 1.0 in stage 2.0 (TID 4, host.docker.internal, executor driver): java.net.SocketException: Connection reset
	at java.net.SocketInputStream.read(SocketInputStream.java:209)
	at java.net.SocketInputStream.read(SocketInputStream.java:141)
	at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
	at java.io.BufferedInputStream.read(BufferedInputStream.java:265)
	at java.io.DataInputStream.readInt(DataInputStream.java:387)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:628)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:621)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1388)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1298)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1362)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1186)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:360)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:311)
	at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2059)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2008)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2007)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2007)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:973)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:973)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:973)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2239)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2188)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2177)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:775)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2099)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2120)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2139)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2164)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1004)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:1003)
	at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:168)
	at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.net.SocketException: Connection reset
	at java.net.SocketInputStream.read(SocketInputStream.java:209)
	at java.net.SocketInputStream.read(SocketInputStream.java:141)
	at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
	at java.io.BufferedInputStream.read(BufferedInputStream.java:265)
	at java.io.DataInputStream.readInt(DataInputStream.java:387)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:628)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:621)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1388)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1298)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1362)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1186)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:360)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:311)
	at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	... 1 more


In [8]:
  
import re
from pyspark.sql import Row

In [9]:
APACHE_ACCESS_LOG_PATTERN = '^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) (\S+)" (\d{3}) (\d+)'

In [None]:
# Returns a dictionary containing the parts of the Apache Access Log.
def parse_apache_log_line(logline):
    match = re.search(APACHE_ACCESS_LOG_PATTERN, logline)
    if match is None:
        raise Error("Invalid logline: %s" % logline)
    return Row(
        ip_address    = match.group(1),
        client_identd = match.group(2),
        user_id       = match.group(3),
        date_time     = match.group(4),
        method        = match.group(5),
        endpoint      = match.group(6),
        protocol      = match.group(7),
        response_code = int(match.group(8)),
        content_size  = long(match.group(9))
    )