/
middleware.py
185 lines (152 loc) · 7.01 KB
/
middleware.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
from datetime import datetime, timedelta
import logging
import re
import traceback
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.core.cache import cache
from django.core.urlresolvers import reverse, NoReverseMatch
from django.db.utils import DatabaseError
from django.http import Http404
from tracking import utils
from tracking.models import Visitor, UntrackedUserAgent, BannedIP
# Logger used by the middleware classes defined in this module.
log = logging.getLogger('tracking.middleware')

# Non-greedy pattern that captures the text between <title> and </title>.
title_re = re.compile('<title>(.*?)</title>')
class VisitorTrackingMiddleware(object):
    """
    Keeps track of your active users.  Anytime a visitor accesses a valid URL,
    their unique record will be updated with the page they're on and the last
    time they requested a page.

    Records are considered to be unique when the session key and IP address
    are unique together.  Because the same user could otherwise end up with
    two different records, a visitor whose session key is unknown is matched
    against existing records with the same IP address and user agent that
    were updated within the last 5 minutes before a new record is created.
    """

    @property
    def prefixes(self):
        """Returns a list of URL prefixes that we should not track"""
        if not hasattr(self, '_prefixes'):
            # copy into a list so a tuple-valued (or missing) setting can
            # still be extended below
            self._prefixes = list(getattr(settings, 'NO_TRACKING_PREFIXES', None) or [])

            # the extra prefixes are only added once per process; the flag
            # stored on the settings object records that this has been done
            if not getattr(settings, '_FREEZE_TRACKING_PREFIXES', False):
                for name in ('MEDIA_URL', 'STATIC_URL'):
                    url = getattr(settings, name, None)
                    if url and url != '/':
                        self._prefixes.append(url)

                try:
                    # finally, don't track requests to the tracker update pages
                    self._prefixes.append(reverse('tracking-refresh-active-users'))
                except NoReverseMatch:
                    # django-tracking hasn't been included in the URLconf if we
                    # get here, which is not a bad thing
                    pass

                settings.NO_TRACKING_PREFIXES = self._prefixes
                settings._FREEZE_TRACKING_PREFIXES = True

        return self._prefixes

    def _untracked_user_agents(self):
        """Returns the untracked user agents, cached for an hour"""
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)
        return untracked

    def _get_session_key(self, request, ip_address, user_agent):
        """Returns the request's session key, or a fake one if it has none"""
        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            return request.session.session_key

        # otherwise just fake a session key, limited to 40 characters
        return ('%s:%s' % (ip_address, user_agent))[:40]

    def _get_visitor(self, session_key, ip_address, user_agent, now):
        """
        Returns the Visitor for this session key and IP address.

        When no such record exists, the most recent-enough record (updated
        within the last 5 minutes) with the same IP address and user agent is
        reused and given the new session key; failing that, a new, unsaved
        Visitor is returned.  Returns None when the initial lookup fails for
        any reason other than the record not existing.
        """
        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            return Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            pass
        except Exception:
            # tracking is best-effort: give up on this request, but leave a
            # trace instead of silently swallowing the problem
            log.error('There was a problem looking up visitor information:\n%s',
                      traceback.format_exc())
            return None

        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff
        )

        if len(visitors):
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s',
                      ip_address, user_agent, visitor.id)
        else:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s', attrs)

        return visitor

    def process_request(self, request):
        """
        Creates or updates the Visitor record for the incoming request.

        Nothing is recorded for AJAX requests, for user agents containing an
        untracked keyword, or for paths starting with one of ``prefixes``.
        Always returns None.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # see if the user agent is not supposed to be tracked
        for ua in self._untracked_user_agents():
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in user_agent:
                log.debug('Not tracking UA "%s" because of keyword: %s',
                          user_agent, ua.keyword)
                return

        session_key = self._get_session_key(request, ip_address, user_agent)

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s', request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        visitor = self._get_visitor(session_key, ip_address, user_agent, now)
        if visitor is None:
            return

        # determine whether or not the user is logged in; request.user is
        # absent when no authentication middleware ran before this one
        user = getattr(request, 'user', None)
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            log.error('There was a problem saving visitor information:\n%s\n\n%s',
                      traceback.format_exc(), locals())
class VisitorCleanUpMiddleware(object):
    """Clean up old visitor tracking records in the database"""

    def process_request(self, request):
        """
        Deletes the Visitor records whose last update is older than the
        cleanup timeout.

        The timeout comes from ``utils.get_cleanup_timeout()`` and is used as
        a number of hours.  Nothing is deleted unless its string form consists
        only of digits.  Always returns None.
        """
        timeout = utils.get_cleanup_timeout()
        if not str(timeout).isdigit():
            return

        log.debug('Cleaning up visitors older than %s hours', timeout)
        cutoff = datetime.now() - timedelta(hours=int(timeout))
        Visitor.objects.filter(last_update__lte=cutoff).delete()
class BannedIPMiddleware(object):
    """
    Raises an Http404 error for any page request from a banned IP.  IP addresses
    may be added to the list of banned IPs via the Django admin.

    The banned users do not actually receive the 404 error--instead they get
    an "Internal Server Error", effectively eliminating any access to the site.
    """

    def process_request(self, request):
        """
        Raises Http404 when the request's IP address is in the banned list.

        The list of banned addresses is read from the cache and, when it is
        not cached, rebuilt from the BannedIP records and cached for an hour.
        Returns None for every other request.
        """
        key = '_tracking_banned_ips'
        ips = cache.get(key)
        if ips is None:
            # compile a list of all banned IP addresses
            log.info('Updating banned IPs cache')
            ips = [b.ip_address for b in BannedIP.objects.all()]
            cache.set(key, ips, 3600)

        # check to see if the current user's IP address is in that list
        if utils.get_ip(request) in ips:
            raise Http404