Version 1.5

commit e4b56b68ec5d88e9292c9ff2b5c864cf52eaa97e (parent 5617a53)
Authored by Clearcode
README → README.md (7 changes)
@@ -1,4 +1,4 @@
-Google2Piwik Exporter, version 1.4, October 2012
+Google2Piwik Exporter, version 1.5, February 2013
Description
===========
@@ -30,8 +30,9 @@ Before running the script please be sure to:
After export
============
-After successful export, please go to your Piwik installation folder
-and run misc/cron/archive.sh script.
+After successful export, please go to your Piwik installation folder
+and run:
+/usr/bin/php5 /path/to/piwik/misc/cron/archive.php --url=http://example.org/piwik/
Limitations
===========
action.py (75 changes)
@@ -4,13 +4,18 @@
# @link http://clearcode.cc/
# @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
#
+import urlparse
+import zlib
+import re
import sql
-TYPE_ACTION_URL = 1
+
+TYPE_ACTION_URL = 1
TYPE_ACTION_NAME = 4
-class Action(object):
+
+class Action(object):
def __init__(self, path, title, internal_id):
self.path = path
self.titles = [title]
@@ -18,61 +23,81 @@ def __init__(self, path, title, internal_id):
self.exported = False
self.pageviews = 0
self.timeleft = 0
-
+
def __repr__(self):
return str(self.titles)
def get_title(self):
for title in self.titles:
- if title != None: return title
+ if title is not None:
+ return title
return ""
-
- def export(self, base_path):
- path = base_path + self.path
+
+ def export(self, base_path, version):
+ path = urlparse.urljoin(base_path, self.path)
title = self.get_title()
-
- type_url = ( path, self.get_hash(path), TYPE_ACTION_URL )
- type_name = ( title, self.get_hash(title + path), TYPE_ACTION_NAME )
-
- self.id_action_url = sql.insert_log_action(type_url)
- self.id_action_name = sql.insert_log_action(type_name)
+ url = re.sub(r'^http(s)?://(www.)?', '', path)
+ url_prefix = self.get_url_prefix(path)
+ type_url = (url, self.get_hash(url), TYPE_ACTION_URL, url_prefix)
+ type_name = (title, self.get_hash(title), TYPE_ACTION_NAME, None)
+
+ self.id_action_url = sql.insert_log_action(type_url, version)
+ self.id_action_name = sql.insert_log_action(type_name, version)
self.exported = True
-
+
def get_hash(self, value):
- return abs(hash(value))
-
+ return zlib.crc32(value.encode('utf-8')) & 0xffffffff
+
+ def get_url_prefix(self, path):
+ '''
+ Return a valid Piwik url_prefix for 'log_action' table
+ It returns one of four values:
+ 0 if path starts with 'http://'
+ 1 if path starts with 'http://www.'
+ 2 if path starts with 'https://'
+ 3 if path starts with 'https://www.'
+ '''
+ path = urlparse.urlsplit(path)
+ ssl = True if path.scheme == 'https' else False
+
+ if path.netloc.startswith('www'):
+ return 3 if ssl else 1
+ else:
+ return 2 if ssl else 0
+
+
class ActionManager(object):
def __init__(self):
""" self.actions will be path -> Action mapper """
self.actions = {}
self.__counter = 0
-
+
def add_action(self, path, title):
if path in self.actions:
if not title in self.actions[path].titles:
self.actions[path].titles.append(title)
else:
self.actions[path] = Action(path, title, self.counter_next())
-
+
def __getitem__(self, path):
- if path == None:
+ if path is None:
return None
if not path in self.actions:
self.actions[path] = Action(path, None, self.counter_next())
return self.actions[path]
-
+
def counter_next(self):
self.__counter += 1
return self.__counter
-
+
@property
def toExport(self):
""" Returns list of not yet exported actions"""
- return filter(lambda x: self.actions[x].exported == False, self.actions)
+ return filter(lambda x: self.actions[x].exported is False,
+ self.actions)
- def export(self, base_path):
+ def export(self, base_path, version):
for action in self.toExport:
- self.actions[action].export(base_path)
-
+ self.actions[action].export(base_path, version)
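A note on the action.py changes above (not part of the commit): as the new get_url_prefix docstring explains, the exported URL drops its scheme and a leading 'www.', that information is encoded separately as url_prefix (0-3), and the hash becomes an unsigned CRC32 of the stored string. A minimal Python 2 sketch of that behaviour, using only the stdlib modules the diff itself imports (urlparse, zlib, re); the base URL and page path below are illustrative placeholders:

# Sketch (not part of the commit): how the exported URL, its hash and
# url_prefix relate for one sample page, following the logic added above.
import re
import urlparse
import zlib

base = "https://www.example.org/"               # stands in for config.SITE_BASE_URL
page = "/products/index.html"                   # stands in for a ga:pagePath value

path = urlparse.urljoin(base, page)             # 'https://www.example.org/products/index.html'
url = re.sub(r'^http(s)?://(www.)?', '', path)  # 'example.org/products/index.html'

parts = urlparse.urlsplit(path)
ssl = parts.scheme == 'https'
www = parts.netloc.startswith('www')
url_prefix = (3 if ssl else 1) if www else (2 if ssl else 0)   # 3 here: 'https://www.'

url_hash = zlib.crc32(url.encode('utf-8')) & 0xffffffff        # unsigned 32-bit value

print url, url_hash, url_prefix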
google2piwik.py (155 changes)
@@ -3,14 +3,14 @@
#
# Google2Piwik - exporting Google Analytics to Piwik
-#
+#
# @link http://clearcode.cc/
# @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
#
## google2piwik v1.1, Copyright (C) 2011 by Clearcode (http://clearcode.cc)
# Tested to work on
-# Python 2.6.4 (r264:75706, Dec 7 2009, 18:45:15)
+# Python 2.6.4 (r264:75706, Dec 7 2009, 18:45:15)
# [GCC 4.4.1] on linux2
import gdata.analytics.client
@@ -21,6 +21,7 @@
import config
import sql
+from distutils.version import StrictVersion
from itertools import chain, cycle
from hashlib import md5
import datetime
@@ -40,7 +41,7 @@ def VER_FLUSHED(message, level=0):
sys.stdout.write("\b"*len(message))
sys.stdout.write(message)
sys.stdout.flush()
-
+
def read_date(strdate):
try:
date_re = re.compile("(\d{4})-(\d{1,2})-(\d{1,2})")
@@ -50,18 +51,18 @@ def read_date(strdate):
raise Exception("Wrong date %s, check format (YYYY-MM-DD)" % strdate)
except ValueError:
raise Exception("Not valid date given %s" % strdate)
-
+
def export_period(start, end = None):
global action_manager, hash_generator
""" It's best not to export current day """
if not end:
end = datetime.date.today() - datetime.timedelta(days=1)
-
+
print "Please wait, this process - depending on popularity of your site may take some time."
fetcher = GoogleFeedFetcher( config.GOOGLE_TABLE_ID )
currentdate = start
enddate = end
-
+
action_manager = create_action_manager( fetcher, str(currentdate), str(enddate) )
hash_generator = create_visit_hash_generator( fetcher, str(currentdate), str(enddate) )
@@ -72,8 +73,9 @@ def export_period(start, end = None):
else:
VERBOSE("Export failed. Visit log for that day is not empty.")
currentdate += datetime.timedelta(days=1)
-
+
sql.update_visit_actions(start, end + datetime.timedelta(days=1))
+ sql.finalize()
def create_action_manager(fetcher, start, end):
action_manager = ActionManager()
@@ -81,7 +83,7 @@ def create_action_manager(fetcher, start, end):
fetcher.FeedFetchSpecial("ga:pagePath,ga:pageTitle", "ga:pageviews", start, end)
for act in fetcher.FeedToDict():
action_manager.add_action(act["ga:pagePath"], act["ga:pageTitle"])
-
+
return action_manager
def create_visit_hash_generator(fetcher, start, end):
@@ -106,10 +108,10 @@ def create_visit_hash_generator(fetcher, start, end):
def export_day(day, fetcher):
day_export_start = datetime.datetime.now()
simulator = VisitSimulator(fetcher.getVisits(day), day)
-
+
if simulator.visit_limit == 0:
return #nothing to export
-
+
VERBOSE("VISIT: Initialize", 2)
simulator.initialize(fetcher, "ga:latitude,ga:longitude,ga:hour,ga:flashVersion,ga:javaEnabled,ga:language,ga:screenResolution", "ga:visits")
@@ -123,12 +125,12 @@ def export_day(day, fetcher):
VERBOSE("VISIT: Fetch landing, exits", 2)
simulator.update(fetcher, "ga:exitPagePath,ga:landingPagePath,ga:latitude,ga:longitude,ga:hour","ga:entrances")
VERBOSE("ACTION: Export paths", 2)
- action_manager.export(config.SITE_BASE_URL)
+ action_manager.export(config.SITE_BASE_URL, CURRENT_VERSION)
VERBOSE("ACTION: Completed", 2)
additional = {"idsite" : config.ID_SITE, "visit_server_date" : day}
-
+
simulator.finalize(additional)
-
+
"""
Export views to log_view
"""
@@ -136,7 +138,7 @@ def export_day(day, fetcher):
for v in simulator.visits:
v.idvisit = sql.insert_log_visit(v.visit_log, CURRENT_VERSION)
VERBOSE("VISIT: Completed", 2)
-
+
"""
Simulate actions by taking every page (with pageviews and timeOnPage info) and inserting to log_link_visit_action
"""
@@ -148,7 +150,7 @@ def export_day(day, fetcher):
real_action.timeleft = float(action["metric"]["ga:timeOnPage"])
real_action.bounces = int(action["metric"]["ga:bounces"])
real_action.average = real_action.timeleft / real_action.pageviews if real_action.pageviews > 0 else real_action.timeleft
-
+
for v in xrange(len(simulator.visits)):
visit = simulator.visits[v]
if real_action.bounces == 0: continue
@@ -156,21 +158,21 @@ def export_day(day, fetcher):
real_action.bounces -= 1
visit.bounce = True
try:
- sql.insert_log_visit_action((visit.idvisit, visit.get_final_value("idcookie"),
+ sql.insert_log_visit_action((visit.idvisit, visit.get_final_value("idcookie"),
visit.get_final_value("visit_server_date"), config.ID_SITE,
real_action.id_action_url, real_action.id_action_url,
real_action.id_action_name, real_action.average))
except Exception, e:
print e
-
-
+
+
for action in pageViewDict:
real_action = action_manager[action["ga:pagePath"]]
real_action.pageviews = int(action["metric"]["ga:pageviews"])
real_action.timeleft = float(action["metric"]["ga:timeOnPage"])
real_action.bounces = int(action["metric"]["ga:bounces"])
real_action.average = real_action.timeleft / real_action.pageviews if real_action.pageviews > 0 else real_action.timeleft
-
+
candicates = filter(lambda v : not v.bounce, simulator.visits)
for view in xrange(real_action.pageviews - real_action.bounces):
visit = random.choice(candicates)
@@ -182,10 +184,10 @@ def export_day(day, fetcher):
except Exception, e:
print e
-
+
simulated_unique = len(set([visit.visit_log["config_md5config"] for visit in simulator.visits]))
VERBOSE("Number of simulated unique visits:\t%s" % simulated_unique)
-
+
VERBOSE("Real number of unique visits:\t%s" % fetcher.getUniqueVisitors(day))
VERBOSE("DAY EXPORT TIME (in seconds): %s" % (datetime.datetime.now() - day_export_start).seconds,2)
VERBOSE("")
@@ -194,9 +196,9 @@ def export_day(day, fetcher):
class Visit(object):
"""
- This object represents single visit on website. Contains methods updating
+ This object represents single visit on website. Contains methods updating
values from Google Analytics and changing them into Piwik Visit values.
-
+
Data taken from Google Analytics is contained in `google_data` dict.
Piwik representation of Visit is stored in `visit_log` dict.
"""
@@ -205,37 +207,36 @@ def __init__(self, params={}):
self.visit_log = {}
self.nb_updates = 0
self.bounce = False
-
+
def __repr__(self):
return str(self.visit_log)
def first(self, params):
self.google_data = params
self.nb_updates += 1
-
+
def update(self, params):
for key in params:
if not key in self.google_data:
self.google_data[key] = params[key]
self.nb_updates += 1
-
+
def compliance(self, other_params):
compliance_factor = 0
for key, value in self.google_data.iteritems():
compliance_factor += other_params.get(key) == value
-
+
return compliance_factor
-
+
def finalize(self, additional):
"""
This method changes Google Analytics fields and values into Piwik's and stores them in `visit_log` dictionary.
"""
self.visit_log.update(additional)
-
+
stable = ["ga:screenResolution", "ga:language", "ga:visitLength", "total_actions", "ga:visitCount", "ga:daysSinceLastVisit", "ga:city"]
for stable_dim in stable:
self.visit_log[dims.DMAP[stable_dim]] = self.google_data.get(stable_dim) or 0
-
self.set_final("ga:visitorType", vals.visitor_returning)
self.set_final("ga:flashVersion", vals.flash_present)
self.set_final("ga:javaEnabled", vals.java_present)
@@ -247,14 +248,13 @@ def finalize(self, additional):
self.set_final("ga:source", vals.referer_url)
self.set_final("ga:continent", vals.continent_name)
self.set_final("ga:region", vals.region_name)
-
- self.set_final_value("referer_type", vals.referer_type(self.google_data.get("ga:source"),
+ self.set_final_value("referer_type", vals.referer_type(self.google_data.get("ga:source"),
self.get_final_value("ga:keyword")))
-
+
self.set_final_value("referer_name", vals.referer_name(self.google_data.get("ga:source"),
self.get_final_value("referer_type")))
self.cut_final("referer_name", 70)
-
+
self.set_final_value("ga:browserVersion", vals.browser_version(self.get_final_value("ga:browser"),
self.google_data.get("ga:browserVersion")))
self.cut_final("ga:browserVersion", 20)
@@ -265,31 +265,31 @@ def finalize(self, additional):
landing_act_id = 0
self.set_final_value("ga:landingPagePath", landing_act_id)
-
+
try:
exit_action_id = action_manager[self.google_data["ga:exitPagePath"]].id_action_url
except:
exit_action_id = 0
self.set_final_value("ga:exitPagePath", exit_action_id)
-
+
self.visit_log["visit_first_action_time"] = self.visit_log["visit_last_action_time"] = \
"%s %s" % (self.visit_log["visit_server_date"], self.visit_log["visitor_localtime"])
-
+
os = vals.os_name(self.google_data.get("ga:operatingSystem"), self.google_data.get("ga:operatingSystemVersion"))
-
+
self.set_final_value("ga:operatingSystem", os)
self.set_final_value("md5config", hash_generator.get_md5(self.get_final_value("visit_server_date")))
self.set_final_value("idcookie", md5(self.get_final_value("md5config")).hexdigest())
def set_final(self, google_field, function):
self.visit_log[dims.DMAP[google_field]] = function(self.google_data.get(google_field))
-
+
def set_final_value(self, google_field, value):
"""
Shortcut for setting value to corresponding google_field
"""
self.visit_log[dims.DMAP[google_field]] = value
-
+
def get_final_value(self,google_field):
"""
Shortcut for getting value of corresponding google_field
@@ -300,8 +300,8 @@ def cut_final(self, google_field, max_length):
value = self.get_final_value(google_field)
if isinstance(value,str):
self.set_final_value(google_field, value[:max_length])
-
-
+
+
class VisitSimulator(object):
def __init__(self, nb_visits, day):
self.visit_limit = int(nb_visits)
@@ -321,13 +321,13 @@ def index_visit(self, visit, latitude, longitude, hour):
self.indexed[latitude][longitude] = {hour : [visit]}
else:
self.indexed[latitude] = {longitude:{hour : [visit]}}
-
+
def modify(self, visit_google_data):
"""
This method scans visits for most suitable with visit_google_data.
Use queries that contains common dimensions (like latitude and longitude), so
visit that covers most of values gets update.
-
+
During one VisitSimulator.update one visit can be updated only once
"""
max, visit_index = -1, None
@@ -354,12 +354,12 @@ def modify(self, visit_google_data):
if visit_index != None:
self.visits[visit_index].update(visit_google_data)
-
+
def initialize(self, fetcher, dimensions, metrics):
"""
Method used to populate visits with basic dimensions.
-
- metrics parameter should contain only one value in this case.
+
+ metrics parameter should contain only one value in this case.
eg. ga:visits
"""
fetcher.FeedFetch(dimensions, metrics, self.day)
@@ -372,7 +372,7 @@ def initialize(self, fetcher, dimensions, metrics):
self.index_visit(self.visits[index], visit["ga:latitude"], visit["ga:longitude"], visit["ga:hour"])
index += 1
self.nb_updates += 1
-
+
def update(self, fetcher, dimensions, metrics):
"""
Method to fetch and update visits with new dimensions.
@@ -394,20 +394,20 @@ def update(self, fetcher, dimensions, metrics):
VER_FLUSHED("%3s perc. finished. estimated %6d seconds left." % (current_step, timeleft), 2)
last_update = datetime.datetime.now()
current_step += step
-
+
self.nb_updates += 1
VERBOSE("", 2)
-
+
def finalize(self, additional):
"""
Finalizes every visit ( with additional dictionary ).
"""
for visit in self.visits:
visit.finalize(additional)
-
+
class VisitHashGenerator(object):
"""
- This class is used to simulate visitors uniqueness.
+ This class is used to simulate visitors uniqueness.
"""
def __init__(self):
self.total = 0
@@ -417,7 +417,7 @@ def __init__(self):
self.basket = []
self.current_month_year = "0000-00"
self.current_date = "0000-00-00"
-
+
def get_md5(self, date):
"""
date should be in form : YYYY-MM-DD
@@ -425,15 +425,15 @@ def get_md5(self, date):
if date != self.current_date:
self.current_date = date
self.populate_current_date()
-
+
md5string = "%s;%s" % (self.random_prefix, self.date_basket.next())
return md5(md5string).hexdigest()
-
+
def populate_current_date(self):
if self.current_date[:7] != self.current_month_year:
self.current_month_year = self.current_date[:7]
self.populate_current_month()
-
+
not_yet_taken = filter(lambda h: not h in self.month_taken, self.month_basket)
if len(not_yet_taken) == 0:
self.date_basket = cycle(random.sample(self.month_basket, self.date_unique[self.current_date]))
@@ -444,11 +444,11 @@ def populate_current_date(self):
chosen = random.sample(not_yet_taken, self.date_unique[self.current_date])
self.month_taken.update(chosen)
self.date_basket = cycle(chosen)
-
+
def populate_current_month(self):
self.month_basket = xrange(self.month_unique[self.current_month_year])
self.month_taken = set()
-
+
class GoogleFeedFetcher(object):
"""
Class used to retrieve data from Google.
@@ -463,15 +463,15 @@ def __init__(self, table):
raise Exception('Invalid Google credentials given.')
except gdata.client.Error:
raise Exception('Login Error')
-
+
self.table_id = table
-
+
def FeedFetchSpecial(self, dimensions, metrics, day_start, day_end):
data_query = gdata.analytics.client.DataFeedQuery({
'ids': self.table_id,
'start-date': day_start,
'end-date': day_end,
- 'dimensions': dimensions,
+ 'dimensions': dimensions,
'metrics': metrics,
'max-results': '10000',
'key': config.GOOGLE_KEY})
@@ -482,7 +482,7 @@ def FeedFetch(self, dimensions, metrics, day):
'ids': self.table_id,
'start-date': day,
'end-date': day,
- 'dimensions': dimensions,
+ 'dimensions': dimensions,
'metrics': metrics,
'max-results': '10000',
'key': config.GOOGLE_KEY})
@@ -501,13 +501,13 @@ def getVisits(self, day):
return self.feed.entry[0].metric[0].value
except:
return 0
-
+
def checkAccess(self):
self.FeedFetch("", "ga:visits", datetime.date.today())
self.feed.entry[0].metric[0].value
return True
-
-
+
+
def FeedToDict(self, take_dimension = True, take_metric = True):
result = []
for entry in self.feed.entry:
@@ -523,7 +523,7 @@ def PrintTableIDs(self):
for entry in table_feed.entry:
print "Site: %30s \t table_id: %s" % (entry.GetProperty('ga:profileName').value,
entry.GetProperty('dxp:tableId').value)
-
+
def GetTableIDs(self):
account_query = gdata.analytics.client.ProfileQuery('~all', '~all',
{'key': config.GOOGLE_KEY})
@@ -539,9 +539,9 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
result["metric"] = {}
for met in entry.metric:
result["metric"][met.name] = met.value
-
+
return result
-
+
if __name__ == '__main__':
import optparse
import sys
@@ -555,13 +555,13 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
parser.add_option("-E", "--end-date", dest="end_date", default=None, metavar="DATE",
help="""set end date of export, this parameter is optional.
If not specified - yesterday's date will be used. DATE should be in form YYYY-MM-DD""")
- parser.add_option("-c", "--check", dest="check", default=False, action="store_true",
+ parser.add_option("-c", "--check", dest="check", default=False, action="store_true",
help="checks if configuration is valid, i.e. connects to MySQL database and Google account")
- parser.add_option("-u", "--update-visit", dest="update_visit_actions", default=False, action="store_true",
+ parser.add_option("-u", "--update-visit", dest="update_visit_actions", default=False, action="store_true",
help="updates visit total actions field (for some cases needed after export)")
- parser.add_option("-p", "--print-table-ids", dest="print_table_id", default=False, action="store_true",
+ parser.add_option("-p", "--print-table-ids", dest="print_table_id", default=False, action="store_true",
help="prints table_id for every site on your Google Analytics Account")
- parser.add_option("-C", "--clear-archives", dest="clear_archives", default=False, action="store_true",
+ parser.add_option("-C", "--clear-archives", dest="clear_archives", default=False, action="store_true",
help="Drops all archive tables in piwik database")
(options, args) = parser.parse_args(sys.argv)
@@ -593,7 +593,7 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
except:
print "[FAILED]"
exit()
- print
+ print
print "Checking Google Analytics"
#if not config.GOOGLE_USER.split("@")[1] == "gmail.com":
@@ -626,7 +626,7 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
except:
print "[FAILED]"
exit()
-
+
print "Checking tables:",
failed_tables = sql.check_tables(config.MYSQL_CREDENTIALS["table_prefix"])
if len(failed_tables) == 0:
@@ -649,9 +649,12 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
start_date = read_date(options.start_date or config.CONFIG_START)
end_date = None if not (options.end_date or config.CONFIG_END) else read_date(options.end_date or config.CONFIG_END)
sql.initialize(config.MYSQL_CREDENTIALS)
+
CURRENT_VERSION = sql.get_version(config.MYSQL_CREDENTIALS["table_prefix"])
- if (CURRENT_VERSION < 1.9):
+ if StrictVersion(CURRENT_VERSION) < StrictVersion('1.9'):
CURRENT_VERSION = 1.8
+ else:
+ CURRENT_VERSION = 1.9
if options.update_visit_actions:
sql.update_total_visit_actions()
@@ -660,6 +663,6 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
sql.update_site_ts_created(config.ID_SITE, start_date)
export_period(start_date, end_date)
-
+
sql.clear_archives()
print "Please go to your Piwik installation folder and run misc/cron/archive.sh script."
mappers/values.py (22 changes)
@@ -33,18 +33,18 @@ def browser_name(name):
def browser_version(browser, version):
""" Function converts long version description i.e. 3.6.13 (Firefox) to 3.6 """
-
+
b_versions = {"FF" : lambda x : '.'.join(x.split('.')[:2]),
"CH" : lambda x : '.'.join(x.split('.')[:2])}
-
+
safari_webkit_versions = {"533.19.4" : "5.0.3", "533.18.5" : "5.0.2", "533.17.8" : "5.0.1",
"533.16" : "5.0", "531.22.7" : "4.0.5", "531.21.10" : "4.0.4",
"531.9.1" : "4.0.3", "530.19.1" : "4.0.2", "530.17" : "4.0.1",
"528.17" : "4.0", "528.16" : "4.0", "528.1.1" : "4.0"}
-
+
if browser == "SF":
return safari_webkit_versions.get(version, "")
-
+
return b_versions.get(browser,(lambda x: x))(version)
def os_name(name, type=None):
@@ -58,7 +58,7 @@ def os_name(name, type=None):
"Android" : "AND",
"SymbianOS" : "SYM",
}
-
+
oss_typed = {"Windows" : {"Vista" : "WVI",
"Server 2003" : "WS3",
"XP" : "WXP",
@@ -69,12 +69,12 @@ def os_name(name, type=None):
"CE" : "WCE",
"ME" : "WME",
}}
-
+
if not oss.get(name):
return oss_typed.get(name,{}).get(type,unknown)
else:
return oss[name]
-
+
def referer_keyword(keyword):
return "" if keyword == "(not set)" else keyword
@@ -83,7 +83,7 @@ def referer_url(name):
return ""
elif name == "google":
return name
- else:
+ else:
return "http://%s/" % name
def referer_type(source, keyword):
@@ -92,7 +92,7 @@ def referer_type(source, keyword):
elif source == "google" or keyword != "":
return REFERER_TYPE_SEARCH_ENGINE
else:
- return REFERER_TYPE_WEBSITE
+ return REFERER_TYPE_WEBSITE
def referer_name(source, type):
if type == REFERER_TYPE_SEARCH_ENGINE:
@@ -116,13 +116,13 @@ def country_name(name):
return country_codes[name].lower()
except:
return "xx"
-
+
def visitor_localtime(google_value):
return "%s:00:00" % google_value
def visitor_returning(google_value):
return google_value == "Returning Visitor"
-
+
def flash_present(value):
return value != "(not set)"
sql.py (67 changes)
@@ -11,6 +11,7 @@
import warnings
import datetime
+
warnings.filterwarnings("ignore", category=MySQLdb.Warning)
T_LOGVA = "log_link_visit_action"
@@ -22,7 +23,7 @@
idaction_url_ref, idaction_name, time_spent_ref_action, idaction_name_ref)
VALUES (%s, binary(unhex(substring(%s,1,16))), %s, %s, %s, %s, %s, %s, 0) """
-INSERT_LOG_ACTION = "INSERT INTO {{LA}} (name, hash, type) VALUES (%s, %s, %s) "
+
LOGV_TEMPLATE = u""" INSERT INTO {{LV}} (idsite, visitor_localtime, idvisitor, visitor_returning, config_id,
visit_first_action_time, visit_last_action_time,
visit_exit_idaction_url, visit_entry_idaction_url, visit_total_actions,
@@ -51,8 +52,15 @@
1.8: (", location_continent", "", ", %(location_continent)s", "")
}
+INSERT_LOG_ACTION = {
+ 1.8: "INSERT INTO {{LA}} (name, hash, type) VALUES (%s, %s, %s) ",
+ 1.9: "INSERT INTO {{LA}} (name, hash, type, url_prefix) VALUES (%s, %s, %s, %s)"
+}
+
+
SELECT_NB_VISITS = "SELECT count(*) FROM {{LV}} WHERE visitor_localtime = %s and idsite = %s"
+
def initialize(mysql_data):
global T_LOGVA, T_LOGA, T_LOGV, T_SITE
global db, cursor
@@ -66,21 +74,27 @@ def initialize(mysql_data):
T_SITE = "%s_%s" % (prefix, T_SITE) if prefix else T_SITE
INSERT_LOG_VISIT_ACTION = INSERT_LOG_VISIT_ACTION.replace("{{LVA}}", T_LOGVA)
- INSERT_LOG_ACTION = INSERT_LOG_ACTION.replace("{{LA}}", T_LOGA)
SELECT_NB_VISITS = SELECT_NB_VISITS.replace("{{LV}}", T_LOGV)
LOGV_TEMPLATE = LOGV_TEMPLATE.replace("{{LV}}", T_LOGV)
for k, v in INSERT_LOG_VISIT.iteritems():
INSERT_LOG_VISIT[k] = LOGV_TEMPLATE.format(*INSERT_LOG_VISIT[k])
+ for k, v in INSERT_LOG_ACTION.iteritems():
+ INSERT_LOG_ACTION[k] = INSERT_LOG_ACTION[k].replace("{{LA}}", T_LOGA)
+
db = init_db(mysql_data)
db.set_character_set('utf8')
cursor = db.cursor()
-def insert_log_action(values):
- cursor.execute(INSERT_LOG_ACTION, values)
+
+def insert_log_action(values, version):
+ values_no = INSERT_LOG_ACTION[version].count('%s')
+ # from IPython import embed; embed();
+ cursor.execute(INSERT_LOG_ACTION[version], values[:values_no])
return cursor.lastrowid
+
def insert_log_visit(values, version):
try:
cursor.execute(INSERT_LOG_VISIT[version], values)
@@ -88,10 +102,12 @@ def insert_log_visit(values, version):
pass
return cursor.lastrowid
+
def insert_log_visit_action(values):
cursor.execute(INSERT_LOG_VISIT_ACTION, values)
return cursor.lastrowid
+
def init_db(mysql_data):
try:
db = MySQLdb.connect(mysql_data["host"], mysql_data["user"], mysql_data["passwd"],
@@ -102,25 +118,29 @@ def init_db(mysql_data):
print "Exception: ", e
exit()
+
def test_db(mysql_data):
global db, cursor
db = MySQLdb.connect(mysql_data["host"], mysql_data["user"], mysql_data["passwd"],
- mysql_data["db"], int(mysql_data["port"]))
+ mysql_data["db"], int(mysql_data["port"]))
db.set_character_set('utf8')
cursor = db.cursor()
+
def get_sites(prefix):
- select_site_sql = "SELECT idsite, name, main_url from {SITE_TABLE}".format(SITE_TABLE = prefix+"_"+T_SITE)
+ select_site_sql = "SELECT idsite, name, main_url from {SITE_TABLE}".format(SITE_TABLE=prefix + "_" + T_SITE)
cursor.execute(select_site_sql)
- return [{"id" : id, "name" : name, "url" : url} for (id, name, url) in cursor.fetchall()]
+ return [{"id": id, "name": name, "url": url} for (id, name, url) in cursor.fetchall()]
+
def get_version(prefix):
t_option = "%s_option" % (prefix) if prefix else "option"
- select_version_sql = "SELECT option_value FROM {table} WHERE option_name = 'version_core'".format(table = t_option)
+ select_version_sql = "SELECT option_value FROM {table} WHERE option_name = 'version_core'".format(table=t_option)
cursor.execute(select_version_sql)
- version = float(cursor.fetchone()[0][:3])
+ version = cursor.fetchone()[0]
return version
+
def check_tables(table_prefix):
global cursor
failed = []
@@ -132,26 +152,30 @@ def check_tables(table_prefix):
failed.append(table_name)
return failed
+
def check_site(site_id):
- select_site_sql = "SELECT count(*) from {SITE_TABLE} WHERE idsite = %s".format(SITE_TABLE = T_SITE)
+ select_site_sql = "SELECT count(*) from {SITE_TABLE} WHERE idsite = %s".format(SITE_TABLE=T_SITE)
cursor.execute(select_site_sql, site_id)
return cursor.fetchone()[0] == 1
+
def update_site_ts_created(site_id, date):
current_start = datetime.datetime(date.year, date.month, date.day)
- select_site_sql = "SELECT ts_created from {SITE_TABLE} WHERE idsite = %s".format(SITE_TABLE = T_SITE)
+ select_site_sql = "SELECT ts_created from {SITE_TABLE} WHERE idsite = %s".format(SITE_TABLE=T_SITE)
cursor.execute(select_site_sql, site_id)
ts_created = cursor.fetchone()[0]
if ts_created > current_start:
- update_site_sql = "UPDATE {SITE_TABLE} SET ts_created = %s WHERE idsite = %s".format(SITE_TABLE = T_SITE)
+ update_site_sql = "UPDATE {SITE_TABLE} SET ts_created = %s WHERE idsite = %s".format(SITE_TABLE=T_SITE)
cursor.execute(update_site_sql, (current_start, site_id))
+
def nb_visits_day(date, site_id):
- cursor.execute(SELECT_NB_VISITS,(date, site_id))
+ cursor.execute(SELECT_NB_VISITS, (date, site_id))
return cursor.fetchone()[0]
+
def update_visit_actions(start_date, end_date):
raw_sql = """UPDATE {LV} AS lv
LEFT JOIN (
@@ -165,9 +189,10 @@ def update_visit_actions(start_date, end_date):
SET lv.visit_total_actions = m.visit_actions
WHERE visit_last_action_time >= %s
AND visit_last_action_time <= %s
- """.format(LV = T_LOGV, LVA = T_LOGVA)
+ """.format(LV=T_LOGV, LVA=T_LOGVA)
cursor.execute(raw_sql, (start_date, end_date))
+
def update_total_visit_actions():
raw_sql = """UPDATE {LV} AS lv
LEFT JOIN (
@@ -179,9 +204,21 @@ def update_total_visit_actions():
) AS m ON
m.idvisit = lv.idvisit
SET lv.visit_total_actions = m.visit_actions
- """.format(LV = T_LOGV, LVA = T_LOGVA)
+ """.format(LV=T_LOGV, LVA=T_LOGVA)
cursor.execute(raw_sql)
+
+def finalize():
+ raw_sql = """UPDATE {LV}
+ SET visit_exit_idaction_name = visit_exit_idaction_url+1,
+ visit_entry_idaction_name = visit_entry_idaction_url+1;""".format(LV=T_LOGV)
+ cursor.execute(raw_sql)
+
+ raw_sql = """UPDATE {LVA}
+ SET idaction_name_ref = idaction_url_ref + 1;""".format(LVA=T_LOGVA)
+ cursor.execute(raw_sql)
+
+
def clear_archives():
query = cursor.execute('SHOW TABLES')
tables = cursor.fetchall()
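Tying the sql.py changes together: INSERT_LOG_ACTION is now a dict keyed by the same 1.8/1.9 schema key, and insert_log_action() slices the incoming tuple to however many %s placeholders the chosen statement contains, so the 4-tuple built in Action.export() (name, hash, type, url_prefix) degrades to a 3-tuple on pre-1.9 schemas. A small sketch of just that slicing step, without a live MySQL cursor; the table name and values are placeholders:

# Sketch (not part of the commit): the placeholder-count slicing performed by
# insert_log_action(), shown here without a database connection.
INSERT_LOG_ACTION = {
    1.8: "INSERT INTO piwik_log_action (name, hash, type) VALUES (%s, %s, %s)",
    1.9: "INSERT INTO piwik_log_action (name, hash, type, url_prefix) VALUES (%s, %s, %s, %s)",
}

values = ("example.org/products/index.html", 1234567890, 1, 3)   # hypothetical (name, hash, type, url_prefix)

for version in (1.8, 1.9):
    statement = INSERT_LOG_ACTION[version]
    params = values[:statement.count('%s')]   # 3 parameters for 1.8, all 4 for 1.9
    print version, params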