Skip to content
Browse files

Make script works with Google APIs

  • Loading branch information...
1 parent a0ba904 commit 2b4628759330dbdd782823a0323d85d54cde2db4 m.sobczak committed Aug 13, 2012
Showing with 69 additions and 20 deletions.
  1. +8 −5 README
  2. +4 −1 config.py
  3. +1 −0 google2piwik.conf.sample
  4. +35 −10 google2piwik.py
  5. +3 −0 google2piwikgui.py
  6. +4 −2 mappers/dimensions.py
  7. +14 −2 sql.py
View
13 README
@@ -1,4 +1,4 @@
-Google2Piwik Exporter, version 1.2.1, March 2011
+Google2Piwik Exporter, version 1.3, August 2012
Description
===========
@@ -12,6 +12,7 @@ Requirements
- The Google Analytics API currently does not support Google Apps for your Domain Accounts.
That's why you can't export data from account@yourdomain.com even if you have access via the web interface.
However you can grant privileges to your Gmail account, and use it to perform the export.
+- Google APIs API key (you can get one here: https://code.google.com/apis/console/)
- Python 2.6 with components:
- gdata-python-client (Google Python API) - http://code.google.com/p/gdata-python-client/
- MySQLdb
@@ -46,10 +47,11 @@ Known not trustworthy statistics:
* Visitors -> Settings -> Plugins
Google provides information only about whether Flash Player and Java are enabled.
-Because of Google API Policy, there is a limit of requests per 24 hours. Currently this script is using 5 requests (fetching 10 000 data rows)
-for one exported day, plus 3 for whole period. However number of requests depends on number of visits and total number of pageviews.
+Because of Google API Policy, there is a limit of requests per 24 hours. The exporter uses the Google APIs quota (50k requests/day).
+Currently this script uses 6 requests (each fetching up to 10 000 data rows) for one exported day, plus 3 for the whole period.
+One additional request (the 6th) has been added to populate the visit numbers and days since last visit tables. It slows the whole export process down a bit.
+However, the number of requests depends on the number of visits and the total number of pageviews.
This means that you should be able to export about 2000 days per 24h in low and medium visited sites.
-(Read more @ http://code.google.com/intl/pl/apis/analytics/docs/gdata/gdataDeveloperGuide.html#quota)
Running GUI
===========
@@ -74,5 +76,6 @@ Development Team:
Daniel Borzęcki
Maciej Zawadziński
Piotr Rzepecki
+Maciej Sobczak
-Contact: office@clearcode.cc
+Contact: office@clearcode.cc
View
5 config.py
@@ -13,12 +13,13 @@
MYSQL_CREDENTIALS = {}
GOOGLE_USER = ""
GOOGLE_PASS = ""
+GOOGLE_KEY = ""
CONFIG_START = ""
CONFIG_END = ""
def read_config(config_file):
global ID_SITE, SITE_BASE_URL, GOOGLE_TABLE_ID
- global MYSQL_CREDENTIALS, GOOGLE_USER, GOOGLE_PASS
+ global MYSQL_CREDENTIALS, GOOGLE_USER, GOOGLE_PASS, GOOGLE_KEY
global CONFIG_START, CONFIG_END
conf = ConfigParser.RawConfigParser()
@@ -30,6 +31,7 @@ def read_config(config_file):
GOOGLE_TABLE_ID = conf.get("google", "table_id")
GOOGLE_USER = conf.get("google", "user_login")
GOOGLE_PASS = conf.get("google", "user_pass")
+ GOOGLE_KEY = conf.get("google", "api_key")
CONFIG_START= conf.get("export", "start")
CONFIG_END = conf.get("export", "end")
@@ -45,6 +47,7 @@ def write_config(config_file):
conf.set("google", "user_login", GOOGLE_USER)
conf.set("google", "user_pass", GOOGLE_PASS)
conf.set("google", "table_id", GOOGLE_TABLE_ID)
+ conf.set("google", "api_key", GOOGLE_KEY)
conf.add_section("mysql")
conf.set("mysql", "db", MYSQL_CREDENTIALS["db"])
View
1 google2piwik.conf.sample
@@ -5,6 +5,7 @@
user_login = user
user_pass = password
table_id = ga:XXXXXXXX
+api_key = XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# MySQL Piwik Database configuration
[mysql]
View
45 google2piwik.py
@@ -119,6 +119,9 @@ def export_day(day, fetcher):
VERBOSE("VISIT: Fetch 3", 2)
simulator.update(fetcher, "ga:longitude,ga:latitude,ga:hour,ga:browserVersion,ga:keyword,ga:source,ga:operatingSystemVersion","ga:visits")
+ VERBOSE("VISIT: Fetch 4", 2)
+ simulator.update(fetcher, "ga:longitude,ga:latitude,ga:hour,ga:visitCount,ga:daysSinceLastVisit","ga:visits")
+
"""
Getting landing and exit pages
"""
@@ -234,7 +237,7 @@ def finalize(self, additional):
"""
self.visit_log.update(additional)
- stable = ["ga:screenResolution", "ga:language", "ga:visitLength", "total_actions"]
+ stable = ["ga:screenResolution", "ga:language", "ga:visitLength", "total_actions", "ga:visitCount", "ga:daysSinceLastVisit"]
for stable_dim in stable:
self.visit_log[dims.DMAP[stable_dim]] = self.google_data.get(stable_dim) or 0
@@ -474,8 +477,8 @@ def FeedFetchSpecial(self, dimensions, metrics, day_start, day_end):
'end-date': day_end,
'dimensions': dimensions,
'metrics': metrics,
- 'max-results': '10000'})
-
+ 'max-results': '10000',
+ 'key': config.GOOGLE_KEY})
self.feed = self.client.GetDataFeed(data_query)
def FeedFetch(self, dimensions, metrics, day):
@@ -485,7 +488,8 @@ def FeedFetch(self, dimensions, metrics, day):
'end-date': day,
'dimensions': dimensions,
'metrics': metrics,
- 'max-results': '10000'})
+ 'max-results': '10000',
+ 'key': config.GOOGLE_KEY})
self.feed = self.client.GetDataFeed(data_query)
def getUniqueVisitors(self, day):
@@ -516,11 +520,15 @@ def FeedToDict(self, take_dimension = True, take_metric = True):
return result
def PrintTableIDs(self):
- account_query = gdata.analytics.client.AccountFeedQuery()
- table_feed = self.client.GetAccountFeed(account_query)
+ #account_query = gdata.analytics.client.AccountFeedQuery()
+ #table_feed = self.client.GetAccountFeed(account_query)
+ account_query = gdata.analytics.client.ProfileQuery('~all', '~all',
+ {'key': config.GOOGLE_KEY})
+ table_feed = self.client.GetManagementFeed(account_query)
print "Google Analytics Table IDs for your Account\n"
for entry in table_feed.entry:
- print "Site: %30s \t table_id: %s" % (entry.title.text, entry.table_id.text)
+ print "Site: %30s \t table_id: %s" % (entry.GetProperty('ga:profileName').value,
+ entry.GetProperty('dxp:tableId').value)
def GetTableIDs(self):
account_query = gdata.analytics.client.AccountFeedQuery()
@@ -558,6 +566,8 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
help="updates visit total actions field (for some cases needed after export)")
parser.add_option("-p", "--print-table-ids", dest="print_table_id", default=False, action="store_true",
help="prints table_id for every site on your Google Analytics Account")
+ parser.add_option("-C", "--clear-archives", dest="clear_archives", default=False, action="store_true",
+ help="Drops all archive tables in piwik database")
(options, args) = parser.parse_args(sys.argv)
__VERBOSE__ = options.verbose
@@ -568,6 +578,18 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
fetcher.PrintTableIDs()
exit()
+ if options.clear_archives:
+ try:
+ config.read_config(options.config_file)
+ sql.initialize(config.MYSQL_CREDENTIALS)
+ except:
+ "Please check your config file and run your script again"
+ exit()
+ print "Clearing archive tables"
+ sql.clear_archives()
+ print "Please go to your Piwik installation folder and run misc/cron/archive.sh script."
+ exit()
+
if options.check:
print "Checking configuration file:",
try:
@@ -579,9 +601,9 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
print
print "Checking Google Analytics"
- if not config.GOOGLE_USER.split("@")[1] == "gmail.com":
- print "Your e-mail address should be ending with @gmail.com"
- exit()
+ #if not config.GOOGLE_USER.split("@")[1] == "gmail.com":
+ #print "Your e-mail address should be ending with @gmail.com"
+ #exit()
print "Attempting login:",
try:
@@ -639,3 +661,6 @@ def EntryToDict(self, entry, take_dimension = True, take_metric = False):
sql.update_site_ts_created(config.ID_SITE, start_date)
export_period(start_date, end_date)
+
+ sql.clear_archives()
+ print "Please go to your Piwik installation folder and run misc/cron/archive.sh script."
View
3 google2piwikgui.py
@@ -56,6 +56,9 @@ def __init__(self, parent=None):
self.ga_password_field = QtGui.QLineEdit(config.GOOGLE_PASS)
self.ga_password_field.setEchoMode(QtGui.QLineEdit.Password)
form.addRow(QtGui.QLabel("Password"), self.ga_password_field)
+ self.ga_api_key_field = QtGui.QLineEdit(config.GOOGLE_KEY)
+ form.addRow(QtGui.QLabel("Api Key"), self.ga_api_key_field)
+
self.registerField("ga_user", self.ga_user_field)
self.registerField("ga_password", self.ga_password_field)
self.setLayout(form)
View
6 mappers/dimensions.py
@@ -25,10 +25,12 @@
"ga:landingPagePath" : "visit_entry_idaction_url",
"ga:source" : "referer_url",
"ga:keyword" : "referer_keyword",
-
+ "ga:visitCount" : "visitor_count_visits",
+ "ga:daysSinceLastVisit" : "visitor_days_since_last",
+
"referer_type" : "referer_type",
"referer_name" : "referer_name",
-
+
"visit_server_date" : "visit_server_date",
"total_actions" : "visit_total_actions",
"md5config" : "config_md5config",
View
16 sql.py
@@ -21,7 +21,7 @@
INSERT_LOG_VISIT_ACTION = """INSERT INTO {{LVA}} (idvisit, idvisitor, server_time, idsite, idaction_url,
idaction_url_ref, idaction_name, time_spent_ref_action, idaction_name_ref)
VALUES (%s, binary(unhex(substring(%s,1,16))), %s, %s, %s, %s, %s, %s, 0) """
-
+
INSERT_LOG_ACTION = "INSERT INTO {{LA}} (name, hash, type) VALUES (%s, %s, %s) "
INSERT_LOG_VISIT = u""" INSERT INTO {{LV}} (idsite, visitor_localtime, idvisitor, visitor_returning, config_id,
@@ -45,7 +45,7 @@
%(config_browser_version)s, %(config_resolution)s, 0, %(config_flash)s,
%(config_java)s, 0, 0, 0, 0, 0, 0, 0, 0,
%(location_browser_lang)s, %(location_country)s, %(location_continent)s,
- 0, 0, 0, 0, 0) """
+ %(visitor_count_visits)s, %(visitor_days_since_last)s, 0, 0, 0) """
SELECT_NB_VISITS = "SELECT count(*) FROM {{LV}} WHERE visitor_localtime = %s and idsite = %s"
@@ -166,3 +166,15 @@ def update_total_visit_actions():
SET lv.visit_total_actions = m.visit_actions
""".format(LV = T_LOGV, LVA = T_LOGVA)
cursor.execute(raw_sql)
+
+def clear_archives():
+ query = cursor.execute('SHOW TABLES')
+ tables = cursor.fetchall()
+ to_drop = []
+ for col in tables:
+ if 'archive' in col[0]:
+ to_drop.append(col[0])
+ if to_drop:
+ raw_sql = 'DROP TABLE ' + (', ').join(to_drop)
+ cursor.execute(raw_sql)
+

0 comments on commit 2b46287

Please sign in to comment.
Something went wrong with that request. Please try again.