Skip to content

Commit

Permalink
Fix timezones, rebuild historical gap and rate limit information [req…
Browse files Browse the repository at this point in the history
…uired]
  • Loading branch information
Emile den Tex committed Jun 14, 2016
1 parent 63080af commit 1ec0169
Show file tree
Hide file tree
Showing 32 changed files with 1,774 additions and 517 deletions.
421 changes: 0 additions & 421 deletions TESTPLAN.md

This file was deleted.

1 change: 1 addition & 0 deletions analysis/common/config.php
@@ -1,6 +1,7 @@
<?php

include_once(__DIR__ . '/../../config.php');
include_once(__DIR__ . '/../../common/constants.php');

/*
* Whether to cache results
Expand Down
27 changes: 25 additions & 2 deletions analysis/common/functions.php
Expand Up @@ -612,6 +612,12 @@ function validate($what, $how) {
if (!preg_match("/^\d{4}-\d{2}-\d{2}$/", $what)) // TODO, should never be more than 'now'
$what = "2011-11-15";
break;
case "interval":
// if an unsupported interval is specified, fallback to daily
if (!in_array($what, array('hourly', 'daily', 'weekly', 'monthly', 'yearly', 'overall', 'custom'))) {
$what = "daily";
}
break;
// escape shell cmd chars
case "shell":
$what = preg_replace("/[\/ ]/", "_", $what);
Expand Down Expand Up @@ -651,7 +657,7 @@ function decodeAndFlatten($text) {
// make sure that we have all the right types and values
// also make sure one cannot do a mysql injection attack
function validate_all_variables() {
global $esc, $query, $url_query, $geo_query, $dataset, $exclude, $from_user_name, $from_source, $startdate, $enddate, $databases, $connection, $keywords, $database, $minf, $topu, $from_user_lang, $outputformat;
global $esc, $query, $url_query, $geo_query, $dataset, $exclude, $from_user_name, $from_source, $startdate, $enddate, $interval, $databases, $connection, $keywords, $database, $minf, $topu, $from_user_lang, $outputformat;

$esc['mysql']['dataset'] = validate($dataset, "mysql");
$esc['mysql']['query'] = validate($query, "mysql");
Expand Down Expand Up @@ -679,6 +685,7 @@ function validate_all_variables() {

$esc['date']['startdate'] = validate($startdate, "startdate");
$esc['date']['enddate'] = validate($enddate, "enddate");
$esc['date']['interval'] = validate($interval, "interval");

if (preg_match("/^\d{4}-\d{2}-\d{2}$/", $esc['date']['startdate']))
$esc['datetime']['startdate'] = $esc['date']['startdate'] . " 00:00:00";
Expand Down Expand Up @@ -716,6 +723,22 @@ function current_collation() {
return $collation;
}

/**
 * Retrieve the value stored for a variable in the tcat_status table.
 *
 * The tcat_status table is optional, so its presence is first checked through
 * information_schema; only when it exists is the variable looked up.
 *
 * @param string $variable name of the status variable to look up
 * @return mixed the content of the `value` column for that variable, or null
 *               when the table does not exist, the variable has no row, or a
 *               query fails
 */
function get_status($variable) {
    global $hostname, $dbuser, $dbpass, $database;

    // Connect before building any SQL: mysql_real_escape_string() needs an open link.
    db_connect($hostname, $dbuser, $dbpass, $database);

    $sql = "SELECT table_name FROM information_schema.tables WHERE table_schema = '" . mysql_real_escape_string($database) . "' AND table_name = 'tcat_status'";
    $sqlresults = mysql_query($sql);
    // mysql_query() returns false on failure; treat that the same as "table not present".
    if ($sqlresults === false || mysql_num_rows($sqlresults) == 0) {
        return null;
    }

    $sql = "SELECT value FROM tcat_status WHERE variable = '" . mysql_real_escape_string($variable) . "'";
    $sqlresults = mysql_query($sql);
    if ($sqlresults === false) {
        return null;
    }

    $res = mysql_fetch_assoc($sqlresults);
    if ($res) {
        return $res['value'];
    }
    return null;
}

// Output format: {dataset}-{startdate}-{enddate}-{query}-{exclude}-{from_user_name}-{from_user_lang}-{url_query}-{module_name}-{module_settings}-{hash}.{filetype}
function get_filename_for_export($module, $settings = "", $filetype = "csv") {
global $resultsdir, $esc;
Expand Down Expand Up @@ -784,7 +807,7 @@ function get_hash_tags($msg) {
function get_all_datasets() {
global $dataset;
$dbh = pdo_connect();
$rec = $dbh->prepare("SELECT id, querybin, type, active, comments FROM tcat_query_bins WHERE visible = TRUE ORDER BY LOWER(querybin)");
$rec = $dbh->prepare("SELECT id, querybin, type, active, comments FROM tcat_query_bins WHERE access = " . TCAT_QUERYBIN_ACCESS_OK . " OR access = " . TCAT_QUERYBIN_ACCESS_READONLY . " ORDER BY LOWER(querybin)");
$datasets = array();
if ($rec->execute() && $rec->rowCount() > 0) {
while ($res = $rec->fetch()) {
Expand Down
25 changes: 22 additions & 3 deletions analysis/index.php
Expand Up @@ -251,11 +251,11 @@ function getExportSettings() {
</tr>
<?php } ?>
<tr>
<td class="tbl_head">Startdate:</td><td><input type="text" id="ipt_startdate" size="60" name="startdate" value="<?php echo $startdate; ?>" /> (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS)</td>
<td class="tbl_head">Startdate (UTC):</td><td><input type="text" id="ipt_startdate" size="60" name="startdate" value="<?php echo $startdate; ?>" /> (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS)</td>
</tr>

<tr>
<td class="tbl_head">Enddate:</td><td><input type="text" id="ipt_enddate" size="60" name="enddate" value="<?php echo $enddate; ?>" /> (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS)</td>
<td class="tbl_head">Enddate (UTC):</td><td><input type="text" id="ipt_enddate" size="60" name="enddate" value="<?php echo $enddate; ?>" /> (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS)</td>
</tr>
<tr>
<td valign="middle" style='padding-top: 4px'><input type="submit" value="update overview" /></td>
Expand Down Expand Up @@ -326,6 +326,8 @@ function updatestatus() {
$show_url_export = true;
}
}
// see whether database is up-to-date to export ratelimit and gap tables
$show_ratelimit_and_gap_export = get_status('ratelimit_database_rebuild') == 2 ? true : false;
// see whether the lang table exists
$show_lang_export = FALSE;
$sql = "SHOW TABLES LIKE '" . $esc['mysql']['dataset'] . "_lang'";
Expand Down Expand Up @@ -721,9 +723,24 @@ function updatestatus() {
<div class="txt_desc">Use: get a grasp of the most popular media.</div>
<div class="txt_link"> &raquo; <a href="" onclick="var minf = askFrequency(); $('#whattodo').val('media_frequency&minf='+minf+getInterval());sendUrl('mod.media_frequency.php');return false;">launch</a></div>

<?php if ($show_ratelimit_and_gap_export) { ?>
<hr/>

</div>
<h3>Export an estimation of the number of rate limited tweets in your data</h3>
<div class="txt_desc">Exports a spreadsheet with an estimation of the amount of non-captured tweets in your query due to ratelimit occurrences.</div>
<div class="txt_desc">Use: gain insight in possible missing data due to hitting the Twitter API rate limits.</div>
<div class="txt_link"> &raquo; <a href="" onclick="$('#whattodo').val('ratelimits'+getInterval());sendUrl('mod.ratelimits.php');return false;">launch</a></div>

<hr/>

<h3>Export table with potential gaps in your data</h3>
<div class="txt_desc">Exports a spreadsheet with all known data gaps in your current query, during which TCAT was not running or capturing data for this bin.</div>
<div class="txt_desc">Use: Gain insight in possible missing data due to outages</div>
<div class="txt_link"> &raquo; <a href="" onclick="$('#whattodo').val('gaps');sendUrl('mod.gaps.php');return false;">launch</a></div>

<?php } ?>

</div>

<h2>Tweet exports</h2>

Expand Down Expand Up @@ -814,6 +831,8 @@ function updatestatus() {

<?php } ?>

<hr/>

</div>
<h2>Networks</h2>

Expand Down
83 changes: 83 additions & 0 deletions analysis/mod.gaps.php
@@ -0,0 +1,83 @@
<?php
require_once __DIR__ . '/common/config.php';
require_once __DIR__ . '/common/functions.php';
require_once __DIR__ . '/common/CSV.class.php';
?>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>TCAT :: Export gap data</title>

<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

<link rel="stylesheet" href="css/main.css" type="text/css" />

<script type="text/javascript" language="javascript">



</script>

</head>

<body>

<h1>TCAT :: Export gap data</h1>

<?php
// Export script: writes a CSV with the start and end of every known capture
// gap that falls inside the requested date range and during which the selected
// query bin had at least one active phrase, then prints a link to the file.
validate_all_variables();

$module = "gapData";

// Resolve the selected query bin to its id and type.
$sql = "SELECT id, `type` FROM tcat_query_bins WHERE querybin = '" . mysql_real_escape_string($esc['mysql']['dataset']) . "'";
$sqlresults = mysql_query($sql);
// A failed query (false) is handled like an unknown bin instead of being passed on to mysql_fetch_assoc().
$res = $sqlresults ? mysql_fetch_assoc($sqlresults) : false;
if (!$res) {
    die("Query bin not found!");
}
// The id is interpolated unquoted into SQL further down, so force it to an integer.
$bin_id = (int) $res['id'];
$bin_type = $res['type'];

// Make filename and open file for write.
$exportSettings = array();
if (isset($_GET['exportSettings']) && $_GET['exportSettings'] != "")
    $exportSettings = explode(",", $_GET['exportSettings']);
$filename = get_filename_for_export($module, implode("_", $exportSettings));
$csv = new CSV($filename, $outputformat);

// Write header.
$header = "start,end";
$csv->writeheader(explode(',', $header));

// Read the requested range; a missing parameter becomes an empty string
// rather than raising an undefined-index notice.
// NOTE(review): the raw request values are used here, not the validated ones in $esc;
// presumably a date-only enddate compares as midnight of that day - confirm this is intended.
$range_start = isset($_GET['startdate']) ? $_GET['startdate'] : '';
$range_end = isset($_GET['enddate']) ? $_GET['enddate'] : '';

// Select the gaps recorded for this bin type that lie completely inside the range.
$sql = "SELECT * FROM tcat_error_gap WHERE type = '" . mysql_real_escape_string($bin_type) . "' and
start >= '" . mysql_real_escape_string($range_start) . "' and end <= '" . mysql_real_escape_string($range_end) . "'";

// Loop over results and write to file.
$sqlresults = mysql_query($sql);
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        // The query bin must have been active during the gap period, if we want to report it as a possible gap.
        $sql2 = "SELECT count(*) as cnt FROM tcat_query_bins_phrases WHERE querybin_id = $bin_id and
starttime <= '" . mysql_real_escape_string($data["end"]) . "' and (endtime >= '" . mysql_real_escape_string($data["start"]) . "' or endtime is null or endtime = '0000-00-00 00:00:00')";
        $sqlresults2 = mysql_query($sql2);
        if (!$sqlresults2) {
            continue;
        }
        $data2 = mysql_fetch_assoc($sqlresults2);
        if ($data2 && $data2['cnt'] > 0) {
            $csv->newrow();
            $csv->addfield($data["start"]);
            $csv->addfield($data["end"]);
            $csv->writerow();
        }
    }
}
$csv->close();

// Escape both the link target and the label so special characters in the
// generated name cannot break out of the surrounding markup.
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
echo '<p><a href="' . htmlspecialchars(filename_to_url($filename), ENT_QUOTES, 'UTF-8') . '">' . htmlspecialchars($filename, ENT_QUOTES, 'UTF-8') . '</a></p>';
echo '</fieldset>';
?>

</body>
</html>

0 comments on commit 1ec0169

Please sign in to comment.