Skip to content

Commit

Permalink
desktop: WebDataSource now tries not to bother the web server when as…
Browse files Browse the repository at this point in the history
…ked for samples.

It now scans for gaps in downloaded data files and if there are none they'll be considered 'complete' and will never be downloaded from the server again (at least until the cache database is cleared).

When checking query timespan the cache database will be consulted first and if the timespan fits within the cache database it won't bother asking the web server for its timespan.

It will also only load sysconfig once per session.

All of this means that a chart for last month can be plotted without a single web request being made. This all speeds up charts, export, view data and reports quite a bit in some cases.
  • Loading branch information
davidrg committed Jul 10, 2020
1 parent e32996b commit f755291
Show file tree
Hide file tree
Showing 15 changed files with 289 additions and 55 deletions.
7 changes: 1 addition & 6 deletions desktop/datasource/abstractlivedatasource.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@
#include <QDate>

#include "sampleset.h"

/** Type of weather station hardware. */
enum hardware_type_t {
HW_GENERIC = 0, /*!< Generic hardware */
HW_FINE_OFFSET = 1, /*!< Fine Offset hardware */
HW_DAVIS = 2 /*!< Davis hardware */
};
#include "hardwaretype.h"

struct _DavisLive {
float stormRain;
Expand Down
11 changes: 11 additions & 0 deletions desktop/datasource/hardwaretype.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef HARDWARETYPE_H
#define HARDWARETYPE_H

/** Identifies the type of weather station hardware a station uses.
 *
 * The numeric values are spelled out explicitly; keep them stable.
 */
enum hardware_type_t {
    HW_GENERIC     = 0, /*!< Generic hardware (station type code "GENERIC"); also the fallback for unrecognised type codes */
    HW_FINE_OFFSET = 1, /*!< Fine Offset hardware (station type code "FOWH1080") */
    HW_DAVIS       = 2  /*!< Davis hardware (station type code "DAVIS") */
};


#endif // HARDWARETYPE_H
3 changes: 3 additions & 0 deletions desktop/datasource/station_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <QString>
#include <QDateTime>

#include "hardwaretype.h"

typedef struct _station_info_t {
QString title;
QString description;
Expand All @@ -14,6 +16,7 @@ typedef struct _station_info_t {
bool isWireless;
bool hasSolarAndUV;
bool isValid;
hardware_type_t hardwareType;
} station_info_t;

typedef struct _sample_range_t {
Expand Down
48 changes: 39 additions & 9 deletions desktop/datasource/webcachedb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ void WebCacheDB::openDatabase() {
if (!runUpgradeScript(3, ":/cache_db/v3.sql", filename)) return; // v2 -> v3
if (!runUpgradeScript(4, ":/cache_db/v4.sql", filename)) return; // v3 -> v4
if (!runUpgradeScript(5, ":/cache_db/v5.sql", filename)) return; // v4 -> v5
if (!runUpgradeScript(6, ":/cache_db/v6.sql", filename)) return; // v5 -> v6

} else {
emit criticalError("Failed to determine version of cache database");
Expand Down Expand Up @@ -207,6 +208,7 @@ int WebCacheDB::getStationId(QString stationUrl) {
if (!query.lastError().isValid())
return getStationId(stationUrl);
else
qDebug() << "Failed to get stationId for URL:" << stationUrl;
return -1; // failure
}
}
Expand All @@ -225,12 +227,13 @@ int WebCacheDB::getDataFileId(QString dataFileUrl) {

int WebCacheDB::createDataFile(data_file_t dataFile, int stationId) {
QSqlQuery query(sampleCacheDb);
query.prepare("insert into data_file(station, url, last_modified, size) "
"values(:station, :url, :last_modified, :size)");
query.prepare("insert into data_file(station, url, last_modified, size, is_complete) "
"values(:station, :url, :last_modified, :size, :is_complete)");
query.bindValue(":station", stationId);
query.bindValue(":url", dataFile.filename);
query.bindValue(":last_modified", dataFile.last_modified.toTime_t());
query.bindValue(":size", dataFile.size);
query.bindValue(":is_complete", dataFile.isComplete);
query.exec();

if (!query.lastError().isValid()) {
Expand All @@ -243,14 +246,15 @@ int WebCacheDB::createDataFile(data_file_t dataFile, int stationId) {
}
}

void WebCacheDB::updateDataFile(int fileId, QDateTime lastModified, int size) {
void WebCacheDB::updateDataFile(int fileId, QDateTime lastModified, int size, bool isComplete) {
qDebug() << "Updating data file details...";
QSqlQuery query(sampleCacheDb);
query.prepare("update data_file set last_modified = :last_modified, "
"size = :size where id = :id");
"size = :size, is_complete = :is_complete where id = :id");
query.bindValue(":last_modified", lastModified.toTime_t());
query.bindValue(":size", size);
query.bindValue(":id", fileId);
query.bindValue(":is_complete", isComplete);
query.exec();

if (query.lastError().isValid())
Expand All @@ -272,7 +276,7 @@ data_file_t WebCacheDB::getDataFileCacheInformation(QString dataFileUrl) {
qDebug() << "Querying cache stats for URL" << dataFileUrl;

QSqlQuery query(sampleCacheDb);
query.prepare("select last_modified, size from data_file "
query.prepare("select last_modified, size, is_complete from data_file "
"where url = :url");
query.bindValue(":url", dataFileUrl);
query.exec();
Expand All @@ -284,6 +288,7 @@ data_file_t WebCacheDB::getDataFileCacheInformation(QString dataFileUrl) {
dataFile.last_modified = QDateTime::fromTime_t(
record.value(0).toInt());
dataFile.size = record.value(1).toInt();
dataFile.isComplete = record.value(2).toBool();

qDebug() << "Cache stats loaded from DB:"
<< dataFile.last_modified << dataFile.size;
Expand Down Expand Up @@ -373,7 +378,8 @@ void WebCacheDB::cacheDataFile(data_file_t dataFile, QString stationUrl) {
// data file exists. Update it.
updateDataFile(dataFileId,
dataFile.last_modified,
dataFile.size);
dataFile.size,
dataFile.isComplete);
}

if (dataFile.expireExisting) {
Expand Down Expand Up @@ -1500,6 +1506,10 @@ double nullableVariantDouble(QVariant v) {
return result;
}

/** Convenience overload: resolves the station URL to its station ID and
 * delegates to getSampleInterval(int).
 *
 * NOTE(review): getStationId() can return -1 on failure; that value is
 * forwarded unchecked here - confirm the int overload copes with it.
 *
 * @param url Station URL
 * @return Whatever getSampleInterval(int) returns for the resolved station
 *         (documented in the header as the sample interval in seconds)
 */
int WebCacheDB::getSampleInterval(QString url) {
    const int stationId = getStationId(url);
    return getSampleInterval(stationId);
}

int WebCacheDB::getSampleInterval(int stationId) {
QSqlQuery query(sampleCacheDb);
query.prepare("select sample_interval * 60 from station where station_id = :id");
Expand Down Expand Up @@ -2057,9 +2067,13 @@ station_info_t WebCacheDB::getStationInfo(QString url) {
return info;
}

// TODO: Populate hardware type!

QSqlQuery query(sampleCacheDb);
query.prepare("select title, description, latitude, longitude, altitude, solar_available, davis_broadcast_id "
"from station where code = :url");
query.prepare("select s.title, s.description, s.latitude, s.longitude, "
"s.altitude, s.solar_available, s.davis_broadcast_id, st.code as type_code "
"from station s inner join station_type st on st.station_type_id = s.station_type_id "
"where s.code = :url");
query.bindValue(":url", url);
if (query.exec()) {
if (query.first()) {
Expand All @@ -2079,6 +2093,19 @@ station_info_t WebCacheDB::getStationInfo(QString url) {

QVariant broadcastId = query.record().value("davis_broadcast_id");
info.isWireless = !broadcastId.isNull() && broadcastId.toInt() != -1;

QString typeCode = query.record().value("type_code").toString().toUpper();
if (typeCode == "DAVIS") {
info.hardwareType = HW_DAVIS;
} else if (typeCode == "FOWH1080") {
info.hardwareType = HW_FINE_OFFSET;
} else if (typeCode == "GENERIC") {
info.hardwareType = HW_GENERIC;
} else {
qWarning() << "Unrecognised hardware type code" << typeCode << ". Treating has GENERIC.";
info.hardwareType = HW_GENERIC;
}

}
}

Expand All @@ -2090,6 +2117,9 @@ sample_range_t WebCacheDB::getSampleRange(QString url) {
info.isValid = false;

int id = getStationId(url);
if (id < 0) {
return info;
}

QSqlQuery query(sampleCacheDb);
query.prepare("select max(time_stamp) as end, min(time_stamp) as start from sample where station_id = :id");
Expand All @@ -2099,7 +2129,7 @@ sample_range_t WebCacheDB::getSampleRange(QString url) {
info.start = QDateTime::fromTime_t(query.record().value("start").toInt());
info.end = QDateTime::fromTime_t(query.record().value("end").toInt());
info.isValid = info.start < info.end;
qDebug() << "Start" << info.start << "End" << info.end << "Valid" << info.isValid;
qDebug() << "Start" << info.start << "End" << info.end << "Valid" << info.isValid << "Station" << id;
return info;
}
}
Expand Down
11 changes: 10 additions & 1 deletion desktop/datasource/webcachedb.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ typedef struct _data_file_t {
bool isValid;
bool expireExisting;
bool hasSolarData;
bool isComplete;
} data_file_t;

typedef struct _image_set_t {
Expand Down Expand Up @@ -265,6 +266,13 @@ class WebCacheDB : public QObject
*/
station_info_t getStationInfo(QString url);

/** Gets the sample interval in seconds for the specified station
*
* @param url Station URL
* @return Sample interval in seconds
*/
int getSampleInterval(QString url);

/** Gets the range of samples available for the specified station
*
* @param url Station to get the range for
Expand Down Expand Up @@ -396,8 +404,9 @@ class WebCacheDB : public QObject
* @param fileId ID of the file record to update
* @param lastModified The new last modified date of the file.
* @param size The new size of the file.
* @param isComplete If the data file covers its full timespan completely (no gaps)
*/
void updateDataFile(int fileId, QDateTime lastModified, int size);
void updateDataFile(int fileId, QDateTime lastModified, int size, bool isComplete);

/** Drops all cache data associated with the specified file from the
* database.
Expand Down
6 changes: 5 additions & 1 deletion desktop/datasource/webdatasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <float.h>
#include <QDateTime>
#include <QNetworkProxyFactory>

#include <QCoreApplication>

#if (QT_VERSION < QT_VERSION_CHECK(5,2,0))
#include <limits>
Expand Down Expand Up @@ -837,6 +837,10 @@ void WebDataSource::makeProgress(QString message) {
//int maxValue = progressListener->maximum();
progressListener->setValue(value);

// Ensure the event loop gets some time in case all the queued tasks manage to
// do their jobs from the cache database and don't have to make any web requests.
QCoreApplication::processEvents();

TQLOG << "Making progress:" << message;
}

Expand Down
86 changes: 85 additions & 1 deletion desktop/datasource/webtasks/datafilewebtask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,27 @@

DataFileWebTask::DataFileWebTask(QString baseUrl, QString stationCode,
request_data_t requestData, QString name,
QString url, bool forceDownload, WebDataSource *ds)
QString url, bool forceDownload,
int sampleInterval, WebDataSource *ds)
: AbstractWebTask(baseUrl, stationCode, ds) {
_requestData = requestData;
_name = name;
_url = url;
_downloadingDataset = false; // We check the cache first.
_forceDownload = forceDownload;
_sampleInterval = sampleInterval;
}

void DataFileWebTask::beginTask() {
data_file_t cache_info =
WebCacheDB::getInstance().getDataFileCacheInformation(_url);

if (cache_info.isValid && cache_info.isComplete && !_forceDownload) {
qDebug() << "Data file is marked COMPLETE in cache database - no server check required" << _url;
emit finished();
return;
}

if (_forceDownload) {
getDataset();
} else {
Expand Down Expand Up @@ -167,6 +178,24 @@ data_file_t DataFileWebTask::loadDataFile(QStringList fileData,
QList<QDateTime> ignoreTimeStamps;
QList<QStringList> ignoreSampleParts;

/* How this should work:
* We're downloading data for an entire month. That means the data file
* should start within $archiveInterval seconds of 00:00 on the 1st.
* From there, there should be a new sample every $archiveInterval seconds
* until we're within $archiveInterval seconds of the end of the month.
*/

QDateTime previousTime = QDateTime();
QDateTime endTime = QDateTime();

// We'll allow gaps of up to 1.5x the sample interval before treating them
// as missing data, to account for things like clocks being adjusted, etc.
qDebug() << "Station sample interval is" << _sampleInterval;
int archiveInterval = _sampleInterval + 0.5*_sampleInterval;
qDebug() << "Using" << archiveInterval << "as gap threshold.";

bool gapDetected = false;

while (!fileData.isEmpty()) {
QString line = fileData.takeFirst();
QStringList parts = line.split(QRegExp("\\s+"));
Expand All @@ -178,6 +207,49 @@ data_file_t DataFileWebTask::loadDataFile(QStringList fileData,
tsString += " " + parts.takeFirst();
QDateTime timestamp = QDateTime::fromString(tsString, Qt::ISODate);

if (!gapDetected) {
// -----------/ The Gap Detection Zone /-----------
// Here in The Gap Detection Zone our job is to figure out if the data
// file contains absolutely every sample it could contain. This means
// checking the gap between any two samples is no greater than the
// station's sample interval.

if (!previousTime.isValid()) {
// First sample in the file. Initialise previousTime to be the very
// start of the month and set the end time to be the very end of
// the month
previousTime = QDateTime(
QDate(timestamp.date().year(),
timestamp.date().month(),
1),
QTime(0,0,0));
endTime = previousTime.addMonths(1).addSecs(-1);
qDebug() << "Data file max range:" << previousTime << "to" << endTime;
}

qint64 previousSecs = previousTime.toSecsSinceEpoch();
qint64 thisSecs = timestamp.toSecsSinceEpoch();
if (thisSecs - previousSecs > archiveInterval) {
qDebug() << "GAP: This timestamp is" << timestamp << "previous was" << previousTime << ". Gap duration is" << thisSecs - previousSecs << "seconds.";
gapDetected = true;
}

if (fileData.isEmpty()) {
// Reached the end of the file. Current row is the last row.
// Check the final timestamp in the file is within archiveInterval
// seconds of the end of the month.
qint64 endSecs = endTime.toSecsSinceEpoch();
if (endSecs - thisSecs > archiveInterval) {
qDebug() << "GAP (@end): The end is" << endTime << "last row was" << timestamp << ". Gap duration is" << endSecs - thisSecs << "seconds.";
gapDetected = true;
}
}

previousTime = timestamp;

// ------------------------------------------------
}

if (!cacheStats.isValid) {
// No ignore range. Let it through.
sampleParts.append(parts);
Expand All @@ -198,6 +270,17 @@ data_file_t DataFileWebTask::loadDataFile(QStringList fileData,

bool expireCache = false;

if (gapDetected) {
qDebug() << "----> Data file is INCOMPLETE: it contains one or more gaps!";
} else {
qDebug() << "----> Data file is COMPLETE: no gaps detected!";
// *this* data file is 100% complete. There should never be new rows
// to appear in it so the only reason we'd ever re-download it is if
// for some reason some values changed (data fixed some erroneous rain
// tips?). So we'll replace whatever is in the cache database with this.
expireCache = true;
}

if (ignoreTimeStamps.count() == cacheStats.count) {
// There is the same number of records between those timestamps in
// both the cache database and the data file. Probably safe to assume
Expand Down Expand Up @@ -309,6 +392,7 @@ data_file_t DataFileWebTask::loadDataFile(QStringList fileData,
dataFile.samples = samples;
dataFile.expireExisting = expireCache;
dataFile.hasSolarData = _requestData.isSolarAvailable;
dataFile.isComplete = !gapDetected;

return dataFile;
}
3 changes: 3 additions & 0 deletions desktop/datasource/webtasks/datafilewebtask.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@ class DataFileWebTask : public AbstractWebTask
* @param baseUrl The base URL for the web interface
* @param stationCode Station Code for the weather station being used
* @param ds Parent data source that this task is doing work for
* @param sampleInterval Station's sample interval in seconds
*/
explicit DataFileWebTask(QString baseUrl,
QString stationCode,
request_data_t requestData,
QString name,
QString url,
bool forceDownload,
int sampleInterval,
WebDataSource* ds);

/** Starts processing this task.
Expand Down Expand Up @@ -72,6 +74,7 @@ public slots:

bool _downloadingDataset;
bool _forceDownload;
int _sampleInterval;

void cacheStatusRequestFinished(QNetworkReply *reply);
void downloadRequestFinished(QNetworkReply *reply);
Expand Down
Loading

0 comments on commit f755291

Please sign in to comment.