Skip to content

Commit

Permalink
Address mchandschuh's review #2
Browse files Browse the repository at this point in the history
  • Loading branch information
gsalaz98 committed Jul 10, 2019
1 parent 9401481 commit b11c14a
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 30 deletions.
4 changes: 2 additions & 2 deletions Tests/ToolBox/PsychSignalDataTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public class PsychSignalDataTests
[Test, Ignore("This test requires raw PsychSignal data")]
public void FileHourMatchesDataTimeRealRawData()
{
var rawPath = Path.Combine(Globals.DataFolder, "alternative", "psychsignal", "raw-psychsignal");
var rawPath = Path.Combine("raw", "alternative", "psychsignal");

foreach (var file in Directory.GetFiles(rawPath, "*.csv", SearchOption.TopDirectoryOnly).ToList())
{
var fileSplit = file.Split('_');
Expand Down
32 changes: 17 additions & 15 deletions ToolBox/PsychSignalDataConverter/PsychSignalDataConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ public PsychSignalDataConverter(string sourceDirectory, string destinationDirect

_destinationDirectory.Create();
}

/// <summary>
/// Converts a specific file to Lean alternative data format. Note that you must flush
/// after you're done converting a file to ensure that all data gets written to disk.
/// You can do that by calling <see cref="Dispose"/> once you've finished processing
///
///
/// Note: Assumes that it will be given files in ascending order by date
/// </summary>
/// <param name="sourceFilePath">File to process and convert</param>
Expand Down Expand Up @@ -89,7 +89,7 @@ public void Convert(FileInfo sourceFilePath)
handle = new TickerData(ticker, timestamp.Date, _destinationDirectory);
_fileHandles[ticker] = handle;
}

handle.Append(timestamp, csv);
}

Expand Down Expand Up @@ -154,7 +154,7 @@ public void ConvertDirectory()
}
)
.OrderBy(x => DateTime.ParseExact(x.Name.Substring(0, 11), "yyyyMMdd_HH", CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal));

foreach (var rawFile in files)
{
Convert(rawFile);
Expand All @@ -164,7 +164,7 @@ public void ConvertDirectory()
}

/// <summary>
/// Utility method to compress the data contained within the psychsignal alternative data folder
/// Utility method to compress the data contained within the psychsignal alternative data folder
/// to a structure similar to equity minute files (e.g. /[symbol]/20010101.zip#20010101.csv)
/// </summary>
private void CompressData()
Expand All @@ -188,7 +188,7 @@ private void CompressData()
timer.Stop();
Log.Trace($"PsychSignalDataConverter.CompressData(): Finished compressing PsychSignal data in {timer.Elapsed.TotalSeconds} seconds");
}

/// <summary>
/// Handle to a file so that we don't have to open and close it every time we want
/// to write to a file. This helps us speed up time spent processing massively.
Expand All @@ -201,7 +201,7 @@ private class TickerData
private StreamWriter _writer;
private string _tempPath;
private DateTime _date;

/// <summary>
/// Windows filesystem forbids the following names as directory or file names
/// </summary>
Expand Down Expand Up @@ -252,7 +252,7 @@ public TickerData(string ticker, DateTime date, DirectoryInfo destinationDirecto
_ticker += "_";
}
}

/// <summary>
/// Adds a new line to the writer
/// </summary>
Expand All @@ -270,11 +270,12 @@ public void Append(DateTime timestamp, IEnumerable<string> csv)
_tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
_writer = new StreamWriter(_tempPath);
}

// Ignore the first three entries of the CSV line

// Ignore the first three columns of the CSV line because we already
// know the "source[0]", know the "symbol[1]", and have already parsed the "timestamp[2]"
_writer.WriteLine(ToCsv(timestamp, csv.Skip(3)));
}

/// <summary>
/// Moves the temporary file containing data to the final path, deleting
/// any existing file to avoid conflicts when moving
Expand All @@ -285,7 +286,7 @@ private void MoveTempFile()
var writePath = Path.Combine(tickerDirectory, $"{_date:yyyyMMdd}.csv");

Directory.CreateDirectory(tickerDirectory);

// We only want the latest version of the data
if (File.Exists(writePath))
{
Expand All @@ -305,9 +306,10 @@ private void MoveTempFile()
private string ToCsv(DateTime timestamp, IEnumerable<string> csvData)
{
    // Builds one output line: "<milliseconds since midnight>,<remaining columns joined by commas>".
    //
    // Raw column layout:
    // SOURCE[0],SYMBOL[1],TIMESTAMP_UTC[2],BULLISH_INTENSITY[3],BEARISH_INTENSITY[4],BULL_MINUS_BEAR[5],BULL_SCORED_MESSAGES[6],BEAR_SCORED_MESSAGES[7],BULL_BEAR_MSG_RATIO[8],TOTAL_SCANNED_MESSAGES[9]
    // We should have skipped the first three entries, so our real starting index is "BULLISH_INTENSITY[3]"

    // TotalMilliseconds is a double. Format it with the invariant culture so that a
    // fractional value can never pick up a culture-specific decimal separator (e.g. ","),
    // which would silently add an extra column to the CSV line.
    var millisecondsSinceMidnight = timestamp.TimeOfDay.TotalMilliseconds.ToString(CultureInfo.InvariantCulture);

    return $"{millisecondsSinceMidnight},{string.Join(",", csvData)}";
}

/// <summary>
/// Flushes and closes the underlying <see cref="StreamWriter"/>
/// and moves the temp file to its final path
Expand All @@ -323,7 +325,7 @@ public void Flush()

#region IDisposable Support
private bool _disposedValue = false;

/// <summary>
/// Disposes the object. Any additional calls to any method will yield an <see cref="ObjectDisposedException" />
/// </summary>
Expand Down Expand Up @@ -361,7 +363,7 @@ public void Dispose(bool disposing)
_disposedValue = true;
}
}

/// <summary>
/// Default dispose
/// </summary>
Expand Down
33 changes: 20 additions & 13 deletions ToolBox/PsychSignalDataConverter/PsychSignalDataDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@
using System.Threading;
using NodaTime;
using QuantConnect.Logging;
using QuantConnect.Util;

namespace QuantConnect.ToolBox.PsychSignalDataConverter
{
public class PsychSignalDataDownloader
{
private readonly string _apiKey;
private readonly string _dataSource;


/// <summary>
/// Psychsignal imposes strict API rate limits set to about one request per ten seconds
/// </summary>
private readonly RateGate _apiRateGate = new RateGate(occurrences: 1, timeUnit: TimeSpan.FromSeconds(10));

/// <summary>
/// Base URL for the psychsignal API
/// </summary>
Expand All @@ -36,12 +42,12 @@ public class PsychSignalDataDownloader
/// Destination we will write raw data to
/// </summary>
private readonly string _rawDataDestination;

/// <summary>
/// Maximum amount of retries per data hour
/// Maximum amount of retries per data hour
/// </summary>
public int MaxRetries = 5;

/// <summary>
/// Downloads data from psychsignal
/// </summary>
Expand All @@ -55,7 +61,7 @@ public PsychSignalDataDownloader(string rawDataDestination, string apiKey, strin
_dataSource = dataSource;
_apiKey = apiKey;
}

/// <summary>
/// Download the data from the given starting date to the ending date.
/// Note that if the ending date is in the same hour as the current time,
Expand All @@ -68,38 +74,39 @@ public void Download(DateTime startDateUtc, DateTime endDateUtc)
{
var now = DateTime.UtcNow;
var nowHour = new DateTime(now.Year, now.Month, now.Day, now.Hour, 0, 0);

Directory.CreateDirectory(_rawDataDestination);

if (startDateUtc < now.AddDays(-15))
{
throw new ArgumentException("The starting date can only be at most 15 days from now");
}

// Makes sure we only get final, non-changing data by checking if the end date is greater than
// Makes sure we only get final, non-changing data by checking if the end date is greater than
// or equal to the current time and setting it to an hour before the current time if the condition is met
if (nowHour <= new DateTime(endDateUtc.Year, endDateUtc.Month, endDateUtc.Day, endDateUtc.Hour, 0, 0))
{
endDateUtc = nowHour.AddHours(-1);
}

// PsychSignal paginates data by hour

// PsychSignal paginates data by hour. Note that it is possible to retrieve non-complete data if the requested hour
// is the same as the current hour or greater than the current hour.
for (; startDateUtc < endDateUtc; startDateUtc = startDateUtc.AddHours(1))
{
var rawDataPath = Path.Combine(_rawDataDestination, $"{startDateUtc:yyyyMMdd_HH}_{_dataSource}.csv");
var rawDataPathTemp = Path.Combine(Path.GetTempPath(), $"{startDateUtc:yyyyMMdd_HH}_{_dataSource}.csv.tmp");

// Don't download files we already have
if (File.Exists(rawDataPath))
{
continue;
}

// Retry in case a download failed
for (var retries = 0; retries < MaxRetries; retries++)
{
// Psychsignal imposes very strict rate limits
Thread.Sleep(10000);
// Set a max timeout of ten seconds
_apiRateGate.WaitToProceed(10000);

try
{
Expand Down

0 comments on commit b11c14a

Please sign in to comment.