Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/Backend/Controllers/AdminController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ public ActionResult<string> StopScrappersInBackground()
[Route("scrappers/background/status")]
public ActionResult<string> GetScrappersInBackgroundStatus()
{
this.scrapperRunner.StopBackgroundRunner();
return Ok($"{this.scrapperRunner.GetStatus()}");
return Ok(this.scrapperRunner.GetStatus());
}
}
}
53 changes: 33 additions & 20 deletions src/Backend/Operations/ScrapperRunner.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Collections.Concurrent;
using System.Dynamic;
using System.Text;
using Common.Engines;
using Common.Managers;
Expand All @@ -17,10 +18,12 @@ public class ScrapperRunner
private ConcurrentBag<string> enabledScrappers = new ConcurrentBag<string>();
private TimeSpan runInterval = TimeSpan.FromHours(3);
private CancellationTokenSource cts = new CancellationTokenSource();
private Task backgroundTask = null;
public string CurrentState { get; private set; } = "Stopped";
private Task? backgroundTask = null;
public string CurrentState => backgroundTask != null && !backgroundTask.IsCompleted ? "Running" : "Stopped";

private string LastError = string.Empty;
private DateTime lastRunTime = DateTime.MinValue;
private ConcurrentDictionary<string, string> lastRunResults = new ConcurrentDictionary<string, string>();

public ScrapperRunner(ILogger<ScrapperRunner> logger, JobScrapperSettingsManager settingsManager, GSEngine gSEngine, AIEngine aIEngine, JobsRepository jobsRepository)
{
Expand Down Expand Up @@ -51,16 +54,18 @@ public async Task RunScrapperAsync(string scrapperId)
if (settings == null)
{
logger.LogWarning($"Scrapper settings not found for id: {scrapperId}. Skipping scrapper run.");
this.lastRunResults[scrapperId] = $"[{DateTime.Now}] Settings not found.";
return;
}

try
{
var scrapper = new JobScrapper(gsEngine, aiEngine, jobsRepository, logger);
scrapper.ConfigureSettings(settings);
await scrapper.RunAsync();
logger.LogInformation($"Scrapper run completed for id: {scrapperId}");
var res = await scrapper.RunAsync();
logger.LogInformation($"Scrapper run completed for id: {scrapperId} | Results: {res}");
settings.lastRunTime = DateTime.UtcNow;
this.lastRunResults[scrapperId] = $"[{DateTime.Now}] {res}";
await this.settingsManager.UpdateSettingsAsync(scrapperId, settings);
}
catch (Exception ex)
Expand All @@ -76,7 +81,6 @@ public void StartBackgroundRunner()
{
cts = new CancellationTokenSource();
backgroundTask = RunInBackgroundAsync(cts.Token);
this.CurrentState = "Running";
}
}

Expand All @@ -85,32 +89,41 @@ public void StopBackgroundRunner()
if (cts != null && !cts.IsCancellationRequested)
{
cts.Cancel();
this.CurrentState = "Stopped";
}
}

public string GetStatus()
{
var sb = new StringBuilder();
sb.Append($"CurrentState: {this.CurrentState}\n");
sb.Append($"AI Engine Ready: {this.aiEngine.IsReady()}\n");
sb.Append($"Run Interval: {this.runInterval} | Last Run Time (UTC): {this.lastRunTime}\n");
sb.Append($"EnabledScrappers: {string.Join(",", this.enabledScrappers)}\n");
sb.Append($"LastError: {this.LastError}");
return sb.ToString();
/// <summary>
/// Assembles a point-in-time snapshot of the runner's state as a dynamic object.
/// </summary>
/// <returns>
/// An <see cref="ExpandoObject"/> whose members are, in insertion order: CurrentState,
/// AIEngineReady, RunInterval, LastRunTimeUtc, EnabledScrappers, LastResults and LastError.
/// </returns>
public dynamic GetStatus()
{
    dynamic snapshot = new ExpandoObject();

    // ExpandoObject exposes its members through IDictionary, which lets us populate it by key.
    var fields = (IDictionary<string, object>)snapshot;

    fields.Add("CurrentState", this.CurrentState);

    // Reported as false when there is no AI engine instance to ask.
    var aiEngineReady = this.aiEngine?.IsReady() ?? false;
    fields.Add("AIEngineReady", aiEngineReady);

    fields.Add("RunInterval", this.runInterval);
    fields.Add("LastRunTimeUtc", this.lastRunTime);

    // Materialize both concurrent collections so the returned snapshot is decoupled from
    // the live collections and can be enumerated / serialized safely.
    var enabledIds = this.enabledScrappers.ToArray();
    fields.Add("EnabledScrappers", enabledIds);

    var resultsCopy = this.lastRunResults.ToDictionary(entry => entry.Key, entry => entry.Value);
    fields.Add("LastResults", resultsCopy);

    fields.Add("LastError", this.LastError);

    return snapshot;
}

private async Task RunInBackgroundAsync(CancellationToken cancellationToken)
{
TimeSpan checkInterval = TimeSpan.FromMinutes(5);
while (!cancellationToken.IsCancellationRequested)
{
lastRunTime = DateTime.UtcNow;
foreach (var scrapperId in enabledScrappers)
if (DateTime.UtcNow - lastRunTime > runInterval)
{
logger.LogInformation($"Starting scrapper run for id: {scrapperId}");
await RunScrapperAsync(scrapperId);
foreach (var scrapperId in enabledScrappers)
{
logger.LogInformation($"Starting scrapper run for id: {scrapperId}");
await RunScrapperAsync(scrapperId);
}
lastRunTime = DateTime.UtcNow;
}
await Task.Delay(runInterval, cancellationToken);
await Task.Delay(checkInterval, cancellationToken);
}
}
}
Expand Down
12 changes: 8 additions & 4 deletions src/Common/Managers/JobScrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public void ConfigureSettings(JobScrapperSettings settings)
this.settings = settings;
}

public async Task RunAsync()
public async Task<string> RunAsync()
{
var startTime = DateTime.UtcNow;
this.logger.LogInformation($"Starting JobScrapper run for settings: {this.settings}");
Expand All @@ -37,7 +37,7 @@ public async Task RunAsync()
if (searchResults == null || searchResults.Count == 0)
{
this.logger.LogInformation($"Nothing to process. Query settings: {this.settings}");
return;
return "No search results, processing skipped.";
}

var mp = new Dictionary<string, ScrappedJob>(StringComparer.OrdinalIgnoreCase);
Expand All @@ -49,6 +49,8 @@ public async Task RunAsync()
}
}

// TODO: Filter out duplicates by fetching the jobs added to the DB within the last day.

var levels = await this.aiEngine.GetJobLevelAsync(searchResults);
foreach (var level in levels)
{
Expand All @@ -62,17 +64,19 @@ public async Task RunAsync()
}
}

var newResults = 0;
foreach (var job in searchResults)
{
var success = await this.jobsRepository.CreateIfNotExistsAsync(job);
if (!success)
if (success)
{
this.logger.LogError($"Failed to push job {job.id} to JobsRepository.");
newResults++;
}
}

var duration = DateTime.UtcNow - startTime;
this.logger.LogInformation($"JobScrapper run completed. Duration: {duration}. Processed {searchResults.Count} jobs for settings: {this.settings}");
return $"Processed {searchResults.Count} jobs | New addition: {newResults} in {duration.TotalSeconds} seconds.";
}

}
Expand Down