diff --git a/src/.dockerignore b/src/.dockerignore
new file mode 100644
index 0000000..fe1152b
--- /dev/null
+++ b/src/.dockerignore
@@ -0,0 +1,30 @@
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/azds.yaml
+**/bin
+**/charts
+**/docker-compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md
+!**/.gitignore
+!.git/HEAD
+!.git/config
+!.git/packed-refs
+!.git/refs/heads/**
\ No newline at end of file
diff --git a/src/Backend/Backend.csproj b/src/Backend/Backend.csproj
index 1a02122..140b068 100644
--- a/src/Backend/Backend.csproj
+++ b/src/Backend/Backend.csproj
@@ -7,6 +7,11 @@
+
+
+
+
+
diff --git a/src/Backend/Controllers/AdminController.cs b/src/Backend/Controllers/AdminController.cs
new file mode 100644
index 0000000..dad6987
--- /dev/null
+++ b/src/Backend/Controllers/AdminController.cs
@@ -0,0 +1,69 @@
+
+namespace Backend.Controllers
+{
+    using Backend.Operations;
+    using Common.Managers;
+    using Microsoft.AspNetCore.Mvc;
+
+    [ApiController]
+    [Route("api/admin")]
+    public class AdminController : ControllerBase
+    {
+        private readonly ILogger<AdminController> logger;
+        private readonly ScrapperRunner scrapperRunner;
+
+        public AdminController(ILogger<AdminController> logger, ScrapperRunner scrapperRunner)
+        {
+            this.logger = logger;
+            this.scrapperRunner = scrapperRunner;
+        }
+
+        [HttpGet]
+        [Route("scrappers/trigger/{scrapperId}")]
+        public ActionResult TriggerScrapperRun(string scrapperId)
+        {
+            // _ = Task.Run(async () => await scrapperRunner.RunScrapperAsync(scrapperId));
+            return Ok($"[Dummy]: Scrapper run triggered for id: {scrapperId}");
+        }
+
+        [HttpPut]
+        [Route("scrappers/trigger/{scrapperId}")]
+        public ActionResult EnableScrapper(string scrapperId)
+        {
+            this.scrapperRunner.EnableScrapper(scrapperId);
+            return Ok($"Scrapper enabled for id: {scrapperId}");
+        }
+
+        [HttpDelete]
+        [Route("scrappers/trigger/{scrapperId}")]
+        public ActionResult DisableScrapper(string scrapperId)
+        {
+            this.scrapperRunner.DisableScrapper(scrapperId);
+            return Ok($"Scrapper disabled for id: {scrapperId}");
+        }
+
+        [HttpGet]
+        [Route("scrappers/background/start")]
+        public ActionResult StartScrappersInBackground()
+        {
+            this.scrapperRunner.StartBackgroundRunner();
+            return Ok($"Background scrapper runs started. Current State: {this.scrapperRunner.CurrentState}");
+        }
+
+        [HttpGet]
+        [Route("scrappers/background/stop")]
+        public ActionResult StopScrappersInBackground()
+        {
+            this.scrapperRunner.StopBackgroundRunner();
+            return Ok($"Background scrapper runs stopped. Current State: {this.scrapperRunner.CurrentState}");
+        }
+
+        [HttpGet]
+        [Route("scrappers/background/status")]
+        public ActionResult GetScrappersInBackgroundStatus()
+        {
+            return Ok($"{this.scrapperRunner.GetStatus()}");
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Backend/Controllers/JobSearchController.cs b/src/Backend/Controllers/JobSearchController.cs
new file mode 100644
index 0000000..7a396e4
--- /dev/null
+++ b/src/Backend/Controllers/JobSearchController.cs
@@ -0,0 +1,60 @@
+namespace Backend.Controllers
+{
+    using Backend.Operations;
+    using Microsoft.AspNetCore.Mvc;
+    using Common.Models.Public;
+    using Common.Repositories;
+    using System.Threading.Tasks;
+    using Common.Managers;
+    using Common.Engines;
+    using Common.Queries;
+    using Common.DatabaseModels;
+
+    [ApiController]
+    [Route("api/jobs")]
+    public class JobSearchController : ControllerBase
+    {
+        private readonly JobsRepository jobsRepository;
+        private readonly ILogger<JobSearchController> logger;
+        public JobSearchController(JobsRepository jobsRepository, ILogger<JobSearchController> logger)
+        {
+            this.logger = logger;
+            this.jobsRepository = jobsRepository;
+        }
+
+        [HttpPost]
+        [Route("search")]
+        public async Task<ActionResult<List<ScrappedJob>>> SearchJobs([FromBody] JobQuery jobquery)
+        {
+            return Ok(await jobsRepository.GetJobsFromQuery(jobquery));
+        }
+
+        [HttpGet]
+        [Route("latest")]
+        public async Task<ActionResult<string>> GetLatestJobsFromDb(
+            [FromQuery] string location = "India",
+            [FromQuery] string level = "Mid")
+        {
+            return Content(JobListView.RenderScrappedJobsHtml(await this.jobsRepository.GetJobsEasyQueryAsync(location, level)), "text/html");
+        }
+
+        [HttpGet]
+        [Route("lastOneDay")]
+        public async Task<ActionResult<List<ScrappedJob>>> GetLastOneDayJobsFromDb()
+        {
+            return Ok(await this.jobsRepository.GetAllJobsInLastOneDay());
+        }
+
+        [HttpGet]
+        [Route("profile/{id}")]
+        public async Task<ActionResult<ScrappedJob>> GetJobById(string id)
+        {
+            var job = await this.jobsRepository.GetJobByIdAsync(id);
+            if (job != null)
+            {
+                return Ok(job);
+            }
+            return NotFound($"Job not found: {id}");
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Backend/Controllers/ProblemsController.cs b/src/Backend/Controllers/ProblemsController.cs
index 1bb76ad..beacd1b 100644
--- a/src/Backend/Controllers/ProblemsController.cs
+++ b/src/Backend/Controllers/ProblemsController.cs
@@ -1,7 +1,7 @@
 namespace Backend.Controllers
 {
     using Backend.Filters;
-    using Backend.Models.Public;
+    using Common.Models.Public;
     using Backend.Operations;
     using Common.Models;
     using Microsoft.AspNetCore.Mvc;
diff --git a/src/Backend/Controllers/ScrapperSettingsController.cs b/src/Backend/Controllers/ScrapperSettingsController.cs
new file mode 100644
index 0000000..4192716
--- /dev/null
+++ b/src/Backend/Controllers/ScrapperSettingsController.cs
@@ -0,0 +1,64 @@
+using Common.DatabaseModels;
+using Common.Engines;
+using Common.Managers;
+using Common.Models;
+using Common.Models.Public;
+using Common.Repositories;
+using Microsoft.AspNetCore.Mvc;
+
+namespace Backend.Controllers
+{
+    [ApiController]
+    [Route("api/jobs/scrappers")]
+    public class ScrapperSettingsController : ControllerBase
+    {
+        private readonly JobScrapperSettingsManager _settingsManager;
+
+        private readonly ILogger<ScrapperSettingsController> _logger;
+
+        public ScrapperSettingsController(JobScrapperSettingsManager jobScrapperSettingsManager,
+            ILogger<ScrapperSettingsController> logger)
+        {
+            _settingsManager = jobScrapperSettingsManager;
+            _logger = logger;
+        }
+
+        [HttpGet]
+        [Route("")]
+        public async Task<ActionResult<List<JobScrapperSettings>>> GetAllJobScrappers()
+        {
+            // Placeholder implementation for getting all scrappers
+            return Ok(await _settingsManager.GetAllSettings());
+        }
+
+        [HttpPut]
+        [Route("{id}")]
+        public async Task<ActionResult<JobScrapperSettings>> UpdateJobScrapperSettings(string id, [FromBody] ScrapperSettings settings)
+        {
+            try
+            {
+                return Ok(await _settingsManager.CreateOrUpdateSettings(id, settings));
+            }
+            catch (InvalidOperationException ex)
+            {
+                return BadRequest(ex.Message);
+            }
+        }
+
+        [HttpPost]
+        [Route("add")]
+        public async Task<ActionResult<JobScrapperSettings>> CreateNewJobScrapperSettings([FromBody] ScrapperSettings settings)
+        {
+            return BadRequest("Use PUT api/jobs/scrappers/{id} to create or update scrapper settings.");
+            // return Ok(await _settingsManager.CreateOrUpdateSettings(string.Empty, settings));
+        }
+
+        [HttpGet]
+        [Route("{id}")]
+        public async Task<ActionResult<JobScrapperSettings>> GetJobScrapperSettings(string id)
+        {
+            // Placeholder implementation for getting scrapper settings
+            return Ok(await _settingsManager.GetSettingsById(id));
+        }
+    }
+}
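Reviewer note: a quick smoke test for the new search endpoint. This is a minimal sketch, assuming the Backend is running on the http://localhost:5164 profile added to launchSettings.json below; the anonymous payload simply mirrors the property names of Common.Queries.JobQuery, and the filter values are made up.

using System;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;

public static class JobSearchSmokeTest
{
    public static async Task Main()
    {
        using var http = new HttpClient { BaseAddress = new Uri("http://localhost:5164") };

        // Anonymous payload matching Common.Queries.JobQuery's property names.
        var payload = new
        {
            JobType = "Software Engineer",
            StartDate = DateTime.UtcNow.AddDays(-7),
            EndDate = DateTime.UtcNow,
            Companies = new[] { "Contoso" },   // hypothetical filter values
            Locations = new[] { "India" },
            JobLevel = "Mid"
        };

        var response = await http.PostAsJsonAsync("api/jobs/search", payload);
        Console.WriteLine($"{(int)response.StatusCode}: {await response.Content.ReadAsStringAsync()}");
    }
}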
diff --git a/src/Backend/Filters/IFilter.cs b/src/Backend/Filters/IFilter.cs
deleted file mode 100644
index 91a29e5..0000000
--- a/src/Backend/Filters/IFilter.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-using Common.Models;
-
-namespace Backend.Filters
-{
-    public interface IFilter
-    {
-        public List<Problem> ApplyFilterAsync(List<Problem> problems);
-    }
-}
\ No newline at end of file
diff --git a/src/Backend/Filters/ProblemFilter.cs b/src/Backend/Filters/ProblemFilter.cs
index c42b8b6..1af746d 100644
--- a/src/Backend/Filters/ProblemFilter.cs
+++ b/src/Backend/Filters/ProblemFilter.cs
@@ -1,8 +1,9 @@
+using Common;
 using Common.Models;
 
 namespace Backend.Filters
 {
-    public class ProblemFilter : IFilter
+    public class ProblemFilter : IFilter<Problem>
     {
         private int skip = 0;
         private int limit = 50;
diff --git a/src/Backend/Operations/DataProvider.cs b/src/Backend/Operations/DataProvider.cs
index c280dd1..04f80a0 100644
--- a/src/Backend/Operations/DataProvider.cs
+++ b/src/Backend/Operations/DataProvider.cs
@@ -1,6 +1,7 @@
 namespace Backend.Operations
 {
     using Backend.Filters;
+    using Common;
     using Common.Cache;
     using Common.Constants;
     using Common.Models;
@@ -15,7 +16,7 @@ public DataProvider([FromKeyedServices(CacheConstants.ProblemCacheKey)] ICache problemCache, ILogger logger)
             _logger = logger;
         }
 
-        public async Task<List<Problem>> GetProblemsAsync(IFilter? filter = null)
+        public async Task<List<Problem>> GetProblemsAsync(IFilter<Problem>? filter = null)
         {
             var allProblems = await GetAllProblemsAsync();
             if (filter != null)
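Reviewer note: moving IFilter from Backend.Filters to the generic Common.IFilter<T> lets non-Problem entities share the contract. One quirk worth flagging: ApplyFilterAsync is synchronous despite its name. A hypothetical filter over ScrappedJob, just to illustrate the shape the refactor enables:

using System;
using System.Collections.Generic;
using System.Linq;
using Common;
using Common.DatabaseModels;

public class RecentJobsFilter : IFilter<ScrappedJob>
{
    private readonly TimeSpan maxAge;

    public RecentJobsFilter(TimeSpan maxAge) => this.maxAge = maxAge;

    // Synchronous despite the interface's method name, matching IFilter<T> as defined.
    public List<ScrappedJob> ApplyFilterAsync(List<ScrappedJob> entities)
    {
        var cutoff = DateTime.UtcNow - maxAge;
        return entities.Where(j => j.scrappedTime >= cutoff).ToList();
    }
}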
diff --git a/src/Backend/Operations/ScrapperRunner.cs b/src/Backend/Operations/ScrapperRunner.cs
new file mode 100644
index 0000000..79d8f2c
--- /dev/null
+++ b/src/Backend/Operations/ScrapperRunner.cs
@@ -0,0 +1,117 @@
+using System.Collections.Concurrent;
+using System.Text;
+using Common.Engines;
+using Common.Managers;
+using Common.Repositories;
+
+namespace Backend.Operations
+{
+    public class ScrapperRunner
+    {
+        ILogger<ScrapperRunner> logger;
+        GSEngine gsEngine;
+        AIEngine aiEngine;
+        JobsRepository jobsRepository;
+        JobScrapperSettingsManager settingsManager;
+
+        private ConcurrentBag<string> enabledScrappers = new ConcurrentBag<string>();
+        private TimeSpan runInterval = TimeSpan.FromHours(3);
+        private CancellationTokenSource cts = new CancellationTokenSource();
+        private Task backgroundTask = null;
+        public string CurrentState { get; private set; } = "Stopped";
+        private string LastError = string.Empty;
+        private DateTime lastRunTime = DateTime.MinValue;
+
+        public ScrapperRunner(ILogger<ScrapperRunner> logger, JobScrapperSettingsManager settingsManager, GSEngine gSEngine, AIEngine aIEngine, JobsRepository jobsRepository)
+        {
+            this.logger = logger;
+            this.gsEngine = gSEngine;
+            this.aiEngine = aIEngine;
+            this.jobsRepository = jobsRepository;
+            this.settingsManager = settingsManager;
+        }
+
+        public void EnableScrapper(string scrapperId)
+        {
+            if (!enabledScrappers.Contains(scrapperId))
+            {
+                enabledScrappers.Add(scrapperId);
+            }
+        }
+
+        public void DisableScrapper(string scrapperId)
+        {
+            enabledScrappers = new ConcurrentBag<string>(enabledScrappers.Except(new List<string> { scrapperId }));
+        }
+
+        public async Task RunScrapperAsync(string scrapperId)
+        {
+            var settings = await this.settingsManager.GetSettingsById(scrapperId);
+            if (settings == null)
+            {
+                logger.LogWarning($"Scrapper settings not found for id: {scrapperId}. Skipping scrapper run.");
+                return;
+            }
+
+            try
+            {
+                var scrapper = new JobScrapper(gsEngine, aiEngine, jobsRepository, logger);
+                scrapper.ConfigureSettings(settings);
+                await scrapper.RunAsync();
+                logger.LogInformation($"Scrapper run completed for id: {scrapperId}");
+                settings.lastRunTime = DateTime.UtcNow;
+                await this.settingsManager.UpdateSettingsAsync(scrapperId, settings);
+            }
+            catch (Exception ex)
+            {
+                logger.LogError($"Error running scrapper for id: {scrapperId}. Exception: {ex}");
+                this.LastError = ex.Message;
+            }
+        }
+
+        public void StartBackgroundRunner()
+        {
+            if (backgroundTask == null || backgroundTask.IsCompleted)
+            {
+                cts = new CancellationTokenSource();
+                backgroundTask = RunInBackgroundAsync(cts.Token);
+                this.CurrentState = "Running";
+            }
+        }
+
+        public void StopBackgroundRunner()
+        {
+            if (cts != null && !cts.IsCancellationRequested)
+            {
+                cts.Cancel();
+                this.CurrentState = "Stopped";
+            }
+        }
+
+        public string GetStatus()
+        {
+            var sb = new StringBuilder();
+            sb.Append($"CurrentState: {this.CurrentState}\n");
+            sb.Append($"AI Engine Ready: {this.aiEngine.IsReady()}\n");
+            sb.Append($"Run Interval: {this.runInterval} | Last Run Time (UTC): {this.lastRunTime}\n");
+            sb.Append($"EnabledScrappers: {string.Join(",", this.enabledScrappers)}\n");
+            sb.Append($"LastError: {this.LastError}");
+            return sb.ToString();
+        }
+
+        private async Task RunInBackgroundAsync(CancellationToken cancellationToken)
+        {
+            while (!cancellationToken.IsCancellationRequested)
+            {
+                lastRunTime = DateTime.UtcNow;
+                foreach (var scrapperId in enabledScrappers)
+                {
+                    logger.LogInformation($"Starting scrapper run for id: {scrapperId}");
+                    await RunScrapperAsync(scrapperId);
+                }
+                // Task.Delay observes the token: cancelling via StopBackgroundRunner()
+                // ends the pending delay with a TaskCanceledException.
+                await Task.Delay(runInterval, cancellationToken);
+            }
+        }
+    }
+}
\ No newline at end of file
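Reviewer note on the background loop: Task.Delay(runInterval, cancellationToken) throws TaskCanceledException when the token is cancelled, so backgroundTask finishes in a cancelled state rather than falling out of the while condition. That is benign here because nothing awaits the task, but a sketch of the same loop with the cancellation swallowed, in case a clean completion is ever needed:

using System;
using System.Threading;
using System.Threading.Tasks;

public static class DelayLoopSketch
{
    // Same CancellationTokenSource pattern as ScrapperRunner, assumed shape only.
    public static async Task RunAsync(TimeSpan interval, CancellationToken token)
    {
        try
        {
            while (!token.IsCancellationRequested)
            {
                Console.WriteLine($"Tick at {DateTime.UtcNow:O}");
                await Task.Delay(interval, token);
            }
        }
        catch (TaskCanceledException)
        {
            // Expected when the runner is stopped; treat as a clean exit.
        }
    }
}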
Exception: {ex}"); + this.LastError = ex.Message; + } + } + + public void StartBackgroundRunner() + { + if (backgroundTask == null || backgroundTask.IsCompleted) + { + cts = new CancellationTokenSource(); + backgroundTask = RunInBackgroundAsync(cts.Token); + this.CurrentState = "Running"; + } + } + + public void StopBackgroundRunner() + { + if (cts != null && !cts.IsCancellationRequested) + { + cts.Cancel(); + this.CurrentState = "Stopped"; + } + } + + public string GetStatus() + { + var sb = new StringBuilder(); + sb.Append($"CurrentState: {this.CurrentState}\n"); + sb.Append($"AI Engine Ready: {this.aiEngine.IsReady()}\n"); + sb.Append($"Run Interval: {this.runInterval} | Last Run Time (UTC): {this.lastRunTime}\n"); + sb.Append($"EnabledScrappers: {string.Join(",", this.enabledScrappers)}\n"); + sb.Append($"LastError: {this.LastError}"); + return sb.ToString(); + } + + private async Task RunInBackgroundAsync(CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + lastRunTime = DateTime.UtcNow; + foreach (var scrapperId in enabledScrappers) + { + logger.LogInformation($"Starting scrapper run for id: {scrapperId}"); + await RunScrapperAsync(scrapperId); + } + await Task.Delay(runInterval, cancellationToken); + } + } + } +} \ No newline at end of file diff --git a/src/Backend/Program.cs b/src/Backend/Program.cs index 3c19749..12127ae 100644 --- a/src/Backend/Program.cs +++ b/src/Backend/Program.cs @@ -3,7 +3,9 @@ namespace Backend; using Backend.Operations; using Common.Cache; using Common.Constants; +using Common.Engines; using Common.Factories; +using Common.Managers; using Common.Repositories; using Microsoft.Azure.Cosmos; using Microsoft.Extensions.Logging.ApplicationInsights; @@ -59,12 +61,14 @@ public static void Main(string[] args) { // Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle builder.Services.AddEndpointsApiExplorer(); - builder.Services.AddSwaggerGen(); + builder.Services.AddSwaggerGen(c => + { + c.CustomSchemaIds(type => type.FullName); + }); builder.Logging.AddConsole(); } - // Register AppContext as singleton var config = builder.Configuration; #region Register Cosmos related services @@ -94,6 +98,12 @@ public static void Main(string[] args) services.AddTransient(); #endregion + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + var app = builder.Build(); ILogger logger = app.Logger; diff --git a/src/Backend/Properties/launchSettings.json b/src/Backend/Properties/launchSettings.json index fe3de06..8b6a653 100644 --- a/src/Backend/Properties/launchSettings.json +++ b/src/Backend/Properties/launchSettings.json @@ -29,6 +29,16 @@ "ASPNETCORE_ENVIRONMENT": "Development" } }, + "container": { + "commandName": "Project", + "dotnetRunMessages": true, + "launchBrowser": true, + "launchUrl": "swagger", + "applicationUrl": "http://0.0.0.0:5164", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + }, "IIS Express": { "commandName": "IISExpress", "launchBrowser": true, diff --git a/src/Backend/Views/JobListView.cs b/src/Backend/Views/JobListView.cs new file mode 100644 index 0000000..d1fdff1 --- /dev/null +++ b/src/Backend/Views/JobListView.cs @@ -0,0 +1,54 @@ +using System.Text; +using Common.DatabaseModels; + +public static class JobListView +{ + public static string RenderScrappedJobsHtml(List jobs) + { + var sb = new StringBuilder(); + + sb.AppendLine(""); + sb.AppendLine(""); + 
diff --git a/src/Backend/Views/JobListView.cs b/src/Backend/Views/JobListView.cs
new file mode 100644
index 0000000..d1fdff1
--- /dev/null
+++ b/src/Backend/Views/JobListView.cs
@@ -0,0 +1,54 @@
+using System.Text;
+using Common.DatabaseModels;
+
+public static class JobListView
+{
+    public static string RenderScrappedJobsHtml(List<ScrappedJob> jobs)
+    {
+        var sb = new StringBuilder();
+
+        sb.AppendLine("<!DOCTYPE html>");
+        sb.AppendLine("<html>");
+        sb.AppendLine("<head>");
+        sb.AppendLine("<meta charset=\"utf-8\">");
+        sb.AppendLine("<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">");
+        sb.AppendLine("<title>Scrapped Jobs</title>");
+        sb.AppendLine(@"<style>
+            body { font-family: sans-serif; margin: 2em; }
+            .job-card { border: 1px solid #ddd; border-radius: 8px; padding: 1em; margin-bottom: 1em; }
+            .company { color: #555; }
+            .tag { display: inline-block; background: #eef2ff; border-radius: 4px; padding: 2px 8px; margin-right: 4px; font-size: 0.85em; }
+            .scrapped-time { color: #999; font-size: 0.8em; }
+        </style>");
+        sb.AppendLine("</head>");
+        sb.AppendLine("<body>");
+        sb.AppendLine("<h1>Scrapped Job Listings</h1>");
+
+        foreach (var job in jobs)
+        {
+            sb.AppendLine("<div class=\"job-card\">");
+            sb.AppendLine($"    <a href=\"{job.link}\">{System.Net.WebUtility.HtmlEncode(job.title ?? job.displayLink)}</a>");
+            sb.AppendLine($"    <div class=\"company\">{System.Net.WebUtility.HtmlEncode(job.companyName ?? "Unknown")} — {System.Net.WebUtility.HtmlEncode(job.location ?? "N/A")}</div>");
+            sb.AppendLine($"    <p>{System.Net.WebUtility.HtmlEncode(job.snippet ?? "No description available.")}</p>");
+
+            if (job.tags != null && job.tags.Count > 0)
+            {
+                sb.AppendLine("    <div class=\"tags\">");
+                foreach (var tag in job.tags)
+                    sb.AppendLine($"        <span class=\"tag\">{System.Net.WebUtility.HtmlEncode(tag)}</span>");
+                sb.AppendLine("    </div>");
+            }
+
+            sb.AppendLine($"    <a href=\"{job.link}\">View posting</a>");
+            sb.AppendLine($"    <div class=\"scrapped-time\">Scrapped: {job.scrappedTime:yyyy-MM-dd HH:mm}</div>");
+            sb.AppendLine("</div>");
+        }
+
+        sb.AppendLine("</body></html>");
+
+        return sb.ToString();
+    }
+}
diff --git a/src/Backend/appsettings.json b/src/Backend/appsettings.json
index 22c42e9..d7bf0c3 100644
--- a/src/Backend/appsettings.json
+++ b/src/Backend/appsettings.json
@@ -11,7 +11,10 @@
     "CosmosDbUri": "https://lcw-cosmos.documents.azure.com:443/",
     "AccountKey": "",
     "LCProject:DatabaseName": "LeetCodeWrapper",
-    "LCProject:ContainerName": "Problems"
+    "LCProject:ContainerName": "Problems",
+    "JobProject:DatabaseName": "JobDataBase",
+    "JobProject:ContainerName": "JobDetailsContainer",
+    "JobProject:ScraperContainerName": "ScrapperSettingsContainer"
   },
   "ApplicationInsights": {
     "LogLevel": {
@@ -20,5 +23,9 @@
     },
     "ConnectionString": ""
   },
+  "GoogleSearch": {
+    "ApiKey": "",
+    "SearchEngineId": "e509d21f7c4af4d2c"
+  },
   "AllowedHosts": "*"
 }
diff --git a/src/Common/Common.csproj b/src/Common/Common.csproj
index 844bdc5..91dccc4 100644
--- a/src/Common/Common.csproj
+++ b/src/Common/Common.csproj
@@ -13,6 +13,9 @@
+
+
+
diff --git a/src/Common/Constants/ConfigurationConstants.cs b/src/Common/Constants/ConfigurationConstants.cs
index bb21455..b286deb 100644
--- a/src/Common/Constants/ConfigurationConstants.cs
+++ b/src/Common/Constants/ConfigurationConstants.cs
@@ -14,6 +14,9 @@ public static class ConfigurationConstants
         public const string ApplicationSettings = "ApplicationSettings";
         public const string LCProjectContainerNameKey = "LCProject:ContainerName";
         public const string LCProjectDatabaseNameKey = "LCProject:DatabaseName";
+        public const string JobsProjectContainerNameKey = "JobProject:ContainerName";
+        public const string JobsScraperSettingsContainerNameKey = "JobProject:ScraperContainerName";
+        public const string JobsProjectDatabaseNameKey = "JobProject:DatabaseName";
         #endregion
     }
 }
diff --git a/src/Common/DatabaseModels/JobScrapperSettings.cs b/src/Common/DatabaseModels/JobScrapperSettings.cs
new file mode 100644
index 0000000..570c209
--- /dev/null
+++ b/src/Common/DatabaseModels/JobScrapperSettings.cs
@@ -0,0 +1,82 @@
+using System;
+using Common.Models.Public;
+using PublicSettingsModel = Common.Models.Public.QuerySettings;
+
+namespace Common.DatabaseModels
+{
+    public class JobScrapperSettings
+    {
+        public string id { get; set; }
+
+        public string settingName { get; set; }
+
+        public bool enabled { get; set; }
+
+        public DateTime lastUpdated { get; set; }
+
+        public DateTime lastRunTime { get; set; }
+
+        public int runIntervalInMinutes { get; set; }
+
+        public QuerySettings settings { get; set; }
+
+        public JobScrapperSettings(string id,
+            string settingName,
+            int? runIntervalsInMinutes,
+            PublicSettingsModel settings,
+            bool enabled = false)
+        {
+            this.id = id;
+            this.settingName = settingName;
+            this.enabled = enabled;
+            this.lastUpdated = DateTime.UtcNow;
+            this.lastRunTime = DateTime.MinValue;
+            // Math.Min clamps the interval to at most 60 minutes; missing values default to 60.
+            this.runIntervalInMinutes = Math.Min(60, runIntervalsInMinutes ?? 60);
+            this.settings = new QuerySettings(settings);
+        }
+
+        public void UpdateFromPublicModel(ScrapperSettings publicSettings)
+        {
+            if (publicSettings == null) throw new ArgumentNullException(nameof(publicSettings));
+
+            this.enabled = publicSettings.enabled;
+            this.runIntervalInMinutes = publicSettings.runIntervalInMinutes;
+            this.settings = new QuerySettings(publicSettings.settings);
+            this.lastUpdated = DateTime.UtcNow;
+            // keep settingName unchanged unless the public model provides one
+            if (!string.IsNullOrWhiteSpace(publicSettings.name))
+            {
+                this.settingName = publicSettings.name;
+            }
+        }
+
+        public ScrapperSettings ToPublicModel()
+        {
+            return new ScrapperSettings
+            {
+                id = this.id,
+                name = this.settingName,
+                enabled = this.enabled,
+                lastUpdated = this.lastUpdated,
+                lastRunTime = this.lastRunTime,
+                runIntervalInMinutes = this.runIntervalInMinutes,
+                settings = new PublicSettingsModel
+                {
+                    query = this.settings.query,
+                    locations = this.settings.locations,
+                    sitesToInclude = this.settings.sitesToInclude,
+                    sitesToExclude = this.settings.sitesToExclude,
+                    exactTerms = this.settings.exactTerms,
+                    negativeTerms = this.settings.negativeTerms,
+                    additionalSearchTerms = this.settings.additionalSearchTerms,
+                    lookBackDays = this.settings.lookBackDays
+                }
+            };
+        }
+
+        public QuerySettings GetQuerySettings()
+        {
+            return this.settings;
+        }
+    }
+}
\ No newline at end of file
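Reviewer note: JobScrapperSettings deliberately round-trips through the public model rather than exposing the storage type from the API. A usage sketch with hypothetical values; note that the constructor clamps runIntervalInMinutes to at most 60 via Math.Min, so the 240 below is stored as 60:

using Common.DatabaseModels;
using PublicQuery = Common.Models.Public.QuerySettings;

var dbSettings = new JobScrapperSettings(
    id: "sde-india",                 // hypothetical id
    settingName: "SDE India",
    runIntervalsInMinutes: 240,      // stored as 60 because of the Math.Min clamp
    settings: new PublicQuery { query = "software engineer jobs", lookBackDays = 1 },
    enabled: true);

var publicView = dbSettings.ToPublicModel();   // safe to return from the API
publicView.enabled = false;
dbSettings.UpdateFromPublicModel(publicView);  // enabled = false, lastUpdated refreshed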
diff --git a/src/Common/Models/ProblemSchema.cs b/src/Common/DatabaseModels/ProblemSchema.cs
similarity index 55%
rename from src/Common/Models/ProblemSchema.cs
rename to src/Common/DatabaseModels/ProblemSchema.cs
index edbc20d..137bb81 100644
--- a/src/Common/Models/ProblemSchema.cs
+++ b/src/Common/DatabaseModels/ProblemSchema.cs
@@ -1,30 +1,32 @@
-namespace Common.Models
+using Common.Models;
+
+namespace Common.DatabaseModels
 {
     public class ProblemSchema
     {
         public ProblemSchema() { }
 
         public ProblemSchema(ProblemSchema ps)
         {
-            this.id = ps.id;
-            this.title = ps.title;
-            this.url = ps.url;
-            this.difficulty = ps.difficulty;
-            this.acceptance = ps.acceptance;
-            this.frequency = ps.frequency;
-            this.companyList = new List<KeyValuePair<string, List<string>>>();
-            this.metadataList = new List<KeyValuePair<string, string>>();
+            id = ps.id;
+            title = ps.title;
+            url = ps.url;
+            difficulty = ps.difficulty;
+            acceptance = ps.acceptance;
+            frequency = ps.frequency;
+            companyList = new List<KeyValuePair<string, List<string>>>();
+            metadataList = new List<KeyValuePair<string, string>>();
         }
 
         public ProblemSchema(Problem p)
         {
-            this.id = p.id;
-            this.title = p.title;
-            this.url = p.url;
-            this.difficulty = p.difficulty;
-            this.acceptance = p.acceptance;
-            this.frequency = p.frequency;
-            this.companyList = p.companies.Select(kv => new KeyValuePair<string, List<string>>(kv.Key, kv.Value.ToList())).ToList();
-            this.metadataList = p.metadata.Select(kv => new KeyValuePair<string, string>(kv.Key, kv.Value)).ToList();
+            id = p.id;
+            title = p.title;
+            url = p.url;
+            difficulty = p.difficulty;
+            acceptance = p.acceptance;
+            frequency = p.frequency;
+            companyList = p.companies.Select(kv => new KeyValuePair<string, List<string>>(kv.Key, kv.Value.ToList())).ToList();
+            metadataList = p.metadata.Select(kv => new KeyValuePair<string, string>(kv.Key, kv.Value)).ToList();
         }
 
         public string id { get; set; } = string.Empty;
diff --git a/src/Common/DatabaseModels/QuerySettings.cs b/src/Common/DatabaseModels/QuerySettings.cs
new file mode 100644
index 0000000..aa447a6
--- /dev/null
+++ b/src/Common/DatabaseModels/QuerySettings.cs
@@ -0,0 +1,33 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using PublicSettingsModel = Common.Models.Public.QuerySettings;
+
+namespace Common.DatabaseModels
+{
+
+    public class QuerySettings
+    {
+        public string query { get; set; }
+        public List<string> locations { get; set; }
+        public List<string> sitesToInclude { get; set; }
+        public List<string> sitesToExclude { get; set; }
+        public List<string> exactTerms { get; set; }
+        public List<string> negativeTerms { get; set; }
+        public int lookBackDays { get; set; } = 1;
+        public List<string> additionalSearchTerms { get; set; }
+
+        public QuerySettings(PublicSettingsModel qs)
+        {
+            query = qs.query;
+            locations = qs.locations;
+            sitesToInclude = qs.sitesToInclude;
+            sitesToExclude = qs.sitesToExclude;
+            exactTerms = qs.exactTerms;
+            negativeTerms = qs.negativeTerms;
+            additionalSearchTerms = qs.additionalSearchTerms;
+            // copy lookBackDays as well; otherwise a caller-specified value is silently lost to the default of 1
+            lookBackDays = qs.lookBackDays;
+        }
+    }
+}
diff --git a/src/Common/DatabaseModels/ScrappedJob.cs b/src/Common/DatabaseModels/ScrappedJob.cs
new file mode 100644
index 0000000..f334826
--- /dev/null
+++ b/src/Common/DatabaseModels/ScrappedJob.cs
@@ -0,0 +1,37 @@
+using Common.Models;
+
+namespace Common.DatabaseModels
+{
+    public class ScrappedJob
+    {
+        public string id { get; set; }
+        public string title { get; set; }
+        public string displayLink { get; set; }
+        public string snippet { get; set; }
+        public string description { get; set; }
+        public string link { get; set; }
+        public DateTime scrappedTime { get; set; }
+        public DateTime JobPostedTime { get; set; }
+        public string companyName { get; set; }
+        public string jobType { get; set; }
+        public string location { get; set; }
+        public List<string> tags { get; set; } = new List<string>();
+
+        public ScrappedJob() { }
+        public ScrappedJob(Item item, DateTime scrappedTime)
+        {
+            title = item.title;
+            displayLink = item.displayLink;
+            snippet = item.snippet;
+            link = item.link;
+            id = GenerateHashId(item.link, item.title, item.displayLink);
+            this.scrappedTime = scrappedTime;
+            description = "NA";
+        }
+
+        private string GenerateHashId(string v1, string v2, string v3)
+        {
+            return Helper.FastHashId.GenerateHashId(v1, v2, v3);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Common/Engines/AIEngine.cs b/src/Common/Engines/AIEngine.cs
new file mode 100644
index 0000000..ee3fa22
--- /dev/null
+++ b/src/Common/Engines/AIEngine.cs
@@ -0,0 +1,132 @@
+namespace Common.Engines
+{
+    using Azure;
+    using Azure.AI;
+    using Azure.Identity;
+    using Azure.AI.Inference;
+    using Azure.AI.Projects;
+    using Azure.AI.Agents.Persistent;
+    using System.Diagnostics;
+    using Newtonsoft.Json;
+    using Microsoft.Extensions.Logging;
+    using Microsoft.Extensions.Configuration;
+    using Common.DatabaseModels;
+
+    public class AIEngine
+    {
+        private const string AI_SERVICE_ENDPOINT = "https://job-analyzer.services.ai.azure.com/api/projects/firstProject";
+        private const string AGENT_ID = "asst_gWZPhAs5gg4jVvmuto9sop5h";
+        private readonly ILogger<AIEngine> logger;
+        private readonly IConfiguration configuration;
+        private PersistentAgent agent;
+        private PersistentAgentsClient agentsClient;
+        public AIEngine(IConfiguration configuration, ILogger<AIEngine> logger)
+        {
+            this.logger = logger;
+            this.configuration = configuration;
+            for (int i = 0; i < 3; i++)
+            {
+                try
+                {
+                    this.agentsClient = new(AI_SERVICE_ENDPOINT, new DefaultAzureCredential());
+                    this.agent = this.agentsClient.Administration.GetAgent(AGENT_ID);
+                    this.logger.LogInformation($"AIEngine initialized successfully. Endpoint: {AI_SERVICE_ENDPOINT}, AgentId: {AGENT_ID}");
+                    break;
+                }
+                catch (Exception ex)
+                {
+                    logger.LogError($"Error initializing AIEngine: {ex.Message}");
+                    Task.Delay((i + 1) * 2000).ConfigureAwait(false).GetAwaiter().GetResult();
+                }
+            }
+
+            if (!IsReady())
+            {
+                this.logger.LogError("AIEngine failed to initialize properly.");
+                throw new InvalidOperationException("AIEngine failed to initialize properly.");
+            }
+        }
+
+        public bool IsReady()
+        {
+            return this.agent != null && this.agentsClient != null;
+        }
+
+        public async Task<List<KeyValuePair<string, string>>> GetJobLevelAsync(List<ScrappedJob> scrappedJobs)
+        {
+            var results = new List<KeyValuePair<string, string>>();
+            this.logger.LogInformation($"Processing {scrappedJobs.Count} scrapped jobs. Ready: {IsReady()}");
+            for (int i = 0; i < scrappedJobs.Count; i += 20)
+            {
+                var batch = scrappedJobs.Skip(i).Take(20).ToList();
+                try
+                {
+                    var sw = Stopwatch.StartNew();
+                    var prompt = JsonConvert.SerializeObject(batch);
+                    var response = await GetResponseInternalAsync(prompt);
+                    sw.Stop();
+                    this.logger.LogInformation($"Processed jobs: {string.Join(",", batch.Select(j => j.id))} | response: {response}");
+                    var kvList = response.Split(",").Select(kvs => kvs.Split(":")).Where(kv => kv.Length == 2).Select(kv => new KeyValuePair<string, string>(kv[0].Trim(), kv[1].Trim())).ToList();
+                    results.AddRange(kvList);
+                }
+                catch (Exception ex)
+                {
+                    this.logger.LogError($"Error processing batch: {string.Join(",", batch.Select(j => j.id))} | {ex.Message}");
+                }
+            }
+            return results;
+        }
+
+        private async Task<string> GetResponseInternalAsync(string input)
+        {
+            if (!IsReady())
+            {
+                logger.LogError($"AIEngine is not properly initialized. Input: {input}");
+                throw new InvalidOperationException("AIEngine not initialized.");
+            }
+
+            var threadResponse = await agentsClient.Threads.CreateThreadAsync();
+            var thread = threadResponse.Value;
+
+            try
+            {
+                await agentsClient.Messages.CreateMessageAsync(thread.Id, MessageRole.User, input);
+                var runResponse = await agentsClient.Runs.CreateRunAsync(thread.Id, agent.Id);
+                var run = runResponse.Value;
+
+                // Poll until terminal state
+                do
+                {
+                    await Task.Delay(500);
+                    run = await agentsClient.Runs.GetRunAsync(thread.Id, run.Id);
+                }
+                while (run.Status == RunStatus.Queued || run.Status == RunStatus.InProgress);
+
+                if (run.Status != RunStatus.Completed)
+                {
+                    logger.LogError($"Run failed. ThreadId={thread.Id}, Error={run.LastError?.Message}");
+                    throw new InvalidOperationException($"Run failed: {run.LastError?.Message}");
+                }
+
+                // Fetch all messages in ascending order
+                var messages = agentsClient.Messages.GetMessagesAsync(thread.Id, order: ListSortOrder.Ascending);
+
+                string response = string.Empty;
+                PersistentThreadMessage lastThreadMessage = messages.ToBlockingEnumerable().Last();
+                foreach (MessageContent contentItem in lastThreadMessage.ContentItems)
+                {
+                    if (contentItem is MessageTextContent textItem)
+                    {
+                        response += textItem.Text;
+                    }
+                }
+
+                return response;
+            }
+            finally
+            {
+                await agentsClient.Threads.DeleteThreadAsync(thread.Id);
+            }
+        }
+    }
+}
\ No newline at end of file
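Reviewer note: GetJobLevelAsync assumes the agent replies with comma-separated jobId:level pairs; nothing enforces that schema, and malformed fragments are silently dropped by the Length == 2 filter. A worked example of the parse, using the same split logic as the engine (the ids and levels below are made up):

using System;
using System.Collections.Generic;
using System.Linq;

// Hypothetical agent response: two "jobId:level" pairs.
string response = "0A1B2C3D4E:Mid-Level,5F6G7H8I9J:Senior-Level";

List<KeyValuePair<string, string>> kvList = response
    .Split(',')
    .Select(kvs => kvs.Split(':'))
    .Where(kv => kv.Length == 2)                 // malformed fragments are skipped
    .Select(kv => new KeyValuePair<string, string>(kv[0].Trim(), kv[1].Trim()))
    .ToList();

foreach (var kv in kvList)
    Console.WriteLine($"{kv.Key} => {kv.Value}"); // JobScrapper later splits the level on '-' into tags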
diff --git a/src/Common/Engines/GSEngine.cs b/src/Common/Engines/GSEngine.cs
new file mode 100644
index 0000000..f871261
--- /dev/null
+++ b/src/Common/Engines/GSEngine.cs
@@ -0,0 +1,238 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Net.Http;
+using System.Text;
+using System.Threading.Tasks;
+using Common.DatabaseModels;
+using Common.Models;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.Logging;
+using Newtonsoft.Json;
+
+namespace Common.Engines
+{
+    public class GSEngine
+    {
+        private readonly string apiKey;
+        private readonly string searchEngineId;
+        private readonly HttpClient httpClient;
+        private string baseUrl = "https://customsearch.googleapis.com/customsearch/v1";
+        private int maxResultsPerSearch = 150;
+        private readonly ILogger<GSEngine> logger;
+
+        public GSEngine(IConfiguration configuration, ILogger<GSEngine> _logger)
+        {
+            this.apiKey = configuration["GoogleSearch:ApiKey"] ?? throw new ArgumentNullException("Google Search API Key is not configured.");
+            this.searchEngineId = configuration["GoogleSearch:SearchEngineId"] ?? throw new ArgumentNullException("Google Search Engine ID is not configured.");
+            this.logger = _logger;
+            this.httpClient = new HttpClient();
+        }
+
+        public async Task<List<ScrappedJob>> SearchQueryAsync(JobScrapperSettings settings)
+        {
+            if (settings == null) throw new ArgumentNullException(nameof(settings));
+
+            var qsettings = settings.GetQuerySettings() ?? throw new InvalidOperationException("Query settings cannot be null.");
+            var allJobs = new List<ScrappedJob>();
+            int startIndex = 1;
+            int totalResults = 0;
+
+            var sb = new StringBuilder();
+            sb.Append($"{this.baseUrl}?key={apiKey}&cx={searchEngineId}");
+
+            // base query
+            var baseQuery = qsettings.query ?? string.Empty;
+            sb.Append($"&q={Uri.EscapeDataString(baseQuery)}");
+
+            // date restriction
+            if (qsettings.lookBackDays > 0)
+            {
+                sb.Append(AddDateRestrictionToQuery(qsettings.lookBackDays));
+            }
+
+            // Exact terms (join list if provided)
+            if (qsettings.exactTerms != null && qsettings.exactTerms.Any())
+            {
+                var exact = string.Join(" ", qsettings.exactTerms.Where(s => !string.IsNullOrWhiteSpace(s)));
+                if (!string.IsNullOrWhiteSpace(exact)) sb.Append(AddExactTermsToQuery(exact));
+            }
+
+            // Negative terms
+            if (qsettings.negativeTerms != null && qsettings.negativeTerms.Any())
+            {
+                var neg = string.Join(" ", qsettings.negativeTerms.Where(s => !string.IsNullOrWhiteSpace(s)));
+                if (!string.IsNullOrWhiteSpace(neg)) sb.Append(AddNegativeTermToQuery(neg));
+            }
+
+            // Location - use first location if present (api uses gl for country)
+            if (qsettings.locations != null && qsettings.locations.Any() && !string.IsNullOrWhiteSpace(qsettings.locations.First()))
+            {
+                sb.Append(AddClientLocationToQuery(qsettings.locations.First()));
+            }
+
+            // Site include / exclude - use first for siteSearch (API supports one siteSearch parameter)
+            if (qsettings.sitesToInclude != null && qsettings.sitesToInclude.Any() && !string.IsNullOrWhiteSpace(qsettings.sitesToInclude.First()))
+            {
+                sb.Append(AddSiteSearchToQuery(qsettings.sitesToInclude.First()));
+            }
+            else if (qsettings.sitesToExclude != null && qsettings.sitesToExclude.Any() && !string.IsNullOrWhiteSpace(qsettings.sitesToExclude.First()))
+            {
+                // prefer include if present; otherwise exclude
+                sb.Append(AddExcludeSiteSearchFromQuery(qsettings.sitesToExclude.First()));
+            }
+
+            // Additional terms (hq)
+            if (qsettings.additionalSearchTerms != null && qsettings.additionalSearchTerms.Any())
+            {
+                var add = string.Join(" ", qsettings.additionalSearchTerms.Where(s => !string.IsNullOrWhiteSpace(s)));
+                if (!string.IsNullOrWhiteSpace(add)) sb.Append(AddAdditionalSearchTerms(add));
+            }
+
+            var template = sb.ToString();
+
+            do
+            {
+                var url = template + AddStartIndexToQuery(startIndex);
+                var res = await SearchRawUrlAsync(url);
+                if (res == null)
+                {
+                    logger.LogError("SearchRawUrlAsync returned null for url: {url}", url);
+                    break;
+                }
+
+                // No items => stop
+                if (res.items == null || res.items.Count == 0)
+                {
+                    logger.LogInformation("No items returned for url: {url}", url);
+                    break;
+                }
+
+                foreach (var item in res.items)
+                {
+                    try
+                    {
+                        var job = new ScrappedJob(item, DateTime.UtcNow);
+                        allJobs.Add(job);
+                    }
+                    catch (Exception ex)
+                    {
+                        logger.LogWarning(ex, "Skipping item due to processing error.");
+                    }
+                }
+
+                // Determine total results
+                if (!string.IsNullOrWhiteSpace(res.searchInformation?.totalResults))
+                {
+                    if (!int.TryParse(res.searchInformation.totalResults, out totalResults))
+                    {
+                        // try fallback to queries.request[0].totalResults
+                        var reqTotal = res.queries?.request?.FirstOrDefault()?.totalResults;
+                        if (!int.TryParse(reqTotal, out totalResults)) totalResults = int.MaxValue;
+                    }
+                }
+                else
+                {
+                    var reqTotal = res.queries?.request?.FirstOrDefault()?.totalResults;
+                    if (!int.TryParse(reqTotal, out totalResults)) totalResults = int.MaxValue;
+                }
+
+                // Advance to next page if present
+                if (res.queries?.nextPage != null && res.queries.nextPage.Count > 0)
+                {
+                    var next = res.queries.nextPage[0];
+                    // Use next.startIndex if present; otherwise increment by count
+                    if (next.startIndex > 0)
+                    {
+                        startIndex = next.startIndex;
+                    }
+                    else
+                    {
+                        var count = res.queries.request?.FirstOrDefault()?.count ?? res.items.Count;
+                        if (count <= 0) break;
+                        startIndex += count;
+                    }
+                }
+                else
+                {
+                    // no next page -> stop
+                    break;
+                }
+
+                // safety: prevent infinite looping
+                if (startIndex <= 0 || startIndex > maxResultsPerSearch) break;
+            }
+            while (startIndex <= maxResultsPerSearch && (totalResults == 0 || startIndex <= totalResults));
+
+            this.logger.LogInformation("Fetched {count} jobs. Total available (approx): {total}. Url template: {template}", allJobs.Count, totalResults, template);
+            return allJobs;
+        }
+
+        public async Task<GSResult> SearchRawUrlAsync(string url)
+        {
+            try
+            {
+                var response = await httpClient.GetAsync(url);
+                if (!response.IsSuccessStatusCode)
+                {
+                    logger.LogWarning("Google Search API returned status {status} for url {url}", response.StatusCode, url);
+                    return null;
+                }
+
+                var content = await response.Content.ReadAsStringAsync();
+                return JsonConvert.DeserializeObject<GSResult>(content);
+            }
+            catch (Exception ex)
+            {
+                logger.LogError(ex, "Error occurred during Google Search API call.");
+            }
+
+            return null;
+        }
+
+        private string AddClientLocationToQuery(string location = "in")
+        {
+            return $"&gl={Uri.EscapeDataString(location)}";
+        }
+
+        private string AddDateRestrictionToQuery(int previousNDays = 1)
+        {
+            return $"&dateRestrict=d{previousNDays}";
+        }
+
+        private string AddNegativeTermToQuery(string phrase = "manager")
+        {
+            return $"&excludeTerms={Uri.EscapeDataString(phrase)}";
+        }
+
+        private string AddExactTermsToQuery(string phrase = "Software Engineer")
+        {
+            return $"&exactTerms={Uri.EscapeDataString(phrase)}";
+        }
+
+        private string AddSiteSearchToQuery(string site = "linkedin.com")
+        {
+            return $"&siteSearch={Uri.EscapeDataString(site)}&siteSearchFilter=i";
+        }
+
+        private string AddExcludeSiteSearchFromQuery(string site = "linkedin.com")
+        {
+            return $"&siteSearch={Uri.EscapeDataString(site)}&siteSearchFilter=e";
+        }
+
+        private string AddSortingToQuery(string sort = "date")
+        {
+            return $"&sort={Uri.EscapeDataString(sort)}";
+        }
+
+        private string AddAdditionalSearchTerms(string terms = "India")
+        {
+            return $"&hq={Uri.EscapeDataString(terms)}";
+        }
+
+        private string AddStartIndexToQuery(int startIndex = 1)
+        {
+            return $"&start={startIndex}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Common/Enums/CosmosContainerEnum.cs b/src/Common/Enums/CosmosContainerEnum.cs
index 538f048..cf53d10 100644
--- a/src/Common/Enums/CosmosContainerEnum.cs
+++ b/src/Common/Enums/CosmosContainerEnum.cs
@@ -8,6 +8,8 @@ namespace Common.Enums
 {
     public enum CosmosContainerEnum
     {
-        ProblemsContainer
+        ProblemsContainer,
+        JobsContainer,
+        ScrapperSettingsContainer
     }
 }
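Reviewer note: for a representative QuerySettings, SearchQueryAsync ends up composing a request like the one below. The parameter names come straight from the builder methods above; the key, cx, and all values are illustrative placeholders only:

// Illustrative only: query = "software engineer jobs", lookBackDays = 1,
// exactTerms = ["Software Engineer"], negativeTerms = ["manager"],
// locations = ["in"], sitesToInclude = ["linkedin.com"],
// additionalSearchTerms = ["India"], first page (startIndex = 1).
const string exampleRequest =
    "https://customsearch.googleapis.com/customsearch/v1" +
    "?key=API_KEY&cx=ENGINE_ID" +
    "&q=software%20engineer%20jobs" +               // base query
    "&dateRestrict=d1" +                            // AddDateRestrictionToQuery
    "&exactTerms=Software%20Engineer" +             // AddExactTermsToQuery
    "&excludeTerms=manager" +                       // AddNegativeTermToQuery
    "&gl=in" +                                      // AddClientLocationToQuery
    "&siteSearch=linkedin.com&siteSearchFilter=i" + // AddSiteSearchToQuery
    "&hq=India" +                                   // AddAdditionalSearchTerms
    "&start=1";                                     // AddStartIndexToQuery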
diff --git a/src/Common/Factories/CosmosContainerFactory.cs b/src/Common/Factories/CosmosContainerFactory.cs
index d526140..f54b062 100644
--- a/src/Common/Factories/CosmosContainerFactory.cs
+++ b/src/Common/Factories/CosmosContainerFactory.cs
@@ -3,6 +3,7 @@
 using Common.Models.Miscellaneous;
 using Microsoft.Azure.Cosmos;
 using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.Logging;
 
 namespace Common.Factories
 {
@@ -12,25 +13,30 @@ public class CosmosContainerFactory : ICosmosContainerFactory
 
         private readonly IConfiguration _configuration;
 
-        public CosmosContainerFactory(CosmosClient cosmosClient, IConfiguration configuration)
+        private readonly ILogger<CosmosContainerFactory> _logger;
+        public CosmosContainerFactory(CosmosClient cosmosClient,
+            IConfiguration configuration,
+            ILogger<CosmosContainerFactory> logger)
         {
             _cosmosClient = cosmosClient;
             _configuration = configuration;
+            _logger = logger;
         }
 
         public Container GetContainer(CosmosContainerEnum container)
         {
             var containerDetails = LoadContainerDetails();
-            switch (container)
+
+            if (!containerDetails.ContainsKey(container))
             {
-                case CosmosContainerEnum.ProblemsContainer:
-                    var dbId = containerDetails[container].DatabaseName;
-                    var containerId = containerDetails[container].ContainerName;
-                    var db = _cosmosClient.GetDatabase(dbId);
-                    return db.GetContainer(containerId);
-                default:
-                    throw new ArgumentOutOfRangeException(nameof(container), container, null);
+                _logger.LogError("Container details not found for container: {Container}", container);
+                throw new ArgumentOutOfRangeException(nameof(container), container, null);
             }
+
+            var databaseName = containerDetails[container].DatabaseName;
+            var containerName = containerDetails[container].ContainerName;
+            var dbInstance = _cosmosClient.GetDatabase(databaseName);
+            return dbInstance.GetContainer(containerName);
         }
 
         private Dictionary<CosmosContainerEnum, ContainerDetails> LoadContainerDetails()
@@ -41,6 +47,14 @@ private Dictionary<CosmosContainerEnum, ContainerDetails> LoadContainerDetails()
             {
                 CosmosContainerEnum.ProblemsContainer,
                 new ContainerDetails(config[ConfigurationConstants.LCProjectDatabaseNameKey], config[ConfigurationConstants.LCProjectContainerNameKey])
+            },
+            {
+                CosmosContainerEnum.JobsContainer,
+                new ContainerDetails(config[ConfigurationConstants.JobsProjectDatabaseNameKey], config[ConfigurationConstants.JobsProjectContainerNameKey])
+            },
+            {
+                CosmosContainerEnum.ScrapperSettingsContainer,
+                new ContainerDetails(config[ConfigurationConstants.JobsProjectDatabaseNameKey], config[ConfigurationConstants.JobsScraperSettingsContainerNameKey])
             }
         };
     }
diff --git a/src/Common/Helper.cs b/src/Common/Helper.cs
new file mode 100644
index 0000000..435650e
--- /dev/null
+++ b/src/Common/Helper.cs
@@ -0,0 +1,51 @@
+namespace Common.Helper
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Text;
+
+    public static class FastHashId
+    {
+        public static string GenerateHashId(string part1, string part2, string part3)
+        {
+            return GenerateHashId(new List<string> { part1, part2, part3 });
+        }
+
+        public static string GenerateHashId(List<string> input)
+        {
+            string combined = string.Join("|", input);
+            ulong hash = 14695981039346656037UL; // FNV offset basis
+            const ulong prime = 1099511628211UL;
+
+            foreach (byte b in Encoding.UTF8.GetBytes(combined))
+            {
+                hash ^= b;
+                hash *= prime;
+            }
+
+            // Convert hash to Base36 (alphanumeric) for compactness
+            string base36 = ToBase36(hash);
+
+            // Ensure it's exactly 10 characters
+            return base36.Length > 10 ? base36.Substring(0, 10) : base36.PadLeft(10, '0');
+        }
+
+        private static string ToBase36(ulong value)
+        {
+            const string chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+            StringBuilder sb = new StringBuilder();
+            while (value > 0)
+            {
+                sb.Insert(0, chars[(int)(value % 36)]);
+                value /= 36;
+            }
+            return sb.ToString();
+        }
+
+        public static string GenerateRandomGuid()
+        {
+            return Guid.NewGuid().ToString("N");
+        }
+    }
+
+}
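Reviewer note: the deterministic FNV-1a id is what makes the CreateIfNotExistsAsync dedupe in JobsRepository work; re-scraping the same posting always yields the same 10-character Base36 id. A usage sketch:

using System;
using Common.Helper;

string a = FastHashId.GenerateHashId("https://example.com/job/1", "SDE II", "example.com");
string b = FastHashId.GenerateHashId("https://example.com/job/1", "SDE II", "example.com");

Console.WriteLine(a == b);   // True: same inputs always produce the same id
Console.WriteLine(a.Length); // 10: truncated or zero-padded Base36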
diff --git a/src/Common/IFilter.cs b/src/Common/IFilter.cs
new file mode 100644
index 0000000..0a90e8c
--- /dev/null
+++ b/src/Common/IFilter.cs
@@ -0,0 +1,7 @@
+namespace Common
+{
+    public interface IFilter<T>
+    {
+        public List<T> ApplyFilterAsync(List<T> entities);
+    }
+}
\ No newline at end of file
diff --git a/src/Common/Managers/JobScrapper.cs b/src/Common/Managers/JobScrapper.cs
new file mode 100644
index 0000000..84d6a07
--- /dev/null
+++ b/src/Common/Managers/JobScrapper.cs
@@ -0,0 +1,79 @@
+namespace Common.Managers
+{
+    using Common.DatabaseModels;
+    using Common.Engines;
+    using Common.Repositories;
+    using Microsoft.Extensions.Logging;
+
+    public class JobScrapper
+    {
+        private JobScrapperSettings settings;
+        private GSEngine gsEngine;
+        private AIEngine aiEngine;
+        private JobsRepository jobsRepository;
+        private ILogger logger;
+
+        public JobScrapper(GSEngine gsEngine, AIEngine aiEngine, JobsRepository jobsRepo, ILogger logger)
+        {
+            this.logger = logger;
+            this.gsEngine = gsEngine;
+            this.aiEngine = aiEngine;
+            this.jobsRepository = jobsRepo;
+        }
+
+        public void ConfigureSettings(JobScrapperSettings settings)
+        {
+            this.settings = settings;
+        }
+
+        public async Task RunAsync()
+        {
+            var startTime = DateTime.UtcNow;
+            this.logger.LogInformation($"Starting JobScrapper run for settings: {this.settings}");
+
+            var searchResults = await gsEngine.SearchQueryAsync(this.settings);
+
+            if (searchResults == null || searchResults.Count == 0)
+            {
+                this.logger.LogInformation($"Nothing to process. Query settings: {this.settings}");
+                return;
+            }
+
+            var mp = new Dictionary<string, ScrappedJob>(StringComparer.OrdinalIgnoreCase);
+            foreach (var job in searchResults)
+            {
+                if (!mp.ContainsKey(job.id))
+                {
+                    mp[job.id] = job;
+                }
+            }
+
+            var levels = await this.aiEngine.GetJobLevelAsync(searchResults);
+            foreach (var level in levels)
+            {
+                if (mp.ContainsKey(level.Key))
+                {
+                    mp[level.Key].tags.AddRange(level.Value.Split("-"));
+                }
+                else
+                {
+                    this.logger.LogWarning($"Job ID {level.Key} not found in search results while assigning level tag.");
+                }
+            }
+
+            foreach (var job in searchResults)
+            {
+                var success = await this.jobsRepository.CreateIfNotExistsAsync(job);
+                if (!success)
+                {
+                    this.logger.LogError($"Failed to push job {job.id} to JobsRepository.");
+                }
+            }
+
+            var duration = DateTime.UtcNow - startTime;
+            this.logger.LogInformation($"JobScrapper run completed. Duration: {duration}. Processed {searchResults.Count} jobs for settings: {this.settings}");
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/src/Common/Managers/JobScrapperSettingsManager.cs b/src/Common/Managers/JobScrapperSettingsManager.cs
new file mode 100644
index 0000000..2c9f48f
--- /dev/null
+++ b/src/Common/Managers/JobScrapperSettingsManager.cs
@@ -0,0 +1,93 @@
+namespace Common.Managers
+{
+    using Common.DatabaseModels;
+    using Common.Enums;
+    using Common.Factories;
+    using Common.Models.Public;
+    using Microsoft.Azure.Cosmos;
+    using Microsoft.Extensions.Logging;
+    public class JobScrapperSettingsManager
+    {
+        private readonly Container _scrapperSettingsContainer;
+        private readonly ILogger<JobScrapperSettingsManager> _logger;
+
+        public JobScrapperSettingsManager(ICosmosContainerFactory cosmosContainerFactory,
+            ILogger<JobScrapperSettingsManager> logger)
+        {
+            _scrapperSettingsContainer = cosmosContainerFactory.GetContainer(CosmosContainerEnum.ScrapperSettingsContainer);
+            _logger = logger;
+        }
+
+        public async Task<JobScrapperSettings> CreateOrUpdateSettings(string id, ScrapperSettings publicSettings)
+        {
+            if (publicSettings == null)
+            {
+                throw new ArgumentNullException(nameof(publicSettings), "Public settings cannot be null");
+            }
+
+            var settingsInDb = await this.GetAllSettings();
+            JobScrapperSettings current = null;
+            if (!string.IsNullOrEmpty(id) && settingsInDb.Any(s => s.id.Equals(id, StringComparison.OrdinalIgnoreCase)))
+            {
+                current = settingsInDb.First(s => s.id.Equals(id, StringComparison.OrdinalIgnoreCase));
+            }
+
+            if (current != null)
+            {
+                current.UpdateFromPublicModel(publicSettings);
+            }
+            else
+            {
+                // Restrict total number of settings to 5
+                if (settingsInDb.Count >= 5)
+                {
+                    throw new InvalidOperationException("[TooManySettings]: Cannot create more than 5 scrapper settings.");
+                }
+                current = new JobScrapperSettings(id, publicSettings.name, publicSettings.runIntervalInMinutes, publicSettings.settings, true);
+            }
+
+            await _scrapperSettingsContainer.UpsertItemAsync(current);
+            return current;
+        }
+
+        public async Task<JobScrapperSettings> GetSettingsById(string id)
+        {
+            var allSettings = await GetAllSettings();
+            var setting = allSettings.FirstOrDefault(s => s.id.Equals(id, StringComparison.OrdinalIgnoreCase));
+
+            if (setting == null)
+            {
+                _logger.LogError($"No JobScrapperSettings found with id: {id}");
+                throw new KeyNotFoundException($"No JobScrapperSettings found with id: {id}");
+            }
+
+            return setting;
+        }
+
+        public async Task<bool> UpdateSettingsAsync(string id, JobScrapperSettings jobSetting)
+        {
+            try
+            {
+                await _scrapperSettingsContainer.UpsertItemAsync(jobSetting, new PartitionKey(id));
+                _logger.LogInformation($"Successfully updated JobScrapperSettings with id: {id}");
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError($"Error updating JobScrapperSettings with id: {id}. Exception: {ex.Message}");
+                return false;
+            }
+            return true;
+        }
+
+        public async Task<List<JobScrapperSettings>> GetAllSettings()
+        {
+            var settingsInDb = _scrapperSettingsContainer.GetItemQueryIterator<JobScrapperSettings>($"SELECT * from c");
+            var allSettings = new List<JobScrapperSettings>();
+            while (settingsInDb.HasMoreResults)
+            {
+                var response = await settingsInDb.ReadNextAsync();
+                allSettings.AddRange(response);
+            }
+            return allSettings;
+        }
+    }
+}
\ No newline at end of file
Exception: {ex.Message}"); + return false; + } + return true; + } + + public async Task> GetAllSettings() + { + var settingsInDb = _scrapperSettingsContainer.GetItemQueryIterator($"SELECT * from c"); + var allSettings = new List(); + while (settingsInDb.HasMoreResults) + { + var response = await settingsInDb.ReadNextAsync(); + allSettings.AddRange(response); + } + return allSettings; + } + } +} \ No newline at end of file diff --git a/src/Common/Models/GSResult.cs b/src/Common/Models/GSResult.cs new file mode 100644 index 0000000..c1882c0 --- /dev/null +++ b/src/Common/Models/GSResult.cs @@ -0,0 +1,132 @@ +namespace Common.Models +{ + public class GSResult + { + public string kind { get; set; } + public UrlInfo url { get; set; } + public Queries queries { get; set; } + public Context context { get; set; } + public SearchInformation searchInformation { get; set; } + public List items { get; set; } + } + + public class UrlInfo + { + public string type { get; set; } + public string template { get; set; } + } + + public class Queries + { + public List request { get; set; } + public List nextPage { get; set; } + } + + public class QueryRequest + { + public string totalResults { get; set; } + public int count { get; set; } + public int startIndex { get; set; } + public string inputEncoding { get; set; } + public string outputEncoding { get; set; } + public string safe { get; set; } + public string cx { get; set; } + public string sort { get; set; } + public string gl { get; set; } + public string siteSearch { get; set; } + public string siteSearchFilter { get; set; } + public string exactTerms { get; set; } + public string excludeTerms { get; set; } + public string dateRestrict { get; set; } + } + + public class Context + { + public string title { get; set; } + } + + public class SearchInformation + { + public double searchTime { get; set; } + public string formattedSearchTime { get; set; } + public string totalResults { get; set; } + public string formattedTotalResults { get; set; } + } + + public class Item + { + public string kind { get; set; } + public string title { get; set; } + public string htmlTitle { get; set; } + public string link { get; set; } + public string displayLink { get; set; } + public string snippet { get; set; } + public string htmlSnippet { get; set; } + public string formattedUrl { get; set; } + public string htmlFormattedUrl { get; set; } + // public PageMap pagemap { get; set; } // Not in use currently + } + + /* + #region PageMapClasses + public class PageMap + { + public List metatags { get; set; } + public List cse_thumbnail { get; set; } + public List cse_image { get; set; } + public List BreadcrumbList { get; set; } + public List organization { get; set; } + } + + public class MetaTag + { + public string image { get; set; } + public string og_type { get; set; } + public string viewport { get; set; } + public string title { get; set; } + public string og_url { get; set; } + public string og_image { get; set; } + public string og_site_name { get; set; } + public string og_locale { get; set; } + public string og_description { get; set; } + public string twitter_card { get; set; } + public string twitter_image { get; set; } + public string author { get; set; } + public string url { get; set; } + public string position { get; set; } + public string referrer { get; set; } + public string csrf_token { get; set; } + public string csrf_param { get; set; } + public string jobidentifier { get; set; } + public string og_image_width { get; set; } + public string 
+        public string og_image_height { get; set; }
+        public string http_ogp_me_ns_article_published_time { get; set; }
+        public string http_ogp_me_ns_article_modified_time { get; set; }
+        public string http_ogp_me_ns_article_section { get; set; }
+        public string twitter_site { get; set; }
+    }
+
+    public class CseThumbnail
+    {
+        public string src { get; set; }
+        public string width { get; set; }
+        public string height { get; set; }
+    }
+
+    public class CseImage
+    {
+        public string src { get; set; }
+    }
+
+    public class BreadcrumbList
+    {
+        // Add properties if needed
+    }
+
+    public class Organization
+    {
+        public string sameas { get; set; }
+    }
+    #endregion PageMapClasses
+    */
+}
\ No newline at end of file
diff --git a/src/Common/Models/Problem.cs b/src/Common/Models/Problem.cs
index 27b6f19..714968f 100644
--- a/src/Common/Models/Problem.cs
+++ b/src/Common/Models/Problem.cs
@@ -1,3 +1,5 @@
+using Common.DatabaseModels;
+
 namespace Common.Models
 {
     public enum Difficulty
diff --git a/src/Common/Models/QueryParam.cs b/src/Common/Models/Public/QueryParam.cs
similarity index 91%
rename from src/Common/Models/QueryParam.cs
rename to src/Common/Models/Public/QueryParam.cs
index 7142689..73c8b07 100644
--- a/src/Common/Models/QueryParam.cs
+++ b/src/Common/Models/Public/QueryParam.cs
@@ -1,4 +1,4 @@
-namespace Backend.Models.Public
+namespace Common.Models.Public
 {
     public class QueryParam
     {
diff --git a/src/Common/Models/Public/QuerySettings.cs b/src/Common/Models/Public/QuerySettings.cs
new file mode 100644
index 0000000..4bb8415
--- /dev/null
+++ b/src/Common/Models/Public/QuerySettings.cs
@@ -0,0 +1,15 @@
+namespace Common.Models.Public
+{
+
+    public class QuerySettings
+    {
+        public string query { get; set; }
+        public List<string> locations { get; set; }
+        public List<string> sitesToInclude { get; set; }
+        public List<string> sitesToExclude { get; set; }
+        public List<string> exactTerms { get; set; }
+        public List<string> negativeTerms { get; set; }
+        public List<string> additionalSearchTerms { get; set; }
+        public int lookBackDays { get; set; }
+    }
+}
diff --git a/src/Common/Models/Public/ScrapperSettings.cs b/src/Common/Models/Public/ScrapperSettings.cs
new file mode 100644
index 0000000..85d424b
--- /dev/null
+++ b/src/Common/Models/Public/ScrapperSettings.cs
@@ -0,0 +1,13 @@
+namespace Common.Models.Public
+{
+    public class ScrapperSettings
+    {
+        public string id { get; set; }
+        public string name { get; set; }
+        public bool enabled { get; set; }
+        public DateTime lastUpdated { get; set; }
+        public DateTime lastRunTime { get; set; }
+        public int runIntervalInMinutes { get; set; }
+        public QuerySettings settings { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/src/Common/Queries/JobQuery.cs b/src/Common/Queries/JobQuery.cs
new file mode 100644
index 0000000..3bcce34
--- /dev/null
+++ b/src/Common/Queries/JobQuery.cs
@@ -0,0 +1,12 @@
+namespace Common.Queries
+{
+    public class JobQuery
+    {
+        public string JobType { get; set; } // Software Engineer, Data Scientist, etc.
+        public DateTime StartDate { get; set; } = DateTime.UtcNow; // Start date for the job posting
+        public DateTime EndDate { get; set; } = DateTime.UtcNow; // End date for the job posting
+        public List<string> Companies { get; set; } // List of companies to filter
+        public List<string> Locations { get; set; } // List of locations to filter
+        public string JobLevel { get; set; } // Entry Level, Mid Level, Senior Level, etc.
+    }
+}
diff --git a/src/Common/Repositories/JobScrapperSettingsRepository.cs b/src/Common/Repositories/JobScrapperSettingsRepository.cs
new file mode 100644
index 0000000..c997470
--- /dev/null
+++ b/src/Common/Repositories/JobScrapperSettingsRepository.cs
@@ -0,0 +1,53 @@
+using Common.DatabaseModels;
+using Common.Enums;
+using Common.Factories;
+using Common.Managers;
+using Microsoft.Azure.Cosmos;
+using Microsoft.Extensions.Logging;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Common.Repositories
+{
+    public class JobScrapperSettingsRepository
+    {
+        private readonly Container _scrapperSettingsContainer;
+        private readonly ILogger<JobScrapperSettingsRepository> _logger;
+
+        public JobScrapperSettingsRepository(ICosmosContainerFactory cosmosContainerFactory,
+            ILogger<JobScrapperSettingsRepository> logger)
+        {
+            _scrapperSettingsContainer = cosmosContainerFactory.GetContainer(CosmosContainerEnum.ScrapperSettingsContainer);
+            _logger = logger;
+        }
+
+        public async Task<List<JobScrapperSettings>> GetAllSettings()
+        {
+            var settingsInDb = _scrapperSettingsContainer.GetItemQueryIterator<JobScrapperSettings>($"SELECT * from c");
+            var allSettings = new List<JobScrapperSettings>();
+            while (settingsInDb.HasMoreResults)
+            {
+                var response = await settingsInDb.ReadNextAsync();
+                allSettings.AddRange(response);
+            }
+            return allSettings;
+        }
+
+        public async Task UpdateSettingsAsync(string id, JobScrapperSettings jobSetting)
+        {
+            try
+            {
+                await _scrapperSettingsContainer.UpsertItemAsync(jobSetting, new PartitionKey(id));
+                _logger.LogInformation($"Successfully updated JobScrapperSettings with id: {id}");
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError($"Error updating JobScrapperSettings with id: {id}. Exception: {ex.Message}");
+                throw;
+            }
+        }
+    }
+}
diff --git a/src/Common/Repositories/JobsRepository.cs b/src/Common/Repositories/JobsRepository.cs
new file mode 100644
index 0000000..b2b2c7a
--- /dev/null
+++ b/src/Common/Repositories/JobsRepository.cs
@@ -0,0 +1,207 @@
+namespace Common.Repositories
+{
+    using Common.DatabaseModels;
+    using Common.Enums;
+    using Common.Factories;
+    using Common.Queries;
+    using Microsoft.Azure.Cosmos;
+    using Microsoft.Extensions.Logging;
+    using System.Net;
+
+    public class JobsRepository
+    {
+        private readonly Container jobsContainer;
+        private readonly ILogger<JobsRepository> logger;
+
+        public JobsRepository(ICosmosContainerFactory cosmosContainerFactory,
+            ILogger<JobsRepository> logger)
+        {
+            this.jobsContainer = cosmosContainerFactory.GetContainer(CosmosContainerEnum.JobsContainer);
+            this.logger = logger;
+        }
+
+        public async Task<List<ScrappedJob>> GetAllLatestJobsAsync()
+        {
+            var query = "SELECT * FROM c ORDER BY c.scrappedTime DESC OFFSET 0 LIMIT 100";
+            return await QueryJobsAsync(query);
+        }
+
+        public async Task<List<ScrappedJob>> GetAllJobsInLastOneDay()
+        {
+            // DateTimeToTimestamp returns milliseconds, so compare against 24h in ms.
+            var query = $"SELECT * FROM c WHERE DateTimeToTimestamp(GetCurrentTimestamp()) - DateTimeToTimestamp(c.scrappedTime) < 86400000";
+            return await QueryJobsAsync(query);
+        }
+
+        public async Task<ScrappedJob> GetJobByIdAsync(string id)
+        {
+            try
+            {
+                // TODO: NOT working as expected
+                var response = await this.jobsContainer.ReadItemAsync<ScrappedJob>(id, new PartitionKey(id));
+                return response.Resource;
+            }
+            catch (CosmosException cosmosEx) when (cosmosEx.StatusCode == System.Net.HttpStatusCode.NotFound)
+            {
+                this.logger.LogWarning($"Job: {id} not found in container.");
+                return null;
+            }
+            catch (Exception ex)
+            {
+                this.logger.LogError($"Failed to retrieve job: {id} from container. Ex: {ex}");
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// Create the item only if it does not already exist using a single DB call.
+        /// Returns true if the item was created, false if it already existed.
+        /// </summary>
+        public async Task<bool> CreateIfNotExistsAsync(ScrappedJob job)
+        {
+            if (job == null) throw new ArgumentNullException(nameof(job));
+            try
+            {
+                var requestOptions = new ItemRequestOptions
+                {
+                    // Instruct Cosmos to only create if the item does not exist.
+                    // SDK will translate this to an If-None-Match header.
+                    IfNoneMatchEtag = "*"
+                };
+
+                var response = await this.jobsContainer.CreateItemAsync(job, new PartitionKey(job.id), requestOptions);
+                // Created successfully
+                this.logger.LogInformation("Created job {id} in Cosmos DB. RU charge: {ru}", job.id, response.RequestCharge);
+                return true;
+            }
+            catch (CosmosException ex) when (ex.StatusCode == HttpStatusCode.PreconditionFailed || ex.StatusCode == HttpStatusCode.Conflict)
+            {
+                // Item already exists (server enforces the If-None-Match precondition).
+                this.logger.LogInformation("Job {id} already exists. Skipping create.", job.id);
+                return false;
+            }
+            catch (Exception ex)
+            {
+                this.logger.LogError(ex, "Failed to create job {id} in Cosmos DB.", job.id);
+                throw;
+            }
+        }
+
+        public async Task<List<ScrappedJob>> GetJobsEasyQueryAsync(string location, string level)
+        {
+            var query = "SELECT * FROM c WHERE EXISTS ( SELECT VALUE t FROM t IN c.tags WHERE CONTAINS(LOWER(t), @location) OR CONTAINS(LOWER(t), @unknown) ) ORDER BY c.scrappedTime DESC OFFSET 0 LIMIT 1000";
+            var queryDefinition = new QueryDefinition(query).WithParameter("@location", location.ToLower()).WithParameter("@unknown", "unknown");
+            var res = await QueryJobsAsync(queryDefinition);
+            res = res.Where(j => j.tags.Any(t => t.Equals(level, StringComparison.OrdinalIgnoreCase))).ToList();
+            return res;
+        }
+
+        private async Task<List<ScrappedJob>> QueryJobsAsync(string query)
+        {
+            var queryDefinition = new QueryDefinition(query);
+            var queryResultSetIterator = jobsContainer.GetItemQueryIterator<ScrappedJob>(queryDefinition);
+            List<ScrappedJob> results = new List<ScrappedJob>();
+            while (queryResultSetIterator.HasMoreResults)
+            {
+                var response = await queryResultSetIterator.ReadNextAsync();
+                results.AddRange(response);
+            }
+            this.logger.LogInformation($"Retrieved {results.Count} jobs from Cosmos DB. Query: {query}");
+            return results;
+        }
Query: {query}"); + return results; + } + private async Task> QueryJobsAsync(QueryDefinition queryDefinition) + { + var queryResultSetIterator = jobsContainer.GetItemQueryIterator(queryDefinition); + List results = new List(); + while (queryResultSetIterator.HasMoreResults) + { + var response = await queryResultSetIterator.ReadNextAsync(); + results.AddRange(response); + } + this.logger.LogInformation($"Retrieved {results.Count} jobs from Cosmos DB."); + return results; + } + + public async Task> GetJobsFromQuery(JobQuery jobquery) + { + if (jobquery == null) throw new ArgumentNullException(nameof(jobquery)); + + var sql = "SELECT * FROM c WHERE 1=1"; + var qd = new QueryDefinition(sql); + + // JobType: search title or tags + if (!string.IsNullOrWhiteSpace(jobquery.JobType)) + { + qd = qd.WithParameter("@jobType", jobquery.JobType); + sql += " AND CONTAINS(c.jobType, @jobType, true)"; + } + + // Companies (list) + if (jobquery.Companies != null && jobquery.Companies.Count > 0) + { + var companyConditions = new List(); + for (int i = 0; i < jobquery.Companies.Count; i++) + { + var param = $"@company{i}"; + qd = qd.WithParameter(param, jobquery.Companies[i]); + companyConditions.Add($"c.companyName = {param}"); + } + sql += " AND (" + string.Join(" OR ", companyConditions) + ")"; + } + + // Locations: fallback to searching in displayLink, snippet or description + if (jobquery.Locations != null && jobquery.Locations.Count > 0) + { + var locationConditions = new List(); + for (int i = 0; i < jobquery.Locations.Count; i++) + { + var param = $"@location{i}"; + qd = qd.WithParameter(param, jobquery.Locations[i]); + locationConditions.Add($"CONTAINS(c.location, {param}, true)"); + } + sql += " AND (" + string.Join(" OR ", locationConditions) + ")"; + } + + // JobLevel: search in tags array (case-insensitive contains) + if (!string.IsNullOrWhiteSpace(jobquery.JobLevel)) + { + qd = qd.WithParameter("@jobLevel", jobquery.JobLevel); + // Use EXISTS with an IN on the tags array and CONTAINS for case-insensitive matching + sql += " AND EXISTS(SELECT VALUE t FROM t IN c.tags WHERE CONTAINS(t, @jobLevel, true))"; + } + + // Date range (JobPostedTime) + if (jobquery.StartDate > DateTime.MinValue) + { + qd = qd.WithParameter("@startDate", jobquery.StartDate); + sql += " AND c.jobPostedTime >= @startDate"; + } + if (jobquery.EndDate > DateTime.MinValue) + { + qd = qd.WithParameter("@endDate", jobquery.EndDate); + sql += " AND c.jobPostedTime <= @endDate"; + } + + // final ordering / limit - optional, keep callers responsible if needed + qd = new QueryDefinition(sql); // rebuild with final SQL + // re-add parameters (QueryDefinition is immutable-like with chaining, but to keep it simple rebuild) + // Add parameters again + if (!string.IsNullOrWhiteSpace(jobquery.JobType)) qd = qd.WithParameter("@jobType", jobquery.JobType); + if (jobquery.Companies != null) + { + for (int i = 0; i < jobquery.Companies.Count; i++) qd = qd.WithParameter($"@company{i}", jobquery.Companies[i]); + } + if (jobquery.Locations != null) + { + for (int i = 0; i < jobquery.Locations.Count; i++) qd = qd.WithParameter($"@location{i}", jobquery.Locations[i]); + } + if (!string.IsNullOrWhiteSpace(jobquery.JobLevel)) qd = qd.WithParameter("@jobLevel", jobquery.JobLevel); + if (jobquery.StartDate > DateTime.MinValue) qd = qd.WithParameter("@startDate", jobquery.StartDate); + if (jobquery.EndDate > DateTime.MinValue) qd = qd.WithParameter("@endDate", jobquery.EndDate); + + logger.LogInformation($"Constructed job query: {sql}"); + + return 
diff --git a/src/Common/Repositories/ProblemRepository.cs b/src/Common/Repositories/ProblemRepository.cs
index 8ca5d29..1d5fa79 100644
--- a/src/Common/Repositories/ProblemRepository.cs
+++ b/src/Common/Repositories/ProblemRepository.cs
@@ -1,4 +1,5 @@
-using Common.Enums;
+using Common.DatabaseModels;
+using Common.Enums;
 using Common.Factories;
 using Common.Models;
 using Microsoft.Azure.Cosmos;
diff --git a/src/PetProjectAzFunctions/.gitignore b/src/PetProjectAzFunctions/.gitignore
new file mode 100644
index 0000000..ff5b00c
--- /dev/null
+++ b/src/PetProjectAzFunctions/.gitignore
@@ -0,0 +1,264 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# Azure Functions localsettings file
+local.settings.json
+
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+
+# Visual Studio 2015 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUNIT
+*.VisualState.xml
+TestResult.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# DNX
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+*_i.c
+*_p.c
+*_i.h
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# JustCode is a .NET coding add-in
+.JustCode
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# TODO: Comment the next line if you want to checkin your web deploy settings
+# but database connection strings (with potential passwords) will be unencrypted
+#*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# The packages folder can be ignored because of Package Restore
+**/packages/*
+# except build/, which is used as an MSBuild target.
+!**/packages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/packages/repositories.config
+# NuGet v3's project.json files produces more ignoreable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+node_modules/
+orleans.codegen.cs
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+
+# SQL Server files
+*.mdf
+*.ldf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# JetBrains Rider
+.idea/
+*.sln.iml
+
+# CodeRush
+.cr/
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
\ No newline at end of file
diff --git a/src/PetProjectAzFunctions/Dockerfile b/src/PetProjectAzFunctions/Dockerfile
new file mode 100644
index 0000000..61200dd
--- /dev/null
+++ b/src/PetProjectAzFunctions/Dockerfile
@@ -0,0 +1,29 @@
+# See https://aka.ms/customizecontainer to learn how to customize your debug container and how Visual Studio uses this Dockerfile to build your images for faster debugging.
+
+# This stage is used when running from VS in fast mode (Default for Debug configuration)
+FROM mcr.microsoft.com/azure-functions/dotnet-isolated:4-dotnet-isolated8.0 AS base
+WORKDIR /home/site/wwwroot
+EXPOSE 8080
+
+
+# This stage is used to build the service project
+FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
+ARG BUILD_CONFIGURATION=Release
+WORKDIR /src
+COPY ["PetProjectAzFunctions/PetProjectAzFunctions.csproj", "PetProjectAzFunctions/"]
+RUN dotnet restore "./PetProjectAzFunctions/PetProjectAzFunctions.csproj"
+COPY . .
+WORKDIR "/src/PetProjectAzFunctions"
+RUN dotnet build "./PetProjectAzFunctions.csproj" -c $BUILD_CONFIGURATION -o /app/build
+
+# This stage is used to publish the service project to be copied to the final stage
+FROM build AS publish
+ARG BUILD_CONFIGURATION=Release
+RUN dotnet publish "./PetProjectAzFunctions.csproj" -c $BUILD_CONFIGURATION -o /app/publish /p:UseAppHost=false
+
+# This stage is used in production or when running from VS in regular mode (Default when not using the Debug configuration)
+FROM base AS final
+WORKDIR /home/site/wwwroot
+COPY --from=publish /app/publish .
+ENV AzureWebJobsScriptRoot=/home/site/wwwroot \
+    AzureFunctionsJobHost__Logging__Console__IsEnabled=true
\ No newline at end of file
diff --git a/src/PetProjectAzFunctions/JobOpeningsSyncFunction.cs b/src/PetProjectAzFunctions/JobOpeningsSyncFunction.cs
new file mode 100644
index 0000000..a87b8db
--- /dev/null
+++ b/src/PetProjectAzFunctions/JobOpeningsSyncFunction.cs
@@ -0,0 +1,67 @@
+using System;
+using Common.Managers;
+using Common.Repositories;
+using Microsoft.Azure.Functions.Worker;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+
+namespace PetProjectAzFunctions
+{
+    public class JobOpeningsSyncFunction
+    {
+        private readonly ILogger _logger;
+
+        private readonly JobScrapperSettingsRepository _jobScrapperSettingsRepository;
+
+        private readonly IServiceProvider _serviceProvider;
+
+        public JobOpeningsSyncFunction(ILoggerFactory loggerFactory,
+            JobScrapperSettingsRepository jobScrapperSettingsRepository,
+            IServiceProvider serviceProvider)
+        {
+            _logger = loggerFactory.CreateLogger<JobOpeningsSyncFunction>();
+            _jobScrapperSettingsRepository = jobScrapperSettingsRepository;
+            _serviceProvider = serviceProvider;
+        }
+
+        [Function("JobOpeningsSyncFunction")]
+        public async Task Run([TimerTrigger("%CronPeriod%")] TimerInfo myTimer)
+        {
+            _logger.LogInformation($"C# Timer trigger function executed at: {DateTime.Now}");
+            var scrapperSettings = await _jobScrapperSettingsRepository.GetAllSettings();
+            var currentTime = DateTime.UtcNow;
+            await Parallel.ForEachAsync(scrapperSettings, async (setting, ct) =>
+            {
+                try
+                {
+                    if (setting.enabled)
+                    {
+                        // Run only when the next scheduled time (last run + interval) is due,
+                        // with a one-minute tolerance for timer drift.
+                        if (setting.lastRunTime.AddMinutes(setting.runIntervalInMinutes) <= currentTime.AddMinutes(1))
+                        {
+                            using var scope = _serviceProvider.CreateScope();
+                            var scrapperInstance = scope.ServiceProvider.GetRequiredService();
+                            scrapperInstance.ConfigureSettings(setting);
+                            await scrapperInstance.RunAsync();
+                            setting.lastRunTime = currentTime;
+                            await _jobScrapperSettingsRepository.UpdateSettingsAsync(setting.id, setting);
+                        }
+                        else
+                        {
+                            _logger.LogInformation($"Scrapper setting {setting.id} was run at {setting.lastRunTime}, next run schedule has not yet come. Skipping this run.");
+                        }
+                    }
+                    else
+                    {
+                        _logger.LogInformation($"Scrapper setting {setting.id} is disabled. Skipping.");
+                        return;
+                    }
+                }
+                catch (Exception ex)
+                {
+                    _logger.LogError(ex, $"Error processing scrapper settings: {setting}");
+                }
+            });
+        }
+    }
+}
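Note on the timer body: the due-time check in the submitted patch was inverted (it ran a scrapper while its next slot was still in the future, which is exactly what the `else` branch's log message says should be skipped); the hunk above flips the comparison. The rule, restated as a small pure helper so it can be unit-tested (helper name and shape are illustrative, not part of the patch):

```csharp
// Hypothetical, test-friendly restatement of the due-time rule used above:
// a scrapper is due when lastRun + interval has passed, allowing a small
// tolerance for timer drift.
using System;

public static class ScheduleSketch
{
    public static bool IsDue(DateTime lastRunUtc, int runIntervalInMinutes, DateTime nowUtc)
    {
        var nextRun = lastRunUtc.AddMinutes(runIntervalInMinutes);
        return nextRun <= nowUtc.AddMinutes(1); // one-minute tolerance
    }
}

// Example: last run 30 minutes ago with a 15-minute interval => due.
// ScheduleSketch.IsDue(DateTime.UtcNow.AddMinutes(-30), 15, DateTime.UtcNow) == true
```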
Skipping."); + return; + } + } + catch (Exception ex) + { + _logger.LogError(ex, $"Error processing scrapper settings: {setting}"); + } + }); + } + } +} diff --git a/src/PetProjectAzFunctions/PetProjectAzFunctions.csproj b/src/PetProjectAzFunctions/PetProjectAzFunctions.csproj new file mode 100644 index 0000000..873e3b4 --- /dev/null +++ b/src/PetProjectAzFunctions/PetProjectAzFunctions.csproj @@ -0,0 +1,37 @@ + + + net8.0 + v4 + Exe + enable + enable + /home/site/wwwroot + Linux + + + + + + + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + Never + + + + + + \ No newline at end of file diff --git a/src/PetProjectAzFunctions/Program.cs b/src/PetProjectAzFunctions/Program.cs new file mode 100644 index 0000000..7de0b9e --- /dev/null +++ b/src/PetProjectAzFunctions/Program.cs @@ -0,0 +1,53 @@ +using Common.Constants; +using Common.Engines; +using Common.Factories; +using Common.Managers; +using Common.Repositories; +using Microsoft.Azure.Cosmos; +using Microsoft.Azure.Functions.Worker.Builder; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using System.Data; + +public class Program +{ + public static void Main(string[] args) + { + var builder = FunctionsApplication.CreateBuilder(args); + + builder.ConfigureFunctionsWebApplication(); + ConfigureServices(builder); + builder.Build().Run(); + } + + private static void ConfigureServices(FunctionsApplicationBuilder builder) + { + var services = builder.Services; + // Register your services here + services.AddLogging(); + services.AddHttpClient(); + services.AddTransient(); + services.AddTransient(); + services.AddTransient(); + services.AddTransient(); + services.AddTransient(); + + var config = builder.Configuration; + + #region Register Cosmos related services + services.AddSingleton(s => + { + var cosmosDbUri = config[ConfigurationConstants.CosmosDBUriKey]; + var cosmosDbAccountKey = config[ConfigurationConstants.CosmosDBAccountKey]; + if (string.IsNullOrEmpty(cosmosDbUri) || string.IsNullOrEmpty(cosmosDbAccountKey)) + { + throw new DataException("Cosmos DB configuration is missing or invalid."); + } + return new CosmosClient(cosmosDbUri, cosmosDbAccountKey); + }); + + services.AddTransient(); + #endregion + + } +} diff --git a/src/PetProjectAzFunctions/Properties/launchSettings.json b/src/PetProjectAzFunctions/Properties/launchSettings.json new file mode 100644 index 0000000..6a6168e --- /dev/null +++ b/src/PetProjectAzFunctions/Properties/launchSettings.json @@ -0,0 +1,15 @@ +{ + "profiles": { + "PetProjectAzFunctions": { + "commandName": "Project", + "commandLineArgs": "--port 7149" + }, + "Container (Dockerfile)": { + "commandName": "Docker", + "launchUrl": "{Scheme}://{ServiceHost}:{ServicePort}", + "containerRunArguments": "--init", + "httpPort": 31027, + "useSSL": false + } + } +} \ No newline at end of file diff --git a/src/PetProjectAzFunctions/Properties/serviceDependencies.json b/src/PetProjectAzFunctions/Properties/serviceDependencies.json new file mode 100644 index 0000000..df4dcc9 --- /dev/null +++ b/src/PetProjectAzFunctions/Properties/serviceDependencies.json @@ -0,0 +1,11 @@ +{ + "dependencies": { + "appInsights1": { + "type": "appInsights" + }, + "storage1": { + "type": "storage", + "connectionId": "AzureWebJobsStorage" + } + } +} \ No newline at end of file diff --git a/src/PetProjectAzFunctions/host.json b/src/PetProjectAzFunctions/host.json new file mode 100644 index 0000000..ee5cf5f --- /dev/null +++ b/src/PetProjectAzFunctions/host.json @@ -0,0 +1,12 @@ +{ + "version": 
"2.0", + "logging": { + "applicationInsights": { + "samplingSettings": { + "isEnabled": true, + "excludedTypes": "Request" + }, + "enableLiveMetricsFilters": true + } + } +} \ No newline at end of file diff --git a/src/PetProjectAzFunctions/readme.md b/src/PetProjectAzFunctions/readme.md new file mode 100644 index 0000000..0b247b5 --- /dev/null +++ b/src/PetProjectAzFunctions/readme.md @@ -0,0 +1,11 @@ +# TimerTrigger - C# + +The `TimerTrigger` makes it incredibly easy to have your functions executed on a schedule. This sample demonstrates a simple use case of calling your function every 5 minutes. + +## How it works + +For a `TimerTrigger` to work, you provide a schedule in the form of a [cron expression](https://en.wikipedia.org/wiki/Cron#CRON_expression)(See the link for full details). A cron expression is a string with 6 separate expressions which represent a given schedule via patterns. The pattern we use to represent every 5 minutes is `0 */5 * * * *`. This, in plain text, means: "When seconds is equal to 0, minutes is divisible by 5, for any hour, day of the month, month, day of the week, or year". + +## Learn more + + Documentation \ No newline at end of file diff --git a/src/Synchronizer/ProblemsProcessor.cs b/src/Synchronizer/ProblemsProcessor.cs index db86026..a1a66d8 100644 --- a/src/Synchronizer/ProblemsProcessor.cs +++ b/src/Synchronizer/ProblemsProcessor.cs @@ -1,5 +1,6 @@ using Microsoft.Azure.Cosmos; using Common.Models; +using Common.DatabaseModels; namespace Synchronizer; diff --git a/src/lcw.sln b/src/lcw.sln index ec43a01..424b481 100644 --- a/src/lcw.sln +++ b/src/lcw.sln @@ -9,10 +9,9 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend", "Backend\Backend. EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Synchronizer", "Synchronizer\Synchronizer.csproj", "{BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PetProjectAzFunctions", "PetProjectAzFunctions\PetProjectAzFunctions.csproj", "{31C50D63-3018-4679-92FD-F080D47A32D0}" +EndProject Global - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Debug|x64 = Debug|x64 @@ -58,5 +57,20 @@ Global {BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}.Release|x64.Build.0 = Release|Any CPU {BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}.Release|x86.ActiveCfg = Release|Any CPU {BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}.Release|x86.Build.0 = Release|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|Any CPU.Build.0 = Debug|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x64.ActiveCfg = Debug|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x64.Build.0 = Debug|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x86.ActiveCfg = Debug|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x86.Build.0 = Debug|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Release|Any CPU.ActiveCfg = Release|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Release|Any CPU.Build.0 = Release|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x64.ActiveCfg = Release|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x64.Build.0 = Release|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x86.ActiveCfg = Release|Any CPU + {31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = 
diff --git a/src/Synchronizer/ProblemsProcessor.cs b/src/Synchronizer/ProblemsProcessor.cs
index db86026..a1a66d8 100644
--- a/src/Synchronizer/ProblemsProcessor.cs
+++ b/src/Synchronizer/ProblemsProcessor.cs
@@ -1,5 +1,6 @@
 using Microsoft.Azure.Cosmos;
 using Common.Models;
+using Common.DatabaseModels;
 
 namespace Synchronizer;
diff --git a/src/lcw.sln b/src/lcw.sln
index ec43a01..424b481 100644
--- a/src/lcw.sln
+++ b/src/lcw.sln
@@ -9,10 +9,9 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Backend", "Backend\Backend.
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Synchronizer", "Synchronizer\Synchronizer.csproj", "{BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PetProjectAzFunctions", "PetProjectAzFunctions\PetProjectAzFunctions.csproj", "{31C50D63-3018-4679-92FD-F080D47A32D0}"
+EndProject
 Global
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
 		Debug|x64 = Debug|x64
@@ -58,5 +57,20 @@ Global
 	{BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}.Release|x64.Build.0 = Release|Any CPU
 	{BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}.Release|x86.ActiveCfg = Release|Any CPU
 	{BF0FF8B1-3D65-459E-8CA1-A7C0ED4F97B9}.Release|x86.Build.0 = Release|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|Any CPU.Build.0 = Debug|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x64.ActiveCfg = Debug|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x64.Build.0 = Debug|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x86.ActiveCfg = Debug|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Debug|x86.Build.0 = Debug|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Release|Any CPU.ActiveCfg = Release|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Release|Any CPU.Build.0 = Release|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x64.ActiveCfg = Release|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x64.Build.0 = Release|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x86.ActiveCfg = Release|Any CPU
+	{31C50D63-3018-4679-92FD-F080D47A32D0}.Release|x86.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
 	EndGlobalSection
 EndGlobal