From aa559b90f95a6d4a7fd9ab1add155f093bb0bd1b Mon Sep 17 00:00:00 2001 From: Ishaan Shah <70190533+ishaan812@users.noreply.github.com> Date: Sun, 5 Nov 2023 14:14:37 +0530 Subject: [PATCH] FEATURE: Added seed script (#3) * FEATURE: Added PassiveSearch v1 - Added Connection to ElasticSearch - Added 2 APIs - /ping and /passive - Keyword Search using /passive - Added Storing to DB capability when using /search * Fix: Added error handling -added error handling for passivesearch api endpoint * FEATURE: Added Simple Query String - Changed ElasticSearch Search type to Simple Query String * REFACTOR: Improved Active Search -Made Active Search make better use of downtimes (Rate Limits or Validation Downtime) * FIX: Issues with Active Search -Fixed errors with parallel processing * FEATURE: Added SeedScript - Added RootQuery as an optional query param for activesearch which basically takes out the prompt builder and takes a raw query to look for in GitHub API. - Added Python SeedScript --- scripts/seed_script.py | 31 +++++++++++++++++++++++++++++++ src/app.ts | 4 +++- src/searchtools/search.ts | 13 ++++++++++--- src/searchtools/searchutils.ts | 9 +++++++-- 4 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 scripts/seed_script.py diff --git a/scripts/seed_script.py b/scripts/seed_script.py new file mode 100644 index 0000000..21f45d2 --- /dev/null +++ b/scripts/seed_script.py @@ -0,0 +1,31 @@ +# Description: This script is used to seed the database with data from the data folder +# !pip install requests +# python scripts/seed_script.py to run from main folder + +import requests + +def call_local_endpoint(prompt): + #TODO: Change this to the correct URL when activesearch endpoint is changed + url = f'http://localhost:8080/search?rootquery="{prompt}"' + + try: + response = requests.get(url) + + # Check if the response was successful (status code 200) + if response.status_code == 200: + print("Request to localhost:8080/search was successful!") + 
print("Response content:") + print(response.text) + else: + print(f"Request to localhost:8080/active failed with status code: {response.status_code}") + + except requests.exceptions.RequestException as e: + print(f"An error occurred: {e}") + +if __name__ == "__main__": + #Get Open API files + call_local_endpoint("openapi: 3") + #Get Swagger files + call_local_endpoint("swagger: 2") + +#PS: Takes a long time to run \ No newline at end of file diff --git a/src/app.ts b/src/app.ts index b35073d..623778e 100644 --- a/src/app.ts +++ b/src/app.ts @@ -4,7 +4,6 @@ import { activeSearch, passiveSearch } from './searchtools/search.js'; import dotenv from 'dotenv'; import es from 'elasticsearch'; import { checkClusterHealth } from './DB/dbutils.js'; -// import { router as userRoutes } from "./routes/user.routes.js"; import { throttling } from '@octokit/plugin-throttling' import { retry } from '@octokit/plugin-retry' @@ -47,6 +46,7 @@ const esClient = new es.Client({ // Check for openapi.json in the contents of the repository // If it exists, then store in database with important content + app.use('/passive', async (_req, _res) => { const query = _req.query.q as string; const results = await passiveSearch(query, esClient); @@ -58,11 +58,13 @@ app.use('/search', async (_req, _res) => { const Organisation = _req.query.org as string; const User = _req.query.user as string; const Prompt = _req.query.prompt as string; + const RootQuery = _req.query.rootquery as string; const results = await activeSearch( Prompt as string, Repository as string, Organisation as string, User as string, + RootQuery as string, esClient as any, ); _res.send(results); diff --git a/src/searchtools/search.ts b/src/searchtools/search.ts index c05eb37..cdf1e2d 100644 --- a/src/searchtools/search.ts +++ b/src/searchtools/search.ts @@ -10,31 +10,38 @@ export async function activeSearch( repo: string, organisation: string, username: string, + rootquery: string, esClient: any, ): Promise { - const query = await 
queryBuilder(prompt, repo, organisation, username); + const query = await queryBuilder(prompt, repo, organisation, username, rootquery); let files = []; let validFiles = []; + console.log("Query: "+query) await octokit.paginate(octokit.rest.search.code, { q: query, per_page: 100 }, (response : any) => { files = files.concat(response.data) - if(files.length >= 500){ - console.log("Validating and storing files since rate limit reached") + if(files.length >= 200){ processCount++; + console.log("ValidateandStoreFiles Process Number "+processCount+" Started") ValidateandStoreFiles(files, esClient).then((validatedFiles) => { validFiles = validFiles.concat(validatedFiles); finishedCount++; + console.log("ValidateandStoreFiles Process Number "+finishedCount+" Started") }); files = [] } } ); //this ending before the above one + processCount++; + console.log("ValidateandStoreFiles Process Number "+processCount+" Started") ValidateandStoreFiles(files, esClient).then((validatedFiles) => { validFiles = validFiles.concat(validatedFiles); + console.log("ValidateandStoreFiles Process Number "+finishedCount+" Started") + finishedCount++; }); while(processCount > finishedCount){ await new Promise(r => setTimeout(r, 3000)); diff --git a/src/searchtools/searchutils.ts b/src/searchtools/searchutils.ts index c5c419c..bbe42ad 100644 --- a/src/searchtools/searchutils.ts +++ b/src/searchtools/searchutils.ts @@ -37,11 +37,16 @@ export async function getFileContents( return response.data['content']; } -export async function queryBuilder(prompt: string, repo: string, organisation: string, username: string): Promise { +export async function queryBuilder(prompt: string, repo: string, organisation: string, username: string, rootquery: string): Promise { if(prompt == undefined){ prompt = "" } - let query = prompt + ' AND "openapi: 3"'; + let query + if(rootquery != undefined){ + query = rootquery + return query + } + query = prompt + ' AND "openapi: 3"'; // query+= prompt + ' AND "swagger: \\"2"' 
if (repo != undefined) { query += '+repo:' + repo;