Skip to content

Commit

Permalink
REFACTOR: Changed Data Model for storing data in Database (#4)
Browse files Browse the repository at this point in the history
* FEATURE: Added PassiveSearch v1

- Added Connection to ElasticSearch
- Added 2 API's - /ping and /passive
- Keyword Search using /passive
- Added Storing to DB capability when using /search

* Fix: Added error handling

-added error handling for passivesearch api endpoint

* FEATURE: Added Simple Query String

- Changed ElasticSearch Search type to Simple Query String

* REFACTOR: Improved Active Search

-Made Active Search make better use of downtimes (Rate Limits or Validation Downtime)

* FIX: Issues with Active Search

-Fixed errors with parrallel processing

* FEATURE: Added SeedScript

- Added RootQuery as an optional query param for activesearch which basically takes out the prompt builder and takes a raw query to look for in Github API.
- Added Python SeedScript

* REFACTOR: Changed Data Model for Storing in Database

- Added other fields like LastUpdated, ETAG, URL, LastModified to Schema.
- Added SHA hash as the unique parameter to not allow duplicates in the database

* REFACTOR: Minor Log Refactor

- Changed console.log to console.info / console.error

* FIX: Storing only the reqd part of URL

* FEATURE: Update API

-Added PUT /database endpoint for updating the database

- Changed name of the endpoints

- Commented Database Trace Logs

* FIX: Update API Fix + Global ElasticSearch and Octokit Clients

- Fixed GetFileContents
- Added Global variables for EsClient and Octokit for easy access and clean code
- Better logs

* FIX: Minor Comment

* REFACTOR: UpdateAPI refactor

- Update API Refactored

* REFACTOR: Minor Fixes

- Better API Naming
- Fixed Update API
  • Loading branch information
ishaan812 committed Nov 5, 2023
1 parent aa559b9 commit 8a2db0c
Show file tree
Hide file tree
Showing 9 changed files with 369 additions and 112 deletions.
4 changes: 3 additions & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
"eslint:recommended",
"plugin:@typescript-eslint/recommended",
"plugin:jest/recommended",
"prettier"
"prettier",
"modular/best-practices",
"modular/style"
],
"rules": {
// The following rule is enabled only to supplement the inline suppression
Expand Down
8 changes: 5 additions & 3 deletions scripts/seed_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

def call_local_endpoint(prompt):
#TODO: Change this to the correct URL when activesearch endpoint is changed
url = f'http://localhost:8080/search?rootquery="{prompt}"'
url = f'http://localhost:8080/database?rootquery="{prompt}"'


try:
response = requests.get(url)
Expand All @@ -24,8 +25,9 @@ def call_local_endpoint(prompt):

if __name__ == "__main__":
#Get Open API files
call_local_endpoint("openapi: 3")
call_local_endpoint('openapi: 3')
#Get Swagger files
call_local_endpoint("swagger: 2")
# call_local_endpoint('"swagger: \"2"')


#PS: Takes a long time to run
100 changes: 77 additions & 23 deletions src/DB/dbutils.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,77 @@
export async function checkClusterHealth(esClient: any) : Promise<string> {
try {
const response = await esClient.cat.health();
console.log('Cluster health:', response);
return response;
} catch (error) {
console.error('Error checking cluster health:', error);
return "";
}
}

export async function BulkStoreToDB(validFiles: any,esClient: any) : Promise<void>{
try {
if(validFiles.length == 0){
return;
}
const response = await esClient.bulk({ body: validFiles });
// console.log('Bulk response:', response);
return response;
} catch (error) {
console.error('Error bulk indexing:', error);
}
}
import { esClient } from "../app.js";

export async function checkClusterHealth(): Promise<string> {
try {
const response = await esClient.cat.health();
console.info('Cluster health:', response);
return response;
} catch (error) {
console.error('Error checking cluster health:', error);
return '';
}
}

export async function BulkStoreToDB(
validFiles: any,
): Promise<void> {
try {
if (validFiles.length == 0) {
return;
}
const response = await esClient.bulk({ body: validFiles });
return response;
} catch (error) {
console.error('Error bulk indexing:', error);
}
}

export async function DeleteDocumentWithId(Id : string): Promise<void> {
try {
const index = 'openapi';
const updatedDocument = {
isDeleted: true,
};
await esClient.update({
index,
id: Id,
body: {
doc: updatedDocument,
},
});
console.info(`Document with ID ${Id} soft deleted.`);
} catch (error) {
console.error('Error deleting document from the database:', error);
}
}

export async function CreateDocument(Id:string, document: any): Promise<void> {
try {
const index = 'openapi';
await esClient.index({
index,
id: Id,
body: {
doc: document,
},
});
console.info(`New Document Added with ID ${Id}`);
} catch (error) {
//TODO: Add error handling for 400 over here
console.error('Error creating the document:', error);
}
}

export async function GetDocumentWithId(Id:string): Promise<any> {
try {
const index = 'openapi';
const document = await esClient.get({
index,
id: Id,
});
return document;
} catch (error) {
console.error('Error getting document from database:', error);
}
}


40 changes: 32 additions & 8 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ import { Octokit } from 'octokit';
import { activeSearch, passiveSearch } from './searchtools/search.js';
import dotenv from 'dotenv';
import es from 'elasticsearch';
import { checkClusterHealth } from './DB/dbutils.js';
import { throttling } from '@octokit/plugin-throttling'
import { retry } from '@octokit/plugin-retry'


import { checkClusterHealth} from './DB/dbutils.js';
import { throttling } from '@octokit/plugin-throttling';
import { retry } from '@octokit/plugin-retry';
import { UpdateOpenAPIFiles } from './updatetools/update.js';

const CustomOctokit = Octokit.plugin(throttling as any, retry as any);
dotenv.config();
Expand All @@ -20,7 +19,8 @@ const octokit = new CustomOctokit({
octokit.log.warn(
`Request quota exhausted for request ${options.method} ${options.url}`,
);
console.log(`Retrying after ${retryAfter} seconds!`);
console.info(`Retrying after ${retryAfter} seconds!`);

return true;
},
onSecondaryRateLimit: (retryAfter, options, octokit) => {
Expand All @@ -34,6 +34,21 @@ const octokit = new CustomOctokit({

const app = express();

export const esClient = new es.Client({
host: 'http://localhost:9200',
// log: 'trace',
});

//TODO: Iterate on api endpoints
app.get('/search', async (_req, _res) => {
const query = _req.query.q as string;
const results = await passiveSearch(query);
_res.send(results);
});

//openapi2db
app.post('/openapi', async (_req, _res) => {
=======


const esClient = new es.Client({
Expand Down Expand Up @@ -70,14 +85,23 @@ app.use('/search', async (_req, _res) => {
_res.send(results);
});


app.put('/openapi', async (_req, _res) => {
const results = await UpdateOpenAPIFiles();
_res.send(results);
});

app.use('/ping', async (_req, _res) => {
const response = await checkClusterHealth(esClient);
const response = await checkClusterHealth();
_res.send(response);
})
});


app.get('/', (_req, _res) => {
_res.send('TypeScript With Express');
});



export default app;
export { octokit };
2 changes: 1 addition & 1 deletion src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ dotenv.config();
const port = process.env.PORT;

app.listen(port, () => {
console.log(`⚡️[server]: Server is running at http://localhost:${port}`);
console.info(`⚡️[server]: Server is running at http://localhost:${port}`);
});
113 changes: 67 additions & 46 deletions src/searchtools/search.ts
Original file line number Diff line number Diff line change
@@ -1,59 +1,81 @@
import { queryBuilder, ValidateandStoreFiles} from './searchutils.js';
import { octokit } from '../app.js';
import { queryBuilder, ValidateandStoreFiles } from './searchutils.js';
import { octokit, esClient } from '../app.js';

let processCount = 0;
let finishedCount = 0;

let processCount = 1;
let finishedCount = 1;

export async function activeSearch(
prompt: string,
repo: string,
organisation: string,
username: string,
rootquery: string,
esClient: any,
): Promise<any> {
const query = await queryBuilder(prompt, repo, organisation, username, rootquery);
const query = await queryBuilder(
prompt,
repo,
organisation,
username,
rootquery,
);
let files = [];
let validFiles = [];
console.log("Query: "+query)
await octokit.paginate(octokit.rest.search.code, {
q: query,
per_page: 100
},
(response : any) => {
files = files.concat(response.data)
if(files.length >= 200){
processCount++;
console.log("ValidateandStoreFiles Process Number "+processCount+" Started")
ValidateandStoreFiles(files, esClient).then((validatedFiles) => {
validFiles = validFiles.concat(validatedFiles);
finishedCount++;
console.log("ValidateandStoreFiles Process Number "+finishedCount+" Started")
});
files = []
}
}
console.info('Query: ' + query);
await octokit.paginate(
octokit.rest.search.code,
{
q: query,
per_page: 100,
},
(response: any) => {
files = files.concat(response.data);
if (files.length >= 200) {
processCount++;
console.info(
'ValidateandStoreFiles Process Number ' + processCount + ' Started',
);
ValidateandStoreFiles(files).then((validatedFiles) => {
validFiles = validFiles.concat(validatedFiles);
finishedCount++;
console.info(
'ValidateandStoreFiles Process Number ' +
finishedCount +
' Finished',
);
});
files = [];
}
},
);
//this ending before the above one
processCount++;
console.log("ValidateandStoreFiles Process Number "+processCount+" Started")
ValidateandStoreFiles(files, esClient).then((validatedFiles) => {
console.info(
'ValidateandStoreFiles Process Number ' + processCount + ' Started',
);
ValidateandStoreFiles(files).then((validatedFiles) => {
validFiles = validFiles.concat(validatedFiles);
console.log("ValidateandStoreFiles Process Number "+finishedCount+" Started")
console.info(
'ValidateandStoreFiles Process Number ' + finishedCount + ' Finished',
);
finishedCount++;
});
while(processCount > finishedCount){
await new Promise(r => setTimeout(r, 3000));
console.log("Total Processes: "+processCount+"\nFinished Processes: "+finishedCount)
console.log("Waiting for all files to be processed")
while (processCount > finishedCount) {
await new Promise((r) => setTimeout(r, 5000));
console.info(
'Total Processes: ' +
processCount +
'\nFinished Processes: ' +
finishedCount,
);
console.info('Waiting for all files to be processed');

}
return validFiles;
}

export async function passiveSearch(
query: string,
esClient: any,
): Promise<any> {
try {
if (esClient === undefined) {
Expand All @@ -65,33 +87,32 @@ export async function passiveSearch(
query: {
simple_query_string: {
query: query,
fields: ["servers^2","paths^1.5","data^1"],
default_operator: "and"
}
}
}
});

fields: ['title^3', 'servers^2', 'paths^1.5', 'data^1'],
default_operator: 'and',
},
},
},
});
if (result.hits.hits) {
if (result.hits.hits.length === 0) {
console.log('No results found in the database');
console.error('No results found in the database');
// activeSearch(query, "", "", "", esClient);
}
return result.hits.hits;
}
} catch (error) {
if (error.message.includes('No Living connections')) {
console.log('Elasticsearch connection error:', error);
return error

console.error('Elasticsearch connection error:', error);
return error;
} else {
console.log('Error occurred during passive search:', error);
return error;
console.error('Error occurred during passive search:', error);
return error;

}
}

return 'Database not found';
}




Loading

0 comments on commit 8a2db0c

Please sign in to comment.