diff --git a/.eslintrc.json b/.eslintrc.json index 5607549..d3ef8a2 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -15,7 +15,9 @@ "eslint:recommended", "plugin:@typescript-eslint/recommended", "plugin:jest/recommended", - "prettier" + "prettier", + "modular/best-practices", + "modular/style" ], "rules": { // The following rule is enabled only to supplement the inline suppression diff --git a/README.md b/README.md index 366cdae..1e13cf1 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,63 @@ + + # GitHub OpenAPI Search -The goal of this project is to provide a robust yet easy way to search Github for Swagger and OpenAPI definitions. Understanding that there is a lot of noise available, that we only care about OpenAPIs that validate, and that the Github API has rate limits that require you to automate the crawling over time. Providing a robust open-source solution that will crawl public Github repositories for machine-readable API definitions. -The project will consist of developing an open-source API that allows you to pass in search parameters and then utilize the GitHub API to perform the search, helping simplify the search interface, make rate limits visible as part of the response, and handle conducting a search in an asynchronous way, allowing the user to make a call to initiate, but then separate calls to receive results over time as results come in, helping show outcomes over time. +The goal of this project is to provide a robust yet easy way to search Github for OpenAPI and Swagger definitions. Understanding that there is a lot of noise available, that we only care about OpenAPIs that validate, and that the Github API has rate limits that require you to automate the crawling over time. Providing a robust open-source solution that will crawl public Github repositories for machine-readable API definitions. 
+The project will consist of developing an open-source API that allows you to pass in search parameters and then utilize the GitHub API to perform the search, helping simplify the search interface, and handle conducting a search in an asynchronous way, allowing the user to make a call to initiate, but then separate calls to receive results over time as results come in, helping show outcomes over time. ## Tech Stack - - Node JS/Express JS - Typescript - Octokit.JS -- Jest +- Jest (For testing) +- Docker +- Python (Scripting) +- ElasticSearch + +## Dev Runbook +Dependencies: NodeJS 19, npm, GitHub API key +How to get a GitHub API key: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens + +## Setting up OpenAPI Search with Docker Compose + +1. Clone the repository to your local setup +2. Make sure you have Docker installed locally. +3. Run `docker compose up` +4. Two Containers - Elasticsearch (The database container) and an instance of the server should have started. +5. Now to load the database with OpenAPI Files, run +`python scripts/seed_script.py` from the root of the folder. (Takes around 2-3hrs) +(To configure the organisation list further, you can edit scripts/assets/organisations1.txt; scripts/assets/organisations2.txt is for the next 1000 organisations) + +## Setting up the server manually + + 1. Clone the repository to your local setup + 2. Run `npm i` + 3. Make a `.env` file in the directory and add the variables: + **PORT**= **(port number you want to host the api)** + **GITHUB_API_KEY**= **(github API key)** + **ES_HOST**= **(determines location of elasticsearch db)** +4. Run `npm run build:watch` on one terminal. +5. On another terminal, run `npm run start` to start the server on the specified port. +6. Now the nodejs server should be running! 
To test it just go to `localhost:{{PORT}}` and then you will be able to see the admin panel through which you can interact with some of the APIs +7. Now to load the database with OpenAPI Files, run +`python scripts/seed_script.py` from the root of the folder. (Takes around 2-3hrs) + +## Setting up ElasticSearch locally (Manually) + 1. docker pull docker.elastic.co/elasticsearch/elasticsearch:8.8.2 + 2. docker network create elastic + 3. docker run \ + -p 9200:9200 \ + -p 9300:9300 \ + -e "discovery.type=single-node" \ + -e "xpack.security.enabled=false" \ + docker.elastic.co/elasticsearch/elasticsearch:8.8.2 + +## Loading Details +Currently, we are only indexing OpenAPI Files from the top 1000 most popular organisations on GitHub (based on stars), although more organisations can be indexed by adding them to the `scripts/assets/organisations1.txt` file. + + +## API Endpoints +[![Run in Postman](https://run.pstmn.io/button.svg)](https://app.getpostman.com/run-collection/19841716-f1801bb7-b189-429b-a875-91b115d349a2?action=collection%2Ffork&source=rip_markdown&collection-url=entityId%3D19841716-f1801bb7-b189-429b-a875-91b115d349a2%26entityType%3Dcollection%26workspaceId%3D5ebe19fb-61d4-47a7-9cae-de3834853f6b) + 🚧Under Construction diff --git a/package-lock.json b/package-lock.json index 02db912..357bb56 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "@octokit/plugin-retry": "^6.0.0", "@octokit/plugin-throttling": "^7.0.0", "axios": "^1.4.0", + "dotenv": "^16.1.4", "elasticsearch": "^16.7.3", "express": "^4.18.2", @@ -28,6 +29,7 @@ "@types/jest": "~29.5", "@types/node": "^18.16.19", "@types/openapi-to-postmanv2": "^3.2.1", + "@types/supertest": "^2.0.12", "@typescript-eslint/eslint-plugin": "~5.59", "@typescript-eslint/parser": "~5.59", @@ -2421,6 +2423,7 @@ "version": "18.16.19", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.19.tgz", "integrity": 
"sha512-IXl7o+R9iti9eBW4Wg2hx1xQDig183jj7YLn8F7udNceyfkbn1ZxmzZXuak20gR40D7pIkIY1kYGx5VIGbaHKA==" + }, "node_modules/@types/openapi-to-postmanv2": { "version": "3.2.1", @@ -9042,6 +9045,74 @@ "@octokit/core": "^4.0.0" } }, + "node_modules/octokit/node_modules/@octokit/auth-token": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-3.0.4.tgz", + "integrity": "sha512-TWFX7cZF2LXoCvdmJWY7XVPi74aSY0+FfBZNSXEXFkMpjcqsQwDSYVv5FhRFaI0V1ECnwbz4j59T/G+rXNWaIQ==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/octokit/node_modules/@octokit/core": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-4.2.4.tgz", + "integrity": "sha512-rYKilwgzQ7/imScn3M9/pFfUf4I1AZEH3KhyJmtPdE2zfaXAn2mFfUy4FbKewzc2We5y/LlKLj36fWJLKC2SIQ==", + "dependencies": { + "@octokit/auth-token": "^3.0.0", + "@octokit/graphql": "^5.0.0", + "@octokit/request": "^6.0.0", + "@octokit/request-error": "^3.0.0", + "@octokit/types": "^9.0.0", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/octokit/node_modules/@octokit/graphql": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-5.0.6.tgz", + "integrity": "sha512-Fxyxdy/JH0MnIB5h+UQ3yCoh1FG4kWXfFKkpWqjZHw/p+Kc8Y44Hu/kCgNBT6nU1shNumEchmW/sUO1JuQnPcw==", + "dependencies": { + "@octokit/request": "^6.0.0", + "@octokit/types": "^9.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/octokit/node_modules/@octokit/plugin-retry": { + "version": "4.1.6", + "resolved": "https://registry.npmjs.org/@octokit/plugin-retry/-/plugin-retry-4.1.6.tgz", + "integrity": "sha512-obkYzIgEC75r8+9Pnfiiqy3y/x1bc3QLE5B7qvv9wi9Kj0R5tGQFC6QMBg1154WQ9lAVypuQDGyp3hNpp15gQQ==", + "dependencies": { + "@octokit/types": "^9.0.0", + "bottleneck": "^2.15.3" + }, + "engines": { + "node": ">= 14" + }, + "peerDependencies": { + 
"@octokit/core": ">=3" + } + }, + "node_modules/octokit/node_modules/@octokit/plugin-throttling": { + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/@octokit/plugin-throttling/-/plugin-throttling-5.2.3.tgz", + "integrity": "sha512-C9CFg9mrf6cugneKiaI841iG8DOv6P5XXkjmiNNut+swePxQ7RWEdAZRp5rJoE1hjsIqiYcKa/ZkOQ+ujPI39Q==", + "dependencies": { + "@octokit/types": "^9.0.0", + "bottleneck": "^2.15.3" + }, + "engines": { + "node": ">= 14" + }, + "peerDependencies": { + "@octokit/core": "^4.0.0" + } + }, "node_modules/on-finished": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", diff --git a/package.json b/package.json index 372c8a6..ff9fd75 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "@types/jest": "~29.5", "@types/node": "^18.16.19", "@types/openapi-to-postmanv2": "^3.2.1", + "@types/supertest": "^2.0.12", "@typescript-eslint/eslint-plugin": "~5.59", "@typescript-eslint/parser": "~5.59", diff --git a/scripts/seed_script.py b/scripts/seed_script.py index 3de3013..0090b8d 100644 --- a/scripts/seed_script.py +++ b/scripts/seed_script.py @@ -4,13 +4,13 @@ import requests + def call_local_endpoint(query): url = f'http://localhost:8080/openapi?{query}' print(f"Calling {url}") try: response = requests.post(url) - # Check if the response was successful (status code 200) if response.status_code == 200: print("Request to localhost:8080/search was successful!") @@ -36,5 +36,4 @@ def loadbyorganisations(filename): #Get Open API files loadbyorganisations("scripts/assets/organisations1.txt") -#Have to load Swagger Files too -#Load by Repository + diff --git a/src/DB/dbutils.ts b/src/DB/dbutils.ts index 5f94628..1b1a7d5 100644 --- a/src/DB/dbutils.ts +++ b/src/DB/dbutils.ts @@ -1,5 +1,6 @@ import { esClient } from '../app.js'; + export async function checkClusterHealth(): Promise { try { const response = await esClient.cat.health(); @@ -23,6 +24,7 @@ export async function BulkStoreToDB(validFiles: 
any): Promise { } } + export async function DeleteDocumentWithId(Id: string): Promise { try { const index = 'openapi'; @@ -71,3 +73,4 @@ export async function GetDocumentWithId(id: string): Promise { console.error('Error getting document from database:', error); } } + diff --git a/src/app.ts b/src/app.ts index 6f0fcc4..66644f2 100644 --- a/src/app.ts +++ b/src/app.ts @@ -19,11 +19,13 @@ dotenv.config(); const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const rootDir = path.join(__dirname); + const octokit = new CustomOctokit({ userAgent: 'github-openapi-search/v0.0.1', auth: process.env.GITHUB_API_KEY, throttle: { onRateLimit: (retryAfter, options): boolean => { + octokit.log.warn( `Request quota exhausted for request ${options.method} ${options.url}`, ); @@ -38,6 +40,7 @@ const octokit = new CustomOctokit({ }, }); + const esHost = process.env.ES_HOST || 'localhost'; const esClient = new es.Client({ host: 'http://' + esHost + ':9200', @@ -47,7 +50,6 @@ const esClient = new es.Client({ const app = express(); app.set('view engine', 'pug'); app.set('views', path.join(rootDir, 'templates')); - app.get('/search', async (_req, _res) => { const query = _req.query.q as string; const results = await passiveSearch(query); @@ -80,6 +82,7 @@ app.use('/ping', async (_req, _res) => { _res.send(response); }); + app.get('/openapi/:id', async (_req, _res) => { const id = _req.params.id; GetDocumentWithId(id).then((response) => { @@ -114,5 +117,5 @@ app.get('/file/:id', (_req, _res) => { }); }) - export { octokit, esClient, app }; + diff --git a/src/searchtools/search.ts b/src/searchtools/search.ts index bb3ed6e..41b5414 100644 --- a/src/searchtools/search.ts +++ b/src/searchtools/search.ts @@ -28,7 +28,6 @@ export async function activeSearch( per_page: 100, }, (response: any) => { - console.log(response); files = files.concat(response.data); if (files.length >= 200) { processCount++; @@ -73,6 +72,7 @@ export async function activeSearch( 
return validFiles; } + export async function passiveSearch(query: string): Promise { try { if (esClient === undefined) { @@ -105,6 +105,5 @@ export async function passiveSearch(query: string): Promise { return error; } } - return 'Database not found'; } diff --git a/src/updatetools/updateutils.ts b/src/updatetools/updateutils.ts index dd4afe4..bf76063 100644 --- a/src/updatetools/updateutils.ts +++ b/src/updatetools/updateutils.ts @@ -1,3 +1,4 @@ + import OASNormalize from 'oas-normalize'; import { octokit, esClient } from '../app.js'; import { DeleteDocumentWithId, CreateDocument } from '../DB/dbutils.js'; @@ -110,3 +111,4 @@ export async function UpdateDocument(document: any): Promise { } }); } +