Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions content/rest/rate-limit.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ The REST API for searching items has a custom rate limit that is separate from t

{% ifversion fpt or ghec or ghes > 3.5 %}* The `dependency_snapshots` object provides your rate limit status for submitting snapshots to the dependency graph. For more information, see "[AUTOTITLE](/rest/dependency-graph)."{% endif %}

* The `code_scanning_upload` object provides your rate limit status for uploading SARIF results to code scanning. For more information, see "[AUTOTITLE](/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github)."

For more information on the headers and values in the rate limit response, see "[AUTOTITLE](/rest/overview/resources-in-the-rest-api#rate-limit-http-headers)."


Expand Down
3 changes: 3 additions & 0 deletions middleware/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,11 @@ import trailingSlashes from './trailing-slashes.js'
import fastlyBehavior from './fastly-behavior.js'
import mockVaPortal from './mock-va-portal.js'
import dynamicAssets from './dynamic-assets.js'
import rateLimit from './rate-limit.js'

// Deployment/runtime settings read once from the environment at module load.
const { DEPLOYMENT_ENV, NODE_ENV } = process.env
// True when NODE_ENV is 'test' or when the GITHUB_ACTIONS env var is the string 'true'.
const isTest = NODE_ENV === 'test' || process.env.GITHUB_ACTIONS === 'true'

// By default, logging each request (with morgan) is on. And by default
// it's off if you're in a production environment or running automated tests.
// But if you set the env var, that takes precedence.
Expand Down Expand Up @@ -195,6 +197,7 @@ export default function (app) {
}

// *** Early exits ***
app.use(rateLimit)
app.use(instrument(handleInvalidPaths, './handle-invalid-paths'))
app.use(instrument(handleNextDataPath, './handle-next-data-path'))

Expand Down
142 changes: 142 additions & 0 deletions middleware/rate-limit.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import rateLimit from 'express-rate-limit'

import statsd from '../lib/statsd.js'
import { noCacheControl } from './cache-control.js'

// Length of a single rate-limit window, expressed in seconds.
const EXPIRES_IN_AS_SECONDS = 60

// Maximum number of counted requests per key within one window.
// Taken from the RATE_LIMIT_MAX environment variable when it is set to a
// non-empty value; otherwise 100 is used. A value that can't be parsed as
// a base-10 integer aborts module loading.
const MAX = Number.parseInt(process.env.RATE_LIMIT_MAX || '100', 10)
if (Number.isNaN(MAX)) {
  throw new Error(`process.env.RATE_LIMIT_MAX (${process.env.RATE_LIMIT_MAX}) not a number`)
}

// Matches a dotted-quad IPv4 address followed by `:<port>`.
// Capture group 1 is the address without the port.
const ipv4WithPort = /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):\d{1,5}$/

/**
 * Express middleware (built with `express-rate-limit`) that rate limits
 * requests whose query string looks suspicious.
 *
 * Requests are counted per client IP, in windows of
 * `EXPIRES_IN_AS_SECONDS` seconds, with at most `MAX` counted requests
 * per window. Requests for which `skip` returns true are never counted.
 */
export default rateLimit({
  // 1 minute
  windowMs: EXPIRES_IN_AS_SECONDS * 1000,
  // limit each IP to X requests per windowMs
  // We currently have about 25 instances in production. That's routed
  // in Azure to spread the requests to each healthy instance.
  // So, the true rate limit, per `windowMs`, is this number multiplied
  // by the current number of instances.
  max: MAX,

  // Return rate limit info in the `RateLimit-*` headers
  standardHeaders: true,
  // Disable the `X-RateLimit-*` headers
  legacyHeaders: false,

  // Computes the key under which a request is counted.
  keyGenerator: (req) => {
    const { ip } = req
    // `req.ip` is not guaranteed to be a string. Calling `.replace()` on
    // a missing value would throw, so all such requests share one bucket
    // instead of failing with a TypeError.
    if (typeof ip !== 'string') return 'unknown'

    // In our Azure preview environment, with the way the proxying works,
    // the `x-forwarded-for` is always the origin IP with a port number
    // attached. E.g. `75.40.90.27:56675, 169.254.129.1`
    // This port number portion changes with every request, so we strip it.
    return ip.replace(ipv4WithPort, '$1')
  },

  // Returns true for requests that must not be counted at all.
  skip: (req) => {
    // Always ignore these
    if (req.path === '/api/events') return true
    // If the query string looks totally regular, then skip
    if (!isSuspiciousRequest(req)) return true

    // This is so we can get a sense of how many requests are being
    // treated as suspicious. They don't necessarily get rate limited.
    const tags = [
      `url:${req.url}`,
      `ip:${req.ip}`,
      `path:${req.path}`,
      `qs:${req.url.split('?')[1]}`,
    ]

    statsd.increment('middleware.rate_limit_dont_skip', 1, tags)

    return false
  },

  // Sends the response for a rate-limited request. The status code and
  // body come from the `options` passed in by `express-rate-limit`.
  // The response is marked as not cacheable via `noCacheControl`.
  handler: (req, res, next, options) => {
    const tags = [`url:${req.url}`, `ip:${req.ip}`, `path:${req.path}`]
    statsd.increment('middleware.rate_limit', 1, tags)
    noCacheControl(res)
    res.status(options.statusCode).send(options.message)
  },
})

// Query string keys that are expected for request paths starting with a
// given prefix. `isSuspiciousRequest` uses the first entry whose prefix
// matches the request path, so the order of entries matters.
const RECOGNIZED_KEYS_BY_PREFIX = {
  '/_next/data/': [
    'versionId',
    'productId',
    'restPage',
    'apiVersion',
    'category',
    'subcategory',
  ],
  '/api/search': [
    'query',
    'language',
    'version',
    'page',
    'product',
    'autocomplete',
    'limit',
  ],
  '/api/anchor-redirect': [
    'hash',
    'path',
  ],
  '/api/webhooks': [
    'category',
    'version',
  ],
  '/api/pageinfo': [
    'pathname',
  ],
}

// Query string keys expected on specific kinds of pages.
const RECOGNIZED_KEYS = {
  // Used for paths whose second segment is `search`, e.g. `/fr/search`
  search: [
    'query',
    'page',
  ],
}

// Query string keys that are accepted on any other path.
const MISC_KEYS = [
  'learn', // Learning track pages
  'learnProduct', // Learning track pages
  'platform', // Platform picker
  'tool', // Tool picker
  'apiVersion', // When apiVersion isn't the only one. E.g. ?apiVersion=XXX&tool=vscode
  'apiversion', // Lowercase for rest pages
]

/**
 * Decide whether a request looks like a DoS request, i.e. is suspicious.
 *
 * We've seen lots of requests slip past the CDN and its edge rate limiter
 * that clearly are not realistic URLs that you'd get in a browser.
 * For example `?action=octrh&api=h9vcd&immagine=jzs3c&lang=xb0kp&m=rrmek`
 * Some URLs, in particular the `/api/..` endpoints, legitimately take
 * one or more query string keys, so the set of accepted keys depends on
 * the request path.
 *
 * Remember, a `true` result doesn't by itself mean the request will be
 * rate limited. It has to be both suspicious AND part of lots and lots
 * of requests.
 *
 * @param {Request} req - Request object; only `req.query` and `req.path` are read.
 * @returns {boolean} `true` if at least one query string key is not
 *   recognized for the request's path, otherwise `false`.
 */
function isSuspiciousRequest(req) {
  const queryKeys = Object.keys(req.query)

  // This function can (at the moment) only speculate based on query
  // strings, so a URL without any is never suspicious.
  if (queryKeys.length === 0) return false

  // E.g. `/en/rest/actions?apiVersion=YYYY-MM-DD`
  const [firstKey] = queryKeys
  if (queryKeys.length === 1 && firstKey === 'apiVersion') return false

  // True if any query string key is missing from the given list.
  const hasKeyOutside = (allowed) => queryKeys.some((name) => !allowed.includes(name))

  // The first matching path prefix decides which keys are recognized.
  const prefixEntry = Object.entries(RECOGNIZED_KEYS_BY_PREFIX).find(([prefix]) =>
    req.path.startsWith(prefix),
  )
  if (prefixEntry) {
    const [, recognizedKeys] = prefixEntry
    return hasKeyOutside(recognizedKeys)
  }

  // E.g. `/fr/search?query=foo`
  const secondSegment = req.path.split('/')[2]
  if (secondSegment === 'search') {
    return hasKeyOutside(RECOGNIZED_KEYS.search)
  }

  // Any other path: only the miscellaneous keys are accepted.
  return hasKeyOutside(MISC_KEYS)
}
18 changes: 18 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"dayjs": "^1.11.3",
"dotenv": "^16.0.1",
"express": "^4.18.1",
"express-rate-limit": "6.7.0",
"express-timeout-handler": "^2.2.2",
"fastest-levenshtein": "1.0.16",
"file-type": "18.2.1",
Expand Down