Skip to content

Commit

Permalink
Add docker-compose override file for Traffic Monitoring (#2224)
Browse files Browse the repository at this point in the history
* add docker-compose override file for traffic monitoring

* Update Documentation & Code Style

* remove faulty imports

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
tstadel and github-actions[bot] committed Feb 21, 2022
1 parent 9096dda commit b2e6dcc
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 4 deletions.
34 changes: 34 additions & 0 deletions docker-compose.mitm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# docker-compose override file to enable HTTP traffic monitoring between ui, haystack-api and elasticsearch using mitmproxy.
# After startup, the mitmweb UI is available at http://localhost:8081 in your browser.
# Usage: docker-compose -f docker-compose[-gpu].yml -f docker-compose.mitm.yml up
version: "3"
services:
  haystack-api:
    environment:
      # Send outgoing HTTP(S) requests through the mitmproxy service defined below.
      - HTTP_PROXY=http://mitmproxy:8080
      - HTTPS_PROXY=https://mitmproxy:8080
      # System CA bundle; the startup command below adds the mitmproxy CA to it
      # via update-ca-certificates.
      - REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
      # NOTE(review): presumably maps to the document store's `use_system_proxy`
      # init parameter - confirm against the REST API config loading.
      - DOCUMENTSTORE_PARAMS_USE_SYSTEM_PROXY=true
    # Folded scalar: the lines below are joined with single spaces into one command.
    # Steps: wait, fetch the mitmproxy CA certificate through the proxy, register
    # it with the system trust store, then start the API server.
    # NOTE(review): the fixed sleep presumably gives mitmproxy time to start, since
    # depends_on only orders container startup - confirm.
    command: >-
      /bin/bash -c 'sleep 10
      && wget -e http_proxy=mitmproxy:8080 -O /usr/local/share/ca-certificates/mitmproxy.crt http://mitm.it/cert/pem
      && update-ca-certificates
      && gunicorn rest_api.application:app -b 0.0.0.0 -k uvicorn.workers.UvicornWorker --workers 2 --timeout 180'
    depends_on:
      - mitmproxy

  ui:
    environment:
      # Same proxy and CA bundle settings as haystack-api.
      - HTTP_PROXY=http://mitmproxy:8080
      - HTTPS_PROXY=https://mitmproxy:8080
      - REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
    # Same certificate bootstrap as haystack-api, with a longer initial wait,
    # followed by the Streamlit UI start.
    command: >-
      /bin/bash -c 'sleep 15
      && wget -e http_proxy=mitmproxy:8080 -O /usr/local/share/ca-certificates/mitmproxy.crt http://mitm.it/cert/pem
      && update-ca-certificates
      && python -m streamlit run ui/webapp.py'
    depends_on:
      - mitmproxy

  mitmproxy:
    image: "mitmproxy/mitmproxy:latest"
    # Quoted so the mappings are always read as strings, never as numbers.
    ports:
      - "8080:8080" # proxy port referenced by HTTP_PROXY / HTTPS_PROXY above
      - "8081:8081" # mitmweb UI (see the header comment of this file)
    # --web-host 0.0.0.0 binds the web UI to all interfaces of the container.
    # NOTE(review): block_global=false presumably lifts mitmproxy's default
    # blocking of clients on public addresses - confirm against the mitmproxy docs.
    command: "mitmweb --web-host 0.0.0.0 --set block_global=false"
23 changes: 19 additions & 4 deletions haystack/document_stores/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
from modulefinder import Module
from typing import List, Optional, Union, Dict, Any, Generator
from typing import List, Optional, Type, Union, Dict, Any, Generator

import json
import logging
import time
from copy import deepcopy
from string import Template
from collections import defaultdict

import numpy as np
from scipy.special import expit
from tqdm.auto import tqdm

try:
from elasticsearch import Elasticsearch, RequestsHttpConnection
from elasticsearch import Elasticsearch, RequestsHttpConnection, Connection, Urllib3HttpConnection
from elasticsearch.helpers import bulk, scan
from elasticsearch.exceptions import RequestError
except (ImportError, ModuleNotFoundError) as ie:
Expand Down Expand Up @@ -65,6 +64,7 @@ def __init__(
skip_missing_embeddings: bool = True,
synonyms: Optional[List] = None,
synonym_type: str = "synonym",
use_system_proxy: bool = False,
):
"""
A DocumentStore using Elasticsearch to store and query the documents for our search.
Expand Down Expand Up @@ -137,6 +137,7 @@ def __init__(
:param synonym_type: Synonym filter type can be passed.
Synonym or Synonym_graph to handle synonyms, including multi-word synonyms correctly during the analysis process.
More info at https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-graph-tokenfilter.html
:param use_system_proxy: Whether to use system proxy.
"""
# save init parameters to enable export of component config as YAML
Expand Down Expand Up @@ -172,6 +173,7 @@ def __init__(
skip_missing_embeddings=skip_missing_embeddings,
synonyms=synonyms,
synonym_type=synonym_type,
use_system_proxy=use_system_proxy,
)

self.client = self._init_elastic_client(
Expand All @@ -186,6 +188,7 @@ def __init__(
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
use_system_proxy=use_system_proxy,
)

# configure mappings to ES fields that will be used for querying / displaying results
Expand Down Expand Up @@ -251,13 +254,18 @@ def _init_elastic_client(
ca_certs: Optional[str],
verify_certs: bool,
timeout: int,
use_system_proxy: bool,
) -> Elasticsearch:

hosts = self._prepare_hosts(host, port)

if (api_key or api_key_id) and not (api_key and api_key_id):
raise ValueError("You must provide either both or none of `api_key_id` and `api_key`")

connection_class: Type[Connection] = Urllib3HttpConnection
if use_system_proxy:
connection_class = RequestsHttpConnection

if api_key:
# api key authentication
client = Elasticsearch(
Expand All @@ -267,6 +275,7 @@ def _init_elastic_client(
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
connection_class=connection_class,
)
elif aws4auth:
# aws elasticsearch with IAM
Expand All @@ -288,11 +297,17 @@ def _init_elastic_client(
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
connection_class=connection_class,
)
else:
# there is no authentication for this elasticsearch instance
client = Elasticsearch(
hosts=hosts, scheme=scheme, ca_certs=ca_certs, verify_certs=verify_certs, timeout=timeout
hosts=hosts,
scheme=scheme,
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
connection_class=connection_class,
)

# Test connection
Expand Down

0 comments on commit b2e6dcc

Please sign in to comment.