Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add docker-compose override file for Traffic Monitoring #2224

Merged
merged 4 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 34 additions & 0 deletions docker-compose.mitm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# docker-compose override file that enables HTTP traffic monitoring between
# ui, haystack-api and elasticsearch using mitmproxy.
# After startup, the mitmweb interface is available at localhost:8081 in your browser.
# Usage: docker-compose -f docker-compose[-gpu].yml -f docker-compose.mitm.yml up
version: "3"
services:
  haystack-api:
    environment:
      # Send outgoing HTTP(S) requests through the mitmproxy service.
      # NOTE(review): HTTPS_PROXY uses an https:// scheme on the same port as
      # HTTP_PROXY - confirm that clients can reach mitmproxy this way.
      - HTTP_PROXY=http://mitmproxy:8080
      - HTTPS_PROXY=https://mitmproxy:8080
      # CA bundle regenerated by update-ca-certificates in the command below.
      - REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
      # Presumably mapped to the document store's `use_system_proxy` parameter
      # so the Elasticsearch client honours the proxy settings - verify against
      # the rest_api configuration.
      - DOCUMENTSTORE_PARAMS_USE_SYSTEM_PROXY=true
    # Wait (presumably for mitmproxy to come up), download the mitmproxy CA
    # certificate through the proxy, register it in the system trust store,
    # then start the REST API. The folded scalar joins the lines with single
    # spaces into one shell command.
    command: >-
      /bin/bash -c 'sleep 10
      && wget -e http_proxy=mitmproxy:8080 -O /usr/local/share/ca-certificates/mitmproxy.crt http://mitm.it/cert/pem
      && update-ca-certificates
      && gunicorn rest_api.application:app -b 0.0.0.0 -k uvicorn.workers.UvicornWorker --workers 2 --timeout 180'
    depends_on:
      - mitmproxy
  ui:
    environment:
      # Same proxy and CA bundle settings as haystack-api.
      - HTTP_PROXY=http://mitmproxy:8080
      - HTTPS_PROXY=https://mitmproxy:8080
      - REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
    # Same certificate bootstrap as haystack-api, with a longer initial sleep,
    # then start the Streamlit UI.
    command: >-
      /bin/bash -c 'sleep 15
      && wget -e http_proxy=mitmproxy:8080 -O /usr/local/share/ca-certificates/mitmproxy.crt http://mitm.it/cert/pem
      && update-ca-certificates
      && python -m streamlit run ui/webapp.py'
    depends_on:
      - mitmproxy
  mitmproxy:
    image: "mitmproxy/mitmproxy:latest"
    ports:
      # Quoted so that no YAML parser can misread HOST:CONTAINER as a number.
      # 8080: proxy port used by HTTP_PROXY/HTTPS_PROXY above.
      - "8080:8080"
      # 8081: mitmweb interface (see the header comment).
      - "8081:8081"
    command: "mitmweb --web-host 0.0.0.0 --set block_global=false"
23 changes: 19 additions & 4 deletions haystack/document_stores/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
from modulefinder import Module
from typing import List, Optional, Union, Dict, Any, Generator
from typing import List, Optional, Type, Union, Dict, Any, Generator

import json
import logging
import time
from copy import deepcopy
from string import Template
from collections import defaultdict

import numpy as np
from scipy.special import expit
from tqdm.auto import tqdm

try:
from elasticsearch import Elasticsearch, RequestsHttpConnection
from elasticsearch import Elasticsearch, RequestsHttpConnection, Connection, Urllib3HttpConnection
from elasticsearch.helpers import bulk, scan
from elasticsearch.exceptions import RequestError
except (ImportError, ModuleNotFoundError) as ie:
Expand Down Expand Up @@ -65,6 +64,7 @@ def __init__(
skip_missing_embeddings: bool = True,
synonyms: Optional[List] = None,
synonym_type: str = "synonym",
use_system_proxy: bool = False,
):
"""
A DocumentStore using Elasticsearch to store and query the documents for our search.
Expand Down Expand Up @@ -137,6 +137,7 @@ def __init__(
:param synonym_type: Synonym filter type can be passed.
Synonym or Synonym_graph to handle synonyms, including multi-word synonyms correctly during the analysis process.
More info at https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-graph-tokenfilter.html
:param use_system_proxy: Whether to use system proxy.

"""
# save init parameters to enable export of component config as YAML
Expand Down Expand Up @@ -172,6 +173,7 @@ def __init__(
skip_missing_embeddings=skip_missing_embeddings,
synonyms=synonyms,
synonym_type=synonym_type,
use_system_proxy=use_system_proxy,
)

self.client = self._init_elastic_client(
Expand All @@ -186,6 +188,7 @@ def __init__(
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
use_system_proxy=use_system_proxy,
)

# configure mappings to ES fields that will be used for querying / displaying results
Expand Down Expand Up @@ -251,13 +254,18 @@ def _init_elastic_client(
ca_certs: Optional[str],
verify_certs: bool,
timeout: int,
use_system_proxy: bool,
) -> Elasticsearch:

hosts = self._prepare_hosts(host, port)

if (api_key or api_key_id) and not (api_key and api_key_id):
raise ValueError("You must provide either both or none of `api_key_id` and `api_key`")

connection_class: Type[Connection] = Urllib3HttpConnection
if use_system_proxy:
connection_class = RequestsHttpConnection

if api_key:
# api key authentication
client = Elasticsearch(
Expand All @@ -267,6 +275,7 @@ def _init_elastic_client(
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
connection_class=connection_class,
)
elif aws4auth:
# aws elasticsearch with IAM
Expand All @@ -288,11 +297,17 @@ def _init_elastic_client(
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
connection_class=connection_class,
)
else:
# there is no authentication for this elasticsearch instance
client = Elasticsearch(
hosts=hosts, scheme=scheme, ca_certs=ca_certs, verify_certs=verify_certs, timeout=timeout
hosts=hosts,
scheme=scheme,
ca_certs=ca_certs,
verify_certs=verify_certs,
timeout=timeout,
connection_class=connection_class,
)

# Test connection
Expand Down