Skip to content

Commit

Permalink
Merge e972b0e into 93d7ad5
Browse files Browse the repository at this point in the history
  • Loading branch information
gas1121 committed Jul 24, 2017
2 parents 93d7ad5 + e972b0e commit c93cff3
Show file tree
Hide file tree
Showing 43 changed files with 468 additions and 132 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.vscode

# Python binaries
*.pyc

Expand Down
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ env:
- docker: 1
dockerfile_name: Dockerfile.py2alpine
docker_tag_suffix: dev-alpine
- docker: 1
dockerfile_name: Dockerfile.py3
docker_tag_suffix: dev-py3

install: true

Expand Down
5 changes: 3 additions & 2 deletions crawler/crawling/distributed_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def update_ipaddress(self):
try:
obj = urllib.request.urlopen(settings.get('PUBLIC_IP_URL',
'http://ip.42.pl/raw'))
results = self.ip_regex.findall(obj.read())
results = self.ip_regex.findall(obj.read().decode('utf-8'))
if len(results) > 0:
self.my_ip = results[0]
else:
Expand Down Expand Up @@ -313,7 +313,8 @@ def report_self(self):
def from_settings(cls, settings):
server = redis.Redis(host=settings.get('REDIS_HOST'),
port=settings.get('REDIS_PORT'),
db=settings.get('REDIS_DB'))
db=settings.get('REDIS_DB'),
decode_responses=True)
persist = settings.get('SCHEDULER_PERSIST', True)
up_int = settings.get('SCHEDULER_QUEUE_REFRESH', 10)
hits = settings.get('QUEUE_HITS', 10)
Expand Down
3 changes: 2 additions & 1 deletion crawler/crawling/log_retry_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ def setup(self, settings):
if self.settings['STATS_STATUS_CODES']:
self.redis_conn = redis.Redis(host=self.settings.get('REDIS_HOST'),
port=self.settings.get('REDIS_PORT'),
db=settings.get('REDIS_DB'))
db=settings.get('REDIS_DB'),
decode_responses=True)

try:
self.redis_conn.info()
Expand Down
3 changes: 2 additions & 1 deletion crawler/crawling/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ def from_settings(cls, settings):
producer = KafkaProducer(bootstrap_servers=settings['KAFKA_HOSTS'],
retries=3,
linger_ms=settings['KAFKA_PRODUCER_BATCH_LINGER_MS'],
buffer_memory=settings['KAFKA_PRODUCER_BUFFER_BYTES'])
buffer_memory=settings['KAFKA_PRODUCER_BUFFER_BYTES'],
value_serializer=lambda m: m.encode('utf-8'))
except Exception as e:
logger.error("Unable to connect to Kafka in Pipeline"\
", raising exit flag.")
Expand Down
3 changes: 2 additions & 1 deletion crawler/crawling/redis_stats_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def setup(self, settings):
# set up redis
self.redis_conn = redis.Redis(host=settings.get('REDIS_HOST'),
port=settings.get('REDIS_PORT'),
db=settings.get('REDIS_DB'))
db=settings.get('REDIS_DB'),
decode_responses=True)

try:
self.redis_conn.info()
Expand Down
6 changes: 4 additions & 2 deletions crawler/tests/online.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def setUp(self):
# set up redis
self.redis_conn = redis.Redis(host=self.settings['REDIS_HOST'],
port=self.settings['REDIS_PORT'],
db=self.settings['REDIS_DB'])
db=self.settings['REDIS_DB'],
decode_responses=True)
try:
self.redis_conn.info()
except ConnectionError:
Expand All @@ -66,7 +67,8 @@ def setUp(self):
group_id="demo-id",
auto_commit_interval_ms=10,
consumer_timeout_ms=5000,
auto_offset_reset='earliest'
auto_offset_reset='earliest',
value_deserializer=lambda m: m.decode('utf-8')
)
time.sleep(1)

Expand Down
35 changes: 35 additions & 0 deletions docker/crawler/Dockerfile.py3
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Crawler image on the official Python 3.6 base: installs native build
# dependencies, the crawler's pip requirements, then the crawler codebase.
FROM python:3.6
# MAINTAINER is deprecated; LABEL carries the same contact metadata.
LABEL maintainer="Madison Bahmer <madison.bahmer@istresearch.com>"

# os setup
# Toolchain and headers for building native extensions with pip
# (presumably lxml and crypto-related wheels from requirements.txt -- confirm).
# The Debian `python-lxml` / `python-dev` packages are intentionally not
# installed: they target the distro's system Python, not the interpreter
# shipped by this base image, so they would only add dead weight.
# update + install + list cleanup stay in one RUN so the layer is never
# built from a stale cached package index.
RUN apt-get update && apt-get -y install \
    build-essential \
    libssl-dev \
    libffi-dev \
    libxml2-dev \
    libxslt1-dev \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

# install requirements
# utils is copied next to the app dir only for the duration of the pip
# install (presumably requirements.txt references it by relative path --
# verify), and is removed afterwards.
COPY utils /usr/src/utils
COPY crawler/requirements.txt /usr/src/app/
RUN pip install --no-cache-dir -r requirements.txt
RUN rm -rf /usr/src/utils

# move codebase over
COPY crawler /usr/src/app

# override settings via localsettings.py
COPY docker/crawler/settings.py /usr/src/app/crawling/localsettings.py

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run the spider
CMD ["scrapy", "runspider", "crawling/spiders/link_spider.py"]
27 changes: 27 additions & 0 deletions docker/kafka-monitor/Dockerfile.py3
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Kafka Monitor image on the official Python 3.6 base: installs the pip
# requirements, then the kafka-monitor codebase and its docker settings.
FROM python:3.6
# MAINTAINER is deprecated; LABEL carries the same contact metadata.
LABEL maintainer="Madison Bahmer <madison.bahmer@istresearch.com>"

# os setup
# No OS packages are installed for this image, so there is no
# `apt-get update` layer: a lone update installs nothing and only bakes a
# package index (which then goes stale in the build cache) into the image.
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

# install requirements
# utils is copied next to the app dir only for the duration of the pip
# install (presumably requirements.txt references it by relative path --
# verify), and is removed afterwards.
COPY utils /usr/src/utils
COPY kafka-monitor/requirements.txt /usr/src/app/
RUN pip install --no-cache-dir -r requirements.txt
RUN rm -rf /usr/src/utils

# move codebase over
COPY kafka-monitor /usr/src/app

# override settings via localsettings.py
COPY docker/kafka-monitor/settings.py /usr/src/app/localsettings.py

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run command
CMD ["python", "kafka_monitor.py", "run"]
27 changes: 27 additions & 0 deletions docker/redis-monitor/Dockerfile.py3
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Redis Monitor image on the official Python 3.6 base: installs the pip
# requirements, then the redis-monitor codebase and its docker settings.
FROM python:3.6
# MAINTAINER is deprecated; LABEL carries the same contact metadata.
LABEL maintainer="Madison Bahmer <madison.bahmer@istresearch.com>"

# os setup
# No OS packages are installed for this image, so there is no
# `apt-get update` layer: a lone update installs nothing and only bakes a
# package index (which then goes stale in the build cache) into the image.
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

# install requirements
# utils is copied next to the app dir only for the duration of the pip
# install (presumably requirements.txt references it by relative path --
# verify), and is removed afterwards.
COPY utils /usr/src/utils
COPY redis-monitor/requirements.txt /usr/src/app/
RUN pip install --no-cache-dir -r requirements.txt
RUN rm -rf /usr/src/utils

# move codebase over
COPY redis-monitor /usr/src/app

# override settings via localsettings.py
COPY docker/redis-monitor/settings.py /usr/src/app/localsettings.py

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run command
CMD ["python", "redis_monitor.py"]
27 changes: 27 additions & 0 deletions docker/rest/Dockerfile.py3
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# REST service image on the official Python 3.6 base: installs the pip
# requirements, then the rest codebase and its docker settings.
FROM python:3.6
# MAINTAINER is deprecated; LABEL carries the same contact metadata.
LABEL maintainer="Madison Bahmer <madison.bahmer@istresearch.com>"

# os setup
# No OS packages are installed for this image, so there is no
# `apt-get update` layer: a lone update installs nothing and only bakes a
# package index (which then goes stale in the build cache) into the image.
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

# install requirements
# utils is copied next to the app dir only for the duration of the pip
# install (presumably requirements.txt references it by relative path --
# verify), and is removed afterwards.
COPY utils /usr/src/utils
COPY rest/requirements.txt /usr/src/app/
RUN pip install --no-cache-dir -r requirements.txt
RUN rm -rf /usr/src/utils

# move codebase over
COPY rest /usr/src/app

# override settings via localsettings.py
COPY docker/rest/settings.py /usr/src/app/localsettings.py

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run command
CMD ["python", "rest_service.py"]
17 changes: 13 additions & 4 deletions docker/run_docker_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,17 @@ if [ $? -eq 1 ]; then
exit 1
fi

python tests/online.py -v
if [ $? -eq 1 ]; then
echo "integration tests failed"
exit 1
# if 3 parameters passed in, then it's util's test
if [ $# -eq 3 ]; then
python tests/online.py -r $1 -p $2 -z $3
if [ $? -eq 1 ]; then
echo "integration tests failed"
exit 1
fi
else
python tests/online.py -v
if [ $? -eq 1 ]; then
echo "integration tests failed"
exit 1
fi
fi
20 changes: 20 additions & 0 deletions docker/utils/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Test image for the utils package on the official Python 2.7 base:
# installs the local package plus nose and ships the docker test script.
FROM python:2.7
# MAINTAINER is deprecated; LABEL carries the same contact metadata.
LABEL maintainer="Madison Bahmer <madison.bahmer@istresearch.com>"

# os setup
# No OS packages are installed for this image, so there is no
# `apt-get update` layer: a lone update installs nothing and only bakes a
# package index (which then goes stale in the build cache) into the image.
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

# move codebase over and install requirements
COPY utils /usr/src/app
RUN pip install .
RUN pip install nose

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run command
# NOTE(review): presumably a long-running placeholder that keeps the
# container alive so the tests can be exec'd into it -- confirm.
CMD ["ping", "localhost"]
33 changes: 33 additions & 0 deletions docker/utils/Dockerfile.py2alpine
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Test image for the utils package on python:2.7.12-alpine: installs the
# local package plus nose and ships the docker test script.
FROM python:2.7.12-alpine
MAINTAINER Madison Bahmer <madison.bahmer@istresearch.com>

# move codebase over
WORKDIR /usr/src/app
COPY utils /usr/src/app

# Combine the run commands to create a single intermediate image layer.
# This is MANDATORY because the development dependencies are huge.
RUN mkdir -p /usr/src/app \
&& cd /usr/src/app \
# Installing runtime dependencies
&& apk --no-cache add \
curl \
# Installing build-time dependencies. They will be removed at the end of
# this command sequence.
&& apk --no-cache add --virtual build-dependencies \
build-base \
# Updating pip itself before installing packages
# NOTE(review): no --upgrade flag is passed, so this presumably leaves an
# already-installed pip/setuptools untouched -- confirm the intent.
&& pip install --no-cache-dir pip setuptools \
# Installing scutils from local codebase
&& pip install . \
# Removing build dependencies, leaving the image layer clean and neat
&& apk del build-dependencies
RUN pip install nose

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run command
# NOTE(review): presumably a long-running placeholder that keeps the
# container alive so the tests can be exec'd into it -- confirm.
CMD ["ping", "localhost"]
20 changes: 20 additions & 0 deletions docker/utils/Dockerfile.py3
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Test image for the utils package on the official Python 3.6 base:
# installs the local package plus nose and ships the docker test script.
FROM python:3.6
# MAINTAINER is deprecated; LABEL carries the same contact metadata.
LABEL maintainer="Madison Bahmer <madison.bahmer@istresearch.com>"

# os setup
# No OS packages are installed for this image, so there is no
# `apt-get update` layer: a lone update installs nothing and only bakes a
# package index (which then goes stale in the build cache) into the image.
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

# move codebase over and install requirements
COPY utils /usr/src/app
RUN pip install .
RUN pip install nose

# copy testing script into container
COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh

# set up environment variables

# run command
# NOTE(review): presumably a long-running placeholder that keeps the
# container alive so the tests can be exec'd into it -- confirm.
CMD ["ping", "localhost"]
Loading

0 comments on commit c93cff3

Please sign in to comment.