Skip to content
Permalink
Browse files

1. fix an item parse bug

2. modify Dockerfile
  • Loading branch information...
gxtrobot committed Sep 9, 2019
1 parent 4c6c7c7 commit ba16457a2be5922fe00602ef1dd458b49da2970c
Showing with 17 additions and 34 deletions.
  1. +5 −12 Dockerfile
  2. +5 −6 bustag/spider/parser.py
  3. +2 −12 docker/entry.sh
  4. +5 −4 tests/test_parser.py
@@ -11,7 +11,7 @@ RUN mv /etc/apt/sources.list /etc/apt/sources.list.bak && mv ./sources.list /etc
RUN apt-get -o Acquire::Check-Valid-Until=false update \
&& apt-get install \
--no-install-recommends --yes \
build-essential libpq-dev cron \
build-essential libpq-dev cron git \
python3-dev --yes

FROM base as build
@@ -28,34 +28,27 @@ COPY ./docker/sources.list .

RUN mv /etc/apt/sources.list /etc/apt/sources.list.bak && mv ./sources.list /etc/apt/

RUN apt-get update && apt-get -y install cron
RUN apt-get update && apt-get -y install cron git

WORKDIR /app

COPY ./docker/crontab.txt /etc/cron.d/crontab.txt

# Set permissions on the cron job file (0644: owner read/write, others read-only; no execute bit)
RUN chmod 0644 /etc/cron.d/crontab.txt

RUN crontab /etc/cron.d/crontab.txt

COPY --from=build /install /install

COPY requirements.txt .

RUN pip install --no-index --find-links=/install -r requirements.txt

COPY . /app
RUN mkdir /app/docker

RUN pip install -e .
COPY docker/entry.sh /app/docker/

RUN touch /var/log/bustag.log

RUN rm -rf /install && rm -rf /root/.cache/pip

RUN chmod 755 /app/docker/*.sh

EXPOSE 8080
EXPOSE 8000

LABEL maintainer="gxtrobot <gxtrobot@gmail.com>"

@@ -40,12 +40,11 @@ def parse_item(text):
tag_value = ''
tag_link = ''
links = tag.find('a')
if links and len(links) == 1:
spans = tag.find('span')
if spans:
tag_type = (spans[0].text)
tag_link = links[0].attrs['href']
tag_value = links[0].text
spans = tag.find('span.header')
if spans and len(links) == 1:
tag_type = (spans[0].text)
tag_link = links[0].attrs['href']
tag_value = links[0].text
if tag_type != '' and tag_value != '':
tag_list.append(Tag(tag_type, tag_value, tag_link))
else:
14 docker/entry.sh 100644 → 100755
@@ -1,17 +1,7 @@
#!/bin/bash

PYTHON=python3
# check if crontab.txt exists

echo `pwd`

if [ -e './data/crontab.txt' ]
then
crontab data/crontab.txt
echo 'use new crontab.txt'
else
echo 'use default crontab.txt'
fi
# start cron
service cron start

gunicorn bustag.app.index:app --bind='0.0.0.0:8080'
${PYTHON} -m bustag.app.index
@@ -5,10 +5,11 @@

@pytest.fixture
def html():
url = 'https://www.cdnbus.bid/MADM-116'
with open('./tests/item.html') as f:
html = f.read()
return html
url = 'https://www.cdnbus.bid/SHKD-875'
# url = 'https://www.cdnbus.bid/CESD-797'
session = HTMLSession()
r = session.get(url)
return r.text


def test_process_item(html):

0 comments on commit ba16457

Please sign in to comment.
You can’t perform that action at this time.