Skip to content

Commit

Permalink
initial setup harvester
Browse files Browse the repository at this point in the history
  • Loading branch information
vict committed Jul 8, 2016
1 parent d9084f0 commit ea42988
Show file tree
Hide file tree
Showing 32 changed files with 975 additions and 30 deletions.
9 changes: 9 additions & 0 deletions .bumpversion.cfg
@@ -0,0 +1,9 @@
# bumpversion configuration: records the current release number and names the
# files that carry a copy of it.
[bumpversion]
current_version = 0.1.0

# One [bumpversion:file:<path>] section per file that contains the version.
# NOTE(review): confirm the docker/prod/... paths below exist in the repository.
[bumpversion:file:docker/prod/harvester/Dockerfile]

[bumpversion:file:docker/prod/exporter/Dockerfile]

[bumpversion:file:setup.py]

33 changes: 4 additions & 29 deletions .gitignore
@@ -1,7 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
Expand Down Expand Up @@ -43,47 +42,23 @@ htmlcov/
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/
# PyCharm
.idea/

# Spyder project settings
.spyderproject
# Test configuration
test_config.py

# Rope project settings
.ropeproject
22 changes: 22 additions & 0 deletions .travis.yml
@@ -0,0 +1,22 @@
# Travis CI build for sfm-tumblr-harvester: Python 2.7 with the Docker service.
language: python
python:
- '2.7'
# The before_install steps below call sudo.
sudo: required
services:
- docker
before_install:
- sudo apt-get update
- sudo apt-get install -y -o Dpkg::Options::="--force-confnew" docker-engine
- sudo pip install -U docker-compose
- sudo pip install --upgrade ndg-httpsclient
# Bring up the CI containers, then run the unit tests inside the harvester container.
- docker-compose -f docker/ci.docker-compose.yml pull
- docker-compose -f docker/ci.docker-compose.yml up -d
# Wait for the containers to start before exec-ing into one of them.
- sleep 30
- docker exec docker_sfmtumblrharvester_1 python -m unittest discover
install: pip install -r requirements/master.txt
script: python -m unittest discover
# email and slack must be nested under notifications; at column 0 they would be
# parsed as unrelated top-level keys and notifications itself would be null.
notifications:
  email:
  - ychtan@email.gwu.edu
  # NOTE(review): no Slack room or token is configured here; confirm this section is intended.
  slack:
    on_success: never
1 change: 1 addition & 0 deletions LICENSE
Expand Up @@ -19,3 +19,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

33 changes: 32 additions & 1 deletion README.md
@@ -1,2 +1,33 @@
# sfm-tumblr-harvester
A basic harvester for Tumblr public post data as part of Social Feed Manager. http://gwu-libraries.github.io/sfm-ui
A basic harvester for Tumblr public post data as part of [Social Feed Manager](https://gwu-libraries.github.io/sfm-ui).

[![Build Status](https://travis-ci.org/gwu-libraries/sfm-tumblr-harvester.svg?branch=master)](https://travis-ci.org/gwu-libraries/sfm-tumblr-harvester)

Provides harvesters for the [Tumblr API](https://www.tumblr.com/docs/en/api/v2). Harvesting is performed with the official API client, [pytumblr](https://github.com/tumblr/pytumblr).

# Install
```bash
git clone https://github.com/gwu-libraries/sfm-tumblr-harvester
cd sfm-tumblr-harvester
pip install -r requirements/requirements.txt
```

# Ready to work
* Sign up for an account at [Tumblr](https://www.tumblr.com).
* Register an application [here](https://www.tumblr.com/oauth/apps) to get your `CONSUMER_KEY` and `CONSUMER_SECRET`.
* Enter your `CONSUMER_KEY` and `CONSUMER_SECRET` in the [API console](https://api.tumblr.com/console) to get your access token.
* Once you have successfully authorized your app, click the `Show Keys` button at the top right.
* The keys look like the following example (these keys are invalid):

```bash
CONSUMER_KEY = "3jlICwerCIWqEdUdAyuenNyercwkVuXOuYFoxTPafWx8DsUMe2"
CONSUMER_SECRET = "sTCdLJ9kdfgEwTPoYIdfdsteF0XB8WiHlczLx0GgvzRim1L47n"
ACCESS_TOKEN = "sdrsaPx5FtpJ0tfZAG13kMZMjenouGsdJw9W7ssK6husepcFoWg"
ACCESS_TOKEN_SECRET = "0VxKNAMSiNO8IT6PsdattmUsdsfI5X1hP4usBNZLllgkhwsdQiY"
________________________________________________________________________
API_KEY = "3jlICwerCIWqEdUdAyuenNyercwkVuXOuYFoxTPafWx8DsUMe2"

```



20 changes: 20 additions & 0 deletions docker/ci.docker-compose.yml
@@ -0,0 +1,20 @@
# CI stack (services are top-level keys): RabbitMQ plus the harvester container.
sfmrabbit:
  image: rabbitmq@sha256:a5180a37b0baebb938ee9d12dd11eed64a909288d7f344e24771278f8a122367
  environment:
    - TZ=America/New_York
    - RABBITMQ_DEFAULT_USER=sfm_user
    - RABBITMQ_DEFAULT_PASS=password
sfmtumblrharvester:
  image: gwul/sfm-tumblr-harvester:dev
  links:
    # The queue container is reachable under the alias "mq".
    - sfmrabbit:mq
  volumes:
    # Mount the parent directory of this compose file into the container.
    - "..:/opt/sfm-tumblr-harvester"
  environment:
    - TZ=America/New_York
    # Listed without values; expected to come from the environment that runs docker-compose.
    - TUMBLR_CONSUMER_KEY
    - TUMBLR_CONSUMER_SECRET
    - TUMBLR_ACCESS_TOKEN
    - TUMBLR_ACCESS_TOKEN_SECRET
  # Install requirements, wait for the queue on mq:5672, then start the harvester service.
  # The folded scalar joins these lines with single spaces into one command string.
  command: >-
    bash -c "pip install -r requirements/master.txt --upgrade
    && appdeps.py --port-wait mq:5672
    && python tumblr_harvester.py --debug=True service mq sfm_user password"
34 changes: 34 additions & 0 deletions docker/dev.docker-compose.yml
@@ -0,0 +1,34 @@
# Development stack (services are top-level keys): RabbitMQ, harvester and exporter.
sfmrabbit:
  image: rabbitmq@sha256:a5180a37b0baebb938ee9d12dd11eed64a909288d7f344e24771278f8a122367
  ports:
    # Quoted so the mapping is always read as a string.
    - "15672:15672"
  restart: always
  environment:
    - TZ=America/New_York
    - RABBITMQ_DEFAULT_USER=sfm_user
    - RABBITMQ_DEFAULT_PASS=password
sfmtumblrharvester:
  image: gwul/sfm-tumblr-harvester:dev
  links:
    # The queue container is reachable under the alias "mq".
    - sfmrabbit:mq
  volumes:
    # Local checkouts mounted over the paths the container expects.
    - "..:/opt/sfm-tumblr-harvester"
    - "../../sfm-utils:/opt/sfm-utils"
    - "../../warcprox:/opt/warcprox"
  environment:
    - TZ=America/New_York
    - DEBUG=True
  # restart: always

sfmtumblrexporter:
  image: gwul/sfm-tumblr-exporter:dev
  links:
    - sfmrabbit:mq
  volumes:
    - "..:/opt/sfm-tumblr-harvester"
    - "../../sfm-utils:/opt/sfm-utils"
    - "../../warcprox:/opt/warcprox"
  environment:
    - TZ=America/New_York
    - DEBUG=True
  # restart: always
24 changes: 24 additions & 0 deletions docker/dev/exporter/Dockerfile
@@ -0,0 +1,24 @@
# Development image for sfm-tumblr-exporter. The project source is expected to
# be mounted at /opt/sfm-tumblr-harvester when the container runs.
FROM python@sha256:ad39551743b356efda7c61f46019b97d49d1aab01b97f0e6d87c9b34326f3bfe
MAINTAINER Vict Tan <ychtan@email.gwu.edu>

ARG DEBIAN_FRONTEND=noninteractive
# Install git and remove the apt package lists in the same layer so they do not
# stay in the image.
RUN apt-get update && apt-get install -y \
    git=1:2.1.4-2.1+deb8u1 \
    && rm -rf /var/lib/apt/lists/*

# Pin pip to 7.1.2.
RUN pip install pip==7.1.2
# Avoid HTTPS warnings.
RUN pip install --upgrade ndg-httpsclient

# Fetch the requirements files from the master branch.
ADD https://raw.githubusercontent.com/gwu-libraries/sfm-tumblr-harvester/master/requirements/common.txt /tmp/
ADD https://raw.githubusercontent.com/gwu-libraries/sfm-tumblr-harvester/master/requirements/requirements.txt /tmp/

RUN pip install -r /tmp/requirements.txt
RUN pip install appdeps
# Your development directory should be mounted here.
WORKDIR /opt/sfm-tumblr-harvester

# Install the dev requirements from the mounted source, wait for the queue on
# mq:5672, then start the exporter service.
CMD pip install -r requirements/dev.txt --upgrade \
    && appdeps.py --port-wait mq:5672 \
    && python tumblr_exporter.py --debug=$DEBUG service mq $MQ_ENV_RABBITMQ_DEFAULT_USER $MQ_ENV_RABBITMQ_DEFAULT_PASS http://api
20 changes: 20 additions & 0 deletions docker/dev/exporter/README.md
@@ -0,0 +1,20 @@
# sfm-tumblr-exporter dev docker container

A docker container for running sfm-tumblr-exporter as a service.
The harvester code must be mounted as `/opt/sfm-tumblr-harvester`, the sfm-utils code as `/opt/sfm-utils` and the warcprox code as `/opt/warcprox`.
For example:

```yaml
volumes:
- "/my_directory/sfm-tumblr-harvester:/opt/sfm-tumblr-harvester"
- "/my_directory/sfm-utils:/opt/sfm-utils"
- "/my_directory/warcprox:/opt/warcprox"
```

This container requires a link to a container running the queue. This must be linked with the alias `mq`.
For example:

```yaml
links:
- sfmrabbit:mq
```
24 changes: 24 additions & 0 deletions docker/dev/harvester/Dockerfile
@@ -0,0 +1,24 @@
# Development image for sfm-tumblr-harvester. The project source is expected to
# be mounted at /opt/sfm-tumblr-harvester when the container runs.
FROM python@sha256:ad39551743b356efda7c61f46019b97d49d1aab01b97f0e6d87c9b34326f3bfe
MAINTAINER Vict Tan <ychtan@email.gwu.edu>

ARG DEBIAN_FRONTEND=noninteractive
# Install git and remove the apt package lists in the same layer so they do not
# stay in the image.
RUN apt-get update && apt-get install -y \
    git=1:2.1.4-2.1+deb8u1 \
    && rm -rf /var/lib/apt/lists/*

# Pin pip to 7.1.2.
RUN pip install pip==7.1.2
# Avoid HTTPS warnings.
RUN pip install --upgrade ndg-httpsclient

# Fetch the requirements files from the master branch.
ADD https://raw.githubusercontent.com/gwu-libraries/sfm-tumblr-harvester/master/requirements/common.txt /tmp/
ADD https://raw.githubusercontent.com/gwu-libraries/sfm-tumblr-harvester/master/requirements/requirements.txt /tmp/

RUN pip install -r /tmp/requirements.txt
RUN pip install appdeps
# Your development directory should be mounted here.
WORKDIR /opt/sfm-tumblr-harvester

# Install the dev requirements from the mounted source, wait for the queue on
# mq:5672, then start the harvester service.
CMD pip install -r requirements/dev.txt --upgrade \
    && appdeps.py --port-wait mq:5672 \
    && python tumblr_harvester.py --debug=$DEBUG service mq $MQ_ENV_RABBITMQ_DEFAULT_USER $MQ_ENV_RABBITMQ_DEFAULT_PASS
20 changes: 20 additions & 0 deletions docker/dev/harvester/README.md
@@ -0,0 +1,20 @@
# sfm-tumblr-harvester dev docker container

A docker container for running sfm-tumblr-harvester as a service.
The harvester code must be mounted as `/opt/sfm-tumblr-harvester`, the sfm-utils code as `/opt/sfm-utils` and the warcprox code as `/opt/warcprox`.
For example:

```yaml
volumes:
- "/my_directory/sfm-tumblr-harvester:/opt/sfm-tumblr-harvester"
- "/my_directory/sfm-utils:/opt/sfm-utils"
- "/my_directory/warcprox:/opt/warcprox"
```

This container requires a link to a container running the queue. This must be linked with the alias `mq`.
For example:

```yaml
links:
- sfmrabbit:mq
```
25 changes: 25 additions & 0 deletions docker/master.docker-compose.yml
@@ -0,0 +1,25 @@
# Master stack (services are top-level keys): RabbitMQ, harvester and exporter.
sfmrabbit:
  image: rabbitmq@sha256:a5180a37b0baebb938ee9d12dd11eed64a909288d7f344e24771278f8a122367
  ports:
    # Quoted so the mapping is always read as a string.
    - "15672:15672"
  restart: always
  environment:
    - TZ=America/New_York
    - RABBITMQ_DEFAULT_USER=sfm_user
    - RABBITMQ_DEFAULT_PASS=password
sfmtumblrharvester:
  image: gwul/sfm-tumblr-harvester:master
  links:
    # The queue container is reachable under the alias "mq".
    - sfmrabbit:mq
  restart: always
  environment:
    - TZ=America/New_York
    - DEBUG=True
sfmtumblrexporter:
  image: gwul/sfm-tumblr-exporter:master
  links:
    - sfmrabbit:mq
  restart: always
  environment:
    - TZ=America/New_York
    - DEBUG=True
22 changes: 22 additions & 0 deletions docker/master/exporter/Dockerfile
@@ -0,0 +1,22 @@
# Image for sfm-tumblr-exporter built from the master branch archive.
FROM python@sha256:ad39551743b356efda7c61f46019b97d49d1aab01b97f0e6d87c9b34326f3bfe
MAINTAINER Vict Tan <ychtan@email.gwu.edu>

ARG DEBIAN_FRONTEND=noninteractive
# unzip is listed explicitly because "RUN unzip master.zip" below depends on it.
# The apt package lists are removed in the same layer so they do not stay in the image.
RUN apt-get update && apt-get install -y \
    zip=3.0-8 \
    unzip \
    git=1:2.1.4-2.1+deb8u1 \
    && rm -rf /var/lib/apt/lists/*
# Pin pip to 7.1.2.
RUN pip install pip==7.1.2
# Avoid HTTPS warnings.
RUN pip install --upgrade ndg-httpsclient

# Download the master branch archive and unpack it to /opt/sfm-tumblr-harvester.
WORKDIR /tmp
ADD https://github.com/gwu-libraries/sfm-tumblr-harvester/archive/master.zip /tmp/
RUN unzip master.zip
RUN mv sfm-tumblr-harvester-master /opt/sfm-tumblr-harvester
WORKDIR /opt/sfm-tumblr-harvester
RUN pip install -r requirements/master.txt
RUN pip install appdeps
# Wait for the queue on mq:5672, then start the exporter service.
CMD appdeps.py --port-wait mq:5672 \
    && python tumblr_exporter.py --debug=$DEBUG service mq $MQ_ENV_RABBITMQ_DEFAULT_USER $MQ_ENV_RABBITMQ_DEFAULT_PASS http://api

11 changes: 11 additions & 0 deletions docker/master/exporter/README.md
@@ -0,0 +1,11 @@
# sfm-tumblr-exporter master docker container

A docker container for running sfm-tumblr-exporter as a service.

This container requires a link to a container running the queue. This must be linked with the alias `mq`.
For example:

```yaml
links:
- sfmrabbit:mq
```
22 changes: 22 additions & 0 deletions docker/master/harvester/Dockerfile
@@ -0,0 +1,22 @@
# Image for sfm-tumblr-harvester built from the master branch archive.
FROM python@sha256:ad39551743b356efda7c61f46019b97d49d1aab01b97f0e6d87c9b34326f3bfe
MAINTAINER Vict Tan <ychtan@email.gwu.edu>

ARG DEBIAN_FRONTEND=noninteractive
# unzip is listed explicitly because "RUN unzip master.zip" below depends on it.
# The apt package lists are removed in the same layer so they do not stay in the image.
RUN apt-get update && apt-get install -y \
    zip=3.0-8 \
    unzip \
    git=1:2.1.4-2.1+deb8u1 \
    && rm -rf /var/lib/apt/lists/*
# Pin pip to 7.1.2.
RUN pip install pip==7.1.2
# Avoid HTTPS warnings.
RUN pip install --upgrade ndg-httpsclient

# Download the master branch archive and unpack it to /opt/sfm-tumblr-harvester.
WORKDIR /tmp
ADD https://github.com/gwu-libraries/sfm-tumblr-harvester/archive/master.zip /tmp/
RUN unzip master.zip
RUN mv sfm-tumblr-harvester-master /opt/sfm-tumblr-harvester
WORKDIR /opt/sfm-tumblr-harvester
RUN pip install -r requirements/master.txt
RUN pip install appdeps
# Wait for the queue on mq:5672, then start the harvester service.
CMD appdeps.py --port-wait mq:5672 \
    && python tumblr_harvester.py --debug=$DEBUG service mq $MQ_ENV_RABBITMQ_DEFAULT_USER $MQ_ENV_RABBITMQ_DEFAULT_PASS

11 changes: 11 additions & 0 deletions docker/master/harvester/README.md
@@ -0,0 +1,11 @@
# sfm-tumblr-harvester master docker container

A docker container for running sfm-tumblr-harvester as a service.

This container requires a link to a container running the queue. This must be linked with the alias `mq`.
For example:

```yaml
links:
- sfmrabbit:mq
```

0 comments on commit ea42988

Please sign in to comment.