Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
84 lines (65 sloc) 3.04 KB
# Define your item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import logging
import maya
# The path for this is weird. I spent some time trying to get it to work with a more sane import statement,
# but I was not (yet) successful.
from carbalert_django.models import Thread, SearchPhrase
from carbalert.carbalert_scrapy.carbalert_scrapy.tasks import send_email_notification
class CarbalertPipeline(object):
    """Scrapy item pipeline that records new threads and queues email alerts.

    For every item whose ``thread_id`` is not yet stored as a ``Thread``, the
    title and text are scanned for each stored ``SearchPhrase``; users linked
    to a matching phrase are sent a notification through the
    ``send_email_notification`` task.
    """

    def process_item(self, item, spider):
        """Store an unseen thread and notify users whose search phrases match.

        Args:
            item: Mapping providing the keys ``thread_id``, ``title``,
                ``text``, ``thread_url`` and ``datetime`` (the latter is
                handed to ``maya.parse``).
            spider: The spider that produced the item; not used here.

        Returns:
            The ``item`` that was passed in, unchanged.
        """
        logging.info("CarbalertPipeline: Processing item")

        thread_id = item["thread_id"]
        logging.info(f"Checking if thread ID ({thread_id}) exists in DB...")

        # Threads that are already stored have been handled before; skip them
        # so users are not notified twice.
        if Thread.objects.filter(thread_id=thread_id).exists():
            logging.debug("Thread already exists.")
            return item

        logging.info("No existing thread for ID.")

        search_phrases = SearchPhrase.objects.values_list("phrase", flat=True)

        title = item["title"]
        text = item["text"]
        thread_url = item["thread_url"]
        thread_datetime = maya.parse(item["datetime"])

        # Lower-case the haystacks once instead of once per search phrase.
        title_lower = title.lower()
        text_lower = text.lower()

        # Maps each user to the list of search phrases that matched this thread.
        email_list = {}

        for search_phrase in search_phrases:
            logging.info(
                f"Scanning title and text for search phrase: {search_phrase}"
            )

            needle = search_phrase.lower()
            if needle not in title_lower and needle not in text_lower:
                continue

            logging.info(f"Found search phrase: {search_phrase}")
            search_phrase_object = SearchPhrase.objects.get(phrase=search_phrase)

            for user in search_phrase_object.email_users.all():
                logging.info(
                    f"Found user {user} associated to search phrase {search_phrase}"
                )
                # Create the user's list on first match, append on later ones.
                email_list.setdefault(user, []).append(search_phrase)

        try:
            logging.info(f"Saving thread ID ({thread_id}) to DB.")
            thread = Thread.objects.get(thread_id=thread_id)
        except Thread.DoesNotExist:
            thread = Thread()
            thread.thread_id = thread_id
            thread.title = title
            thread.text = text
            thread.url = thread_url
            thread.datetime = thread_datetime.datetime()
            # Persist the thread so the existence check above finds it on the
            # next crawl.
            thread.save()

        # The timezone used for the timestamp shown in the email is hard-coded.
        local_datetime = thread_datetime.datetime(
            to_timezone="Africa/Harare"
        ).strftime("%d-%m-%Y %H:%M")

        for user, matched_phrases in email_list.items():
            logging.info(
                f"Sending email notification to user {user} for thread ID {thread_id}, thread title: {title}"
            )
            # NOTE(review): the first argument is assumed to be the user's
            # email address -- confirm against send_email_notification's
            # signature in tasks.py.
            send_email_notification.delay(
                user.email, matched_phrases, title, text, thread_url, local_datetime
            )

        return item