In [1]:
from twittercrawler.crawler import TwitterCrawler
from twittercrawler.utils import load_json_result
import twittercrawler.search as su

# 1. Setup TwitterCrawler

### Initialize TwitterCrawler

In [None]:
# Request budget for the crawler: the maximum number of requests allowed
# within one time frame.
# NOTE(review): time_frame is presumably in seconds (15 minutes) - confirm
# against TwitterCrawler.
max_request_in_time_frame = 400
time_frame = 60 * 15

In [None]:
# Create the crawler instance with the request budget configured in the
# previous cell.
tcs = TwitterCrawler(
    max_requests=max_request_in_time_frame,
    time_frame=time_frame,
)

### Connect to MongoDB collection

In [None]:
# Attach the crawler to the "raw" collection of the "twitter-crawler"
# database, using port 27017.
tcs.connect_to_mongodb(
    db_name="twitter-crawler",
    collection_name="raw",
    port=27017,
)

### Authenticate TwitterCrawler

In [None]:
# Authenticate the crawler with the key file at ../api_key.json.
# The path is relative, so it is resolved against the current working
# directory of the notebook kernel.
tcs.authenticate("../api_key.json")

# 2. Search for events

## i.) Set search parameters

In [None]:
# Build one query string in which the individual terms (account handles
# and a hashtag) are separated by " OR ".
query = " OR ".join((
    "@CNN",
    "@BBC",
    "@guardian",
    "@nytimes",
    "#BREAKING",
))

In [None]:
# Search arguments handed to the crawler in the next cell.
# NOTE(review): presumably these are forwarded to Twitter's search
# endpoint (q = query string, result_type = ordering, count = page size) -
# confirm against the twittercrawler implementation.
search_params = dict(
    q=query,
    result_type="recent",
    count=100,
)

In [None]:
# Register the search parameters on the crawler; they are passed as the
# `search_args` keyword argument.
tcs.set_search_arguments(search_args=search_params)

## ii.) Implement your custom filter functions if needed

In [None]:
# Tweet id used as the bound for the id-based filter below.
my_since_id = 870285658723684355


def my_since_id_filter(tweet):
    """Apply the library's id-bound filter to *tweet* using ``my_since_id``.

    The bound is read from the module-level ``my_since_id`` at call time,
    and the result of the library helper is returned unchanged.

    NOTE(review): the helper name ``id_bound_fiter`` is kept exactly as in
    the original; it looks like a misspelling of ``id_bound_filter`` -
    confirm which name ``twittercrawler.search`` actually exports.
    """
    bound = my_since_id
    return su.id_bound_fiter(tweet, since_id=bound)

In [None]:
# Timestamp used as the bound for the time-based filter below.
my_created_at = "Thu Jun 01 00:00:00 +0000 2017"


def my_time_bound_filter(tweet):
    """Apply the library's time-bound filter to *tweet* using ``my_created_at``.

    The bound is read from the module-level ``my_created_at`` at call time,
    and the result of ``su.time_bound_filter`` is returned unchanged.
    """
    bound = my_created_at
    return su.time_bound_filter(tweet, created_at=bound)

## iii.) Run search

There are two different search strategies available

### a.) Recursive search

   * Here your search starts at a specific time. It is the current time if you do not set the **current_max_id** parameter
   * Then the search tries to explore past events that match your search parameters
   * The search terminates when one of the following happens:
      * you set **custom_since_id**: events with smaller id won't be returned
      **OR**
      * you set **term_func**: events older than the first event that matches this termination function won't be returned. For example you can set a time lower bound for your search.
      * all events matching your search parameters have been returned
      * you **interrupt the execution**

#### Terminate by since_id

In [None]:
# Recursive search bounded by an explicit tweet id passed as
# `custom_since_id`.
tcs.search_by_query(
    custom_since_id=870285658723684355,
    wait_for=3,
    feedback_time=60 * 10,
)

#### Terminate by custom termination function 

In [None]:
# Recursive search that uses the custom id-based filter as its
# termination function.
tcs.search_by_query(
    term_func=my_since_id_filter,
    wait_for=3,
    feedback_time=60 * 10,
)

In [None]:
# Recursive search that uses the custom time-based filter as its
# termination function.
tcs.search_by_query(
    term_func=my_time_bound_filter,
    wait_for=3,
    feedback_time=60 * 10,
)

### b.) Stream search

   * This search starts with a recursive search, which goes back into the past until an event matches the termination function.
   * Then the search jumps back to the present and starts a new recursive search, which runs until it has recovered all events since the starting time of the previous recursive search, and so on.
   * The search terminates if you:
      * **interrupt the execution**
   * This type of search can be used to follow ongoing events

In [None]:
# Stream search using the custom time-based filter as termination function.
# NOTE(review): the keyword here is `termination_func`, whereas
# `search_by_query` is called with `term_func` - confirm both names against
# the TwitterCrawler API.
tcs.stream_search(
    termination_func=my_time_bound_filter,
    delta_t=120,
    feedback_time=60,
)

In [None]:
# Close the crawler once searching is finished.
# NOTE(review): presumably this releases the MongoDB connection opened
# earlier - confirm against TwitterCrawler.close.
tcs.close()