In [3]:
import requests
from elasticsearch import Elasticsearch

es = Elasticsearch()

# Return a response of the top 100 IAMA Reddit posts of all time.
# The timeout keeps the cell from hanging forever if Reddit does not answer.
response = requests.get("http://api.reddit.com/r/iama/top/?t=all&limit=100",
                        headers={"User-Agent": "TrackMaven"},
                        timeout=30)
# Fail here on an HTTP error (e.g. rate limiting) instead of later with a
# confusing JSON/KeyError while unpacking the payload.
response.raise_for_status()

# Subset of each post's attributes that is copied into the indexed document
fields = ['title', 'selftext', 'author', 'score',
          'ups', 'downs', 'num_comments', 'url', 'created']

# Loop through results and add each data dictionary to the ES "reddit" index.
# The position in the listing is used as the document id, so re-running the
# cell overwrites ids 0..N-1 rather than creating new documents.
for i, iama in enumerate(response.json()['data']['children']):
    content = iama['data']
    doc = {field: content[field] for field in fields}
    es.index(index="reddit", doc_type='iama', id=i, body=doc)

In [9]:
from elasticsearch import Elasticsearch

es = Elasticsearch()

# Fetch a specific result by its document id
res = es.get(index='reddit', doc_type='iama', id=1)
print(res['_source'])

# Refresh the index so that recently indexed documents are visible to search
es.indices.refresh(index="reddit")

# Query for results: nothing will match this author
res = es.search(index="reddit",
                body={"query": {"match": {"author": "no results here!"}}})
print(res)

# Query for all results (no matching criteria)
res = es.search(index="reddit", body={"query": {"match_all": {}}})
print(res['hits']['total'])
hits = res['hits']['hits']
# Show the second hit; guard the lookup so a sparsely populated index does
# not abort the cell with an IndexError.
if len(hits) > 1:
    print(hits[1]['_source']['title'])
else:
    print("Fewer than two documents returned")

# Query based on text appearing in the title.
# With the default (standard) analyzer the match is case-insensitive, but it
# is NOT stemmed: plural/singular variants only match if the field is mapped
# with a stemming analyzer.
res = es.search(index="reddit", body={"query": {"match": {"title": "obama"}}})
print(res['hits']['total'])
hits = res['hits']['hits']
# Show the best-scoring hit, if the query matched anything at all
if hits:
    print(hits[0]['_source']['title'])
else:
    print("No matching documents")

{'selftext': "**My 6 Questions:**\n\n1. How did you enjoy your time working at Reddit?\n2. Were you expecting to be let go?\n3. What are you planning to do now?\n4. What was your favorite AMA?\n5. Would you come back, if possible?\n6. Are you planning to take [Campus Society's](http://blog.campussociety.com/an-open-letter-to-victoria-previously-of-reddit-become-victoria-of-campus-society/) Job offer?\n\n**Public Contact Information:** @happysquid is her twitter (Thanks /u/crabjuice23 And /u/edjamakated!) &amp; /u/chooter (Thanks /u/alsadius)\n\n\nEdit: The votes dropped from 17K+ to 10K+ in a matter of seconds...what?\n\nEdit again: I've lost a total of about 14K votes...Vote fuzzing seems a bit way too much\n", 'ups': 126765, 'downs': 0, 'num_comments': 2864, 'url': 'https://www.reddit.com/r/IAmA/comments/3c0iw2/ama_request_victoria_exama_mod/', 'author': 'korantano', 'score': 126765, 'created': 1435973760.0, 'title': '[AMA Request] Victoria, ex-AMA mod'}
{'took': 1, 'timed_out': Fals

In [37]:
import csv
from elasticsearch import Elasticsearch

# Map the fields of a new "trip" doc_type. Property names have to match the
# keys of the documents indexed below; any document key that is missing here
# is mapped dynamically by Elasticsearch instead.
mapping = {
    "trip": {
        "properties": {
            "duration": {"type": "integer"},
            "start_date": {"type": "string"},
            "start_station": {"type": "string", "index": "not_analyzed"},
            "start_terminal": {"type": "integer"},  # not populated by the import below
            "end_date": {"type": "string"},
            "end_station": {"type": "string", "index": "not_analyzed"},
            "end_terminal": {"type": "integer"},  # not populated by the import below
            "bike_id": {"type": "string"},
            "member_type": {"type": "string"}
        }
    }
}


def duration_to_seconds(duration):
    """Convert a duration string into a whole number of seconds.

    The string is expected to hold three whitespace-separated parts carrying
    the suffixes "h", "min." and "sec." (e.g. "0h 14min. 26sec.").

    Raises:
        ValueError: if the string does not have exactly three parts or a part
            is not an integer once its suffix is removed.
    """
    hours, minutes, seconds = duration.split()
    return (int(hours.replace("h", "")) * 60 * 60
            + int(minutes.replace("min.", "")) * 60
            + int(seconds.replace("sec.", "")))


# Create a new "bikeshare" index that includes "trips" with the above mapping.
# Only delete the index when it already exists, so the cell also works on the
# very first run (deleting a missing index raises NotFoundError).
es = Elasticsearch()
if es.indices.exists(index="bikeshare"):
    es.indices.delete(index="bikeshare")
es.indices.create(index="bikeshare")
es.indices.put_mapping(index="bikeshare", doc_type="trip", body=mapping)

# Import a CSV file of trip data - this will take quite a while!
# newline='' is what the csv module requires for correct handling of quoted
# fields that contain line breaks.
with open('2010-Q4-cabi-trip-history-data.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    header = next(reader, None)  # Skip header row (None if the file is empty)
    for trip_id, row in enumerate(reader):
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to index
            continue
        trip_seconds = duration_to_seconds(row[0])
        content = {
            "duration": trip_seconds,
            "start_date": row[1],
            "end_date": row[2],
            "start_station": row[3],
            "end_station": row[4],
            "bike_id": row[5],
            "member_type": row[6],
        }
        es.index(index="bikeshare", doc_type='trip', id=trip_id, body=content)
        