In [39]:
import requests
import json
import os
from elasticsearch import (
    Elasticsearch,
    helpers
)
client = Elasticsearch()
Headers= {'Content-Type' : "application/json" }

def transform(esDoc):
    pass


# Some utilities for flattening the explain into something a bit more
# readable. Pass Explain JSON, get something readable (ironically this is what Solr's default output is :-p)
def flatten(l):
    [item for sublist in l for item in sublist]

def simplerExplain(explainJson, depth=0):
    result = " " * (depth * 2) + "%s, %s\n" % (explainJson['value'], explainJson['description'])
    #print json.dumps(explainJson, indent=True)
    if 'details' in explainJson:
        for detail in explainJson['details']:
            result += simplerExplain(detail, depth=depth+1)
    return result


# To speed up the pace of development, we really need to focus more heavily on the analysis and query
# settings of the search engine, rather than fidly bits of the http interface.
#
# To that end, we're going to collapse some of the code you were introduced to in chapter 3 into more general functions,
# so we can reuse them. Largely, this is the exact same code you saw in chapter 3 some more generality.

## Analyze
## The analyze function is a helper for accessing the _analyze endpoint like we did in chapter 3. Recall,
## given a field or analyzer, passing some text to _analyze will return the token stream that results from
## that analyzer. This token stream, if you recall, shows us exactly how the search engine translate text
## into individual tokens to be consumed by the underlying data structures. When we debug analysis, we see
## matches we need to expect.
def analyze(text, field=None, analyzer=None):
    whatToAnalyze = ''
    if field is not None:
        whatToAnalyze = "field=%s" % field
    elif analyzer is not None:
        whatToAnalyze = "analyzer=%s" % analyzer
    resp = requests.get("http://localhost:9200/tmdb/_analyze?%s&format=yaml" % whatToAnalyze, 
                        data=text,headers=Headers)
    print resp.text
    
## Search
## Next we need to wrap up our execution of query DSL queries. The function 'search' will execute the passed query DSL
## query and display the results. 
## If a scoring explain is associated with the results, then it also gets displayed,
## We'll also be sure to dump the query DSL
def search(query, verbose=False):
    url = 'http://localhost:9200/tmdb/_search'
    httpResp = requests.get(url, data=json.dumps(query),headers=Headers)
    if httpResp.status_code != 200:
        print "Search Failed <%s>" % httpResp.status_code
        print "%s" % httpResp.text
    searchHits = json.loads(httpResp.text)['hits']
    print "Num\tRelevance Score\t\tMovie Title"
    for idx, hit in enumerate(searchHits['hits']):
            castNames = []            
            castCharacters = []                        
            directorNames = []
            for cast in hit['_source']['cast']:
                castNames.append(cast['name'])
                castCharacters.append(cast['character'])
            for director in hit['_source']['directors']:
                directorNames.append(director['name'])
            print "%s\t%s\t\t%s\t%s\t%s" % (idx + 1, hit['_score'], 
                                      hit['_source']['title'], 
                                      hit['_source']['vote_average'],
                                      hit['_source']['release_date'])
            if verbose:
                print "%s" % hit['_source']['title']
                print "%s" % hit['_source']['tagline']        
                print "%s" % hit['_source']['overview']        
                print "%s" % hit['_id']
                print "DIRS %s" % directorNames
                print "CAST %s" % castNames
                print "CHAR %s" % castCharacters
                if '_explanation' in hit:
                    print "%s" % simplerExplain(hit['_explanation'])
                    print "*************************************"
    
    if verbose:
        httpResp = requests.get('http://localhost:9200' + 
                    '/tmdb/movie/_validate/query?explain',
                     data=json.dumps({'query': query['query']}),headers=Headers)
        print json.loads(httpResp.text)

## Reindex
## Reindex takes analyzer and field mappings, recreates the index, and then reindexes
## TMDB movies using the _bulk index API. There are other ways for modifying the configuration
## of the index besides dropping and restarting, however for convenience and because our data
## isn't truly that large, we'll just delete and start from scratch when we need to.
def reindex(analysisSettings, mappingSettings=None, movieDict={}):
    # Destroy any existing index (equiv to SQL "drop table")
    resp = requests.delete("http://localhost:9200/tmdb",headers=Headers)
    print "Delete TMDB Index <%s>" % resp.status_code
    
    # Create the index with explicit settings
    # We need to explicitely set number of shards to 1 to eliminate the impact of 
    # distributed IDF on our small collection
    # See also "Relavance is Broken!"
    # http://www.elastic.co/guide/en/elasticsearch/guide/current/relevance-is-broken.html
    settings = {
        "settings": {
            "number_of_shards": 1,
            "index": {
                "analysis" : analysisSettings,
            }
        }
    }
    if mappingSettings:
        settings['mappings'] = mappingSettings
    resp = requests.put("http://localhost:9200/tmdb", data=json.dumps(settings),headers=Headers)
    print "Create TMDB Index <%s>" % resp.status_code
    if resp.status_code != 200:
        print resp.text
    
    # Bulk index title & overview to the movie endpoint
    print "Indexing %i movies" % len(movieDict.keys())
    actions = (format_doc(doc) for id,doc in movieDict.iteritems())
    results = [details for success,details in helpers.streaming_bulk(client, actions,chunk_size=5000) if not success]
    print "Bulk Index into TMDB Index <%s>" % results


def extract(movieIds=[], numMovies=10000):
    if len(movieIds) == 0:
        try:
            f = open('tmdb.json')
            if f:
                return json.loads(f.read());
        except IOError:
            pass       
    return movieDict


def format_doc(doc):
    transform(doc)
    action = {
        "_index": "tmdb",
        "_type": "_doc",
        "_id": doc['id'],
        "_source": doc
        }
    return action

## Index to ES, Chapter 5 Settings

In [62]:
movieDict = extract([])

analysisSettings = {
   "analyzer" : {
      "default" : {
        "type" : "english"
      },
      "english_bigrams": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "porter_stem",
            "bigram_filter"
          ]
      }
    },
  "filter": {
    "bigram_filter": {
        "type": "shingle",
        "max_shingle_size":2,
        "min_shingle_size":2,
        "output_unigrams":"false"
    }
  }
}

            
mappingSettings = {
#     'movie': {
        'properties': {
            # Some bug means I have to be explicit about analyzer (ie default analyzer)
            # doesn't apply
            "overview": {
                'type': 'text',
                 'analyzer': 'english',

            },
            "title": {
                'type': 'text',
                 'analyzer': 'english',

            },
            "cast": {
               'properties': {
                  'name': {
                      'type': 'text',
                      'analyzer': 'english',
                      'fields': {
                         "bigramed": {
                            "type": "text",
                            "analyzer": "english_bigrams",
                            #"norms" : {
                            #   "enabled" : False
                            #}
                        }     
                      }
                   }
                   
               }
            },
            "directors": {
               'properties': {
                  'name': {
                      'type': 'text',
                      'analyzer': 'english',
                      'fields': {
                         "bigramed": {
                            "type": "text",
                            "analyzer": "english_bigrams",
                            #"norms" : {
                            #   "enabled" : False
                            #}
                        }                       
                      },
                 
                   }
                   
               }
            }            
        }
#     }
}

reindex(analysisSettings, mappingSettings, movieDict)

Delete TMDB Index <200>
Create TMDB Index <200>
Indexing 3051 movies
Bulk Index into TMDB Index <[]>


# Last Query from Chapter 5

In [7]:
usersSearch = 'star trek patrick stewart william shatner'
query = {
    'query': {
        'bool': {
            'should': [ 
                {
                   'multi_match': { 
                      'query': usersSearch,  #User's query
                      'fields': ['directors.name.bigramed', #B 
				      'cast.name.bigramed'],
                      'type': 'cross_fields'
                      }
                 },
                {
                   'multi_match': {
                     'query': usersSearch,  #User's query
                     'fields': ['overview', 'title', #C
  'directors.name', 'cast.name'],
                     'type': 'cross_fields'                                
                   }
                },
            ]
        }
    },
    'size': 5,
    'explain': True
}
search(query) 


Num	Relevance Score		Movie Title
1	32.55439		Star Trek: Generations	6.5	1994-11-17
2	20.59391		Star Trek: Insurrection	6.3	1998-12-10
3	20.580244		Star Trek: Nemesis	6.3	2002-12-12
4	20.264833		Star Trek II: The Wrath of Khan	7.1	1982-06-03
5	19.561811		Star Trek V: The Final Frontier	5.4	1989-06-09


# 7.2.1, Listing 1 Base Query

In [8]:
usersSearch = 'william shatner patrick stewart'
query = {
    'query': {
        'multi_match': {
           'query': usersSearch,  #User's query
            'fields': ['overview', 'title', #C
                       'directors.name', 'cast.name'],
            'type': 'cross_fields'                                
         }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	13.512724		Star Trek: Generations	6.5	1994-11-17
2	9.744539		Showtime	5.3	2002-03-14
3	9.46119		Osmosis Jones	5.4	2001-08-07
4	9.286617		The Wild	5.0	2006-04-13
5	8.662214		Miss Congeniality 2: Armed and Fabulous	5.3	2005-03-11


# 7.2.3, Listing 2 Boosting with An Additional Boolean Clause

In [12]:
usersSearch = 'william shatner patrick stewart'
query = {
    'query': {
        'bool': {
            'should': [
            {'multi_match': { #A
               'query': usersSearch,  #User's query
                'fields': ['overview', 'title', #C
                           'directors.name', 'cast.name'],
                'type': 'cross_fields'                                
             }},
            { #B
               'match_phrase': {
                    'title': {
                        'query': 'star trek',
                    }
                }
            }
            ]
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	22.520575		Star Trek: Generations	6.5	1994-11-17
2	16.638094		Star Trek: Nemesis	6.3	2002-12-12
3	15.981052		Star Trek: Insurrection	6.3	1998-12-10
4	15.16114		Star Trek II: The Wrath of Khan	7.1	1982-06-03
5	15.076138		Star Trek: The Motion Picture	6.0	1979-12-06


# 7.2.3, Adjusted Boost Weight on Boolean Query (no listing no, modification of above listing)

In [14]:
usersSearch = 'william shatner patrick stewart'
query = {
    'query': {
        'bool': {
            'should': [
            {'multi_match': { #A
               'query': usersSearch,  #User's query
                'fields': ['overview', 'title', #C
                           'directors.name', 'cast.name'],
                'type': 'cross_fields'                                
             }},
            { #B
               'match_phrase': {
                    'title': {
                        'query': 'star trek',
                        'boost': 0.1
                    }
                }
            }
            ]
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	14.413508		Star Trek: Generations	6.5	1994-11-17
2	9.744539		Showtime	5.3	2002-03-14
3	9.46119		Osmosis Jones	5.4	2001-08-07
4	9.286617		The Wild	5.0	2006-04-13
5	9.045915		Star Trek II: The Wrath of Khan	7.1	1982-06-03


# 7.2.5, Listing 3 -- Multiplicative Boosting on Title Star Trek match

In [16]:
usersSearch = 'william shatner patrick stewart'
query = {
    'query': {
        'function_score': {
            'query': {
                 'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['overview', 'title', #C
                               'directors.name', 'cast.name'],
                    'type': 'cross_fields'                                
                 }
             },
             'functions': [
                {
                     'weight': 2.5,
                     'filter': {
#                         'query': {
                             'match_phrase': {
                                    'title': 'star trek'
                            }
#                         }
                     }
                 }
            ]
        }
    },
    'size': 50,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	33.78181		Star Trek: Generations	6.5	1994-11-17
2	20.916115		Star Trek II: The Wrath of Khan	7.1	1982-06-03
3	19.893085		Star Trek V: The Final Frontier	5.4	1989-06-09
4	19.418203		Star Trek IV: The Voyage Home	6.7	1986-11-25
5	19.075605		Star Trek: Nemesis	6.3	2002-12-12
6	18.746922		Star Trek III: The Search for Spock	6.3	1984-05-31
7	18.324604		Star Trek: The Motion Picture	6.0	1979-12-06
8	17.433002		Star Trek: Insurrection	6.3	1998-12-10
9	16.777475		Star Trek: First Contact	6.9	1996-11-21
10	16.63797		Star Trek VI: The Undiscovered Country	6.6	1991-12-05
11	9.744539		Showtime	5.3	2002-03-14
12	9.46119		Osmosis Jones	5.4	2001-08-07
13	9.286617		The Wild	5.0	2006-04-13
14	8.662214		Miss Congeniality 2: Armed and Fabulous	5.3	2005-03-11
15	8.289814		Conspiracy Theory	6.2	1997-08-07
16	8.285913		Bill & Ted's Bogus Journey	5.8	1991-07-19
17	7.675666		Over the Hedge	6.2	2006-04-22
18	7.5218945		Fanboys	6.3	2009-02-06
19	7.498769		Miss Congeniality	6.0

# 7.3, Listing 4 -- Using a Filter Instead of A Boost

In [18]:
usersSearch = 'william shatner patrick stewart'
query = {
    'query': {
        'bool': {
          'should': [
            {    'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['overview', 'title', #C
                               'directors.name', 'cast.name'],
                    'type': 'cross_fields'                                
                 }
            }],
          'filter': [{
#             'query': {
                'match_phrase': {
                    'title': 'star trek'
                }
#             }
          }]
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	13.512724		Star Trek: Generations	6.5	1994-11-17
2	8.366446		Star Trek II: The Wrath of Khan	7.1	1982-06-03
3	7.9572344		Star Trek V: The Final Frontier	5.4	1989-06-09
4	7.767281		Star Trek IV: The Voyage Home	6.7	1986-11-25
5	7.6302423		Star Trek: Nemesis	6.3	2002-12-12


# 7.4.2, Listings 5&7 Sentinel Tokens (includes exact name matching)

In [63]:
SENTINEL_BEGIN = 'SENTINEL_BEGIN'
SENTINEL_END = 'SENTINEL_END'
def transform(esDoc):  
    esDoc['title_exact_match'] = SENTINEL_BEGIN + ' ' + esDoc['title'] + ' ' + SENTINEL_END
    esDoc['names_exact_match'] = []
    for person in esDoc['cast'] + esDoc['directors']:
        esDoc['names_exact_match'].append(SENTINEL_BEGIN + ' ' + person['name'] + ' ' + SENTINEL_END)
        
        
reindex(analysisSettings, mappingSettings, movieDict)

Delete TMDB Index <200>
Create TMDB Index <200>
Indexing 3051 movies
Bulk Index into TMDB Index <[]>


# 7.4.2, Listing 6 -- Testing Exact Title Matching

In [64]:
usersSearch = 'star trek'
query = {
    'query': {
        'match_phrase': {
            'title_exact_match': {
                'query': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END,
            }
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	10.588807		Star Trek	7.3	2009-05-07


# 7.4.3, Listing 8 Boolean Boost on Exact Title Matching

In [65]:
usersSearch = 'good will hunting'
query = {
    'query': {
        'bool': {
#             'disable_coord': True,
            'should': [
                {'match_phrase': {
                    'title_exact_match': {
                        'query': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END,
                        'boost': 1000,
                    }          
                }},
                {'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['overview', 'title', #C
                               'directors.name', 'cast.name'],
                    'type': 'cross_fields'                                
                 }},
                
            ]
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	12208.437		Good Will Hunting	7.4	1997-12-05
2	9.306215		Saw V	6.1	2008-10-23
3	8.800726		The Hunt	7.9	2012-10-25
4	8.775521		Good Night, and Good Luck.	6.4	2005-09-16
5	8.248026		Pay It Forward	6.6	2000-10-12


## No Listing -- Adding a query mentions name boost

### First Attempt, search bigramed fields without modification

In [69]:
usersSearch = 'star trek patrick stewart'
query = {
    'query': {
        'bool': {
#             'disable_coord': True,
            'should': [
                {'match_phrase': {
                    'title_exact_match': {
                        'query': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END,
                        'boost': 1000,
                    }          
                }},
                {'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['overview', 'title', #C
                               'directors.name', 'cast.name'],
                    'type': 'cross_fields'
                }},
                {'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['directors.name.bigramed', 'cast.name.bigramed'],
                    'type': 'cross_fields',
                   'boost': 100
                 }}
                
            ]
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	620.3404		Star Trek: Insurrection	6.3	1998-12-10
2	604.30133		Star Trek: First Contact	6.9	1996-11-21
3	583.6142		Gnomeo & Juliet	5.9	2011-01-13
4	583.6142		Excalibur	6.7	1981-04-10
5	552.97955		Star Trek: Generations	6.5	1994-11-17


In [72]:
mappingSettings['properties'] \
               ['cast']['properties'] \
               ['name']['fields']['bigramed']['norms'] = False
        
reindex(analysisSettings, mappingSettings, movieDict)

Delete TMDB Index <200>
Create TMDB Index <200>
Indexing 3051 movies
Bulk Index into TMDB Index <[]>


### Rerunning with Norms Off For Bigrams

In [46]:
usersSearch = 'star trek patrick stewart'
query = {
    'query': {
        'bool': {
            'disable_coord': True,
            'should': [
                {'match_phrase': {
                    'title_exact_match': {
                        'query': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END,
                        'boost': 1000,
                    }          
                }},
                {'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['overview', 'title', #C
                               'directors.name', 'cast.name'],
                    'type': 'cross_fields'
                }},
                {'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['directors.name.bigramed', 'cast.name.bigramed'],
                    'type': 'cross_fields',
                   'boost': 100
                 }}
                
            ]
        }
    },
    'size': 20,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	0.03920228		Star Trek: Insurrection	6.3	1998-12-10
2	0.03920228		Star Trek: First Contact	6.9	1996-11-21
3	0.03917096		Star Trek: Nemesis	6.3	2002-12-12
4	0.03917096		Star Trek: Generations	6.5	1994-11-17
5	0.03820324		Gnomeo & Juliet	5.9	2011-01-13
6	0.03820324		Excalibur	6.7	1981-04-10
7	0.03818829		X-Men: Days of Future Past	7.7	2014-05-23
8	0.038187582		Conspiracy Theory	6.2	1997-08-07
9	0.038187582		The Wolverine	6.4	2013-07-25
10	0.038187582		Dune	6.5	1984-12-14
11	0.038187582		X-Men	6.5	2000-07-14
12	0.038171925		X2: X-Men United	6.5	2003-04-27
13	0.038171925		TMNT	6.0	2007-03-22
14	0.038171925		The Prince of Egypt	6.7	1998-12-15
15	0.038171925		X-Men: The Last Stand	6.1	2006-05-26
16	0.03815442		Chicken Little	5.4	2005-11-04
17	0.00056567905		Star Trek	7.3	2009-05-07
18	0.0004525432		Star Trek: The Motion Picture	6.0	1979-12-06
19	0.0004525432		Star Trek Into Darkness	7.5	2013-05-16
20	0.0003959753		Star Trek VI: The Undiscovered Country	6.6	1

# 7.4.4.1 Exact Name Matching Function Query Skeleton using TF*IDF (no listing number)

In [77]:
query = {
    'query': {
        'function_score': {
            'query': {
                'match_phrase': {
                    'names_exact_match': SENTINEL_BEGIN + ' william shatner ' + SENTINEL_END
                }
            },          
            'functions': [
               
            ]
        }        
    }
}
search(query)

Num	Relevance Score		Movie Title
1	10.460574		Star Trek V: The Final Frontier	5.4	1989-06-09
2	9.576538		Showtime	5.3	2002-03-14
3	9.061579		Osmosis Jones	5.4	2001-08-07
4	8.231628		Star Trek II: The Wrath of Khan	7.1	1982-06-03
5	7.8482566		Miss Congeniality 2: Armed and Fabulous	5.3	2005-03-11
6	7.6696568		Star Trek IV: The Voyage Home	6.7	1986-11-25
7	7.6696568		Miss Congeniality	6.0	2000-12-14
8	7.4990053		Over the Hedge	6.2	2006-04-22
9	7.4990053		Star Trek: The Motion Picture	6.0	1979-12-06
10	7.4990053		Star Trek III: The Search for Spock	6.3	1984-05-31


# 7.4.4.1 Exact Name Matching Function, Ignoring TF*IDF

In [99]:
query = {
    'query': {
        'function_score': {
            'query': {
                'constant_score': {
#                     'query': {
                        'filter': {
                            'term':{'names_exact_match': SENTINEL_BEGIN + ' william shatner ' + SENTINEL_END}
#                         }
                    },
                    'boost': 1000.0
                }
            },          
            'functions': [
            ]
        }        
    },
    'explain':True
}
search(query)

Num	Relevance Score		Movie Title


# 7.4.4.1, Listing 9 Turning User Rating into A Signal

In [81]:
query = {
    'query': {
        'function_score': {
            'query': {
                'match_all': {}
            },          
            'functions': [
            {
                 "field_value_factor": {
                 "field": "vote_average",
                 "modifier": "sqrt"
                }
            }]
        }        
    }
}
search(query)

Num	Relevance Score		Movie Title
1	2.9325757		Whiplash	8.6	2014-10-10
2	2.9154758		Feast	8.5	2014-11-07
3	2.8982754		Paperman	8.4	2012-11-02
4	2.8982754		Mommy	8.4	2014-09-19
5	2.8982754		Interstellar	8.4	2014-11-05
6	2.8982754		Avengers: Age of Ultron	8.4	2015-05-01
7	2.8809721		The Tale of the Princess Kaguya	8.3	2013-11-23
8	2.8809721		Presto	8.3	2008-06-26
9	2.8635643		The Shawshank Redemption	8.2	1994-09-14
10	2.8635643		Wolf Children	8.2	2012-08-29


# 7.4.4.2 Listing 10, Turning Recency of Release Into A Signal (first try)

In [82]:
query = {
    'query': {
        'function_score': {
            'query': {
                'match_all': {}
            },          
            'functions': [
            {
                 "gauss": {
                    "release_date": {
                        "origin": "now",
                        "scale": "900d",
                        "decay": 0.5
                    }
                }
            }]
        }        
    },
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	0.002309774		Resident Evil: Rising	6.1	2016-01-01
2	0.00071818096		Avengers: Age of Ultron	8.4	2015-05-01
3	0.0006243266		Furious 7	7.8	2015-04-03
4	0.00060271955		Home	7.2	2015-03-27
5	0.00060271955		It Follows	7.5	2015-03-27
6	0.0005818115		Insurgent	7.3	2015-03-20
7	0.00056158163		Frozen Fever	7.0	2015-03-13
8	0.00056158163		Cinderella	7.3	2015-03-13
9	0.0005420097		Chappie	7.0	2015-03-06
10	0.00052307604		Jupiter Ascending	5.6	2015-02-27


# 7.4.4.2 Listing 10, Adjusted Gaussian Decay (set scale to 15 years)

In [83]:
query = {
    'query': {
        'function_score': {
            'query': {
                'match_all': {}
            },          
            'functions': [
            {
                 "gauss": {
                    "release_date": {
                        "origin": "now",
                        "scale": "5500d",
                        "decay": 0.5
                    }
                }
            }]
        }        
    },
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	0.84997207		Resident Evil: Rising	6.1	2016-01-01
2	0.8237962		Avengers: Age of Ultron	8.4	2015-05-01
3	0.82071275		Furious 7	7.8	2015-04-03
4	0.8199391		Home	7.2	2015-03-27
5	0.8199391		It Follows	7.5	2015-03-27
6	0.8191643		Insurgent	7.3	2015-03-20
7	0.8183884		Frozen Fever	7.0	2015-03-13
8	0.8183884		Cinderella	7.3	2015-03-13
9	0.81761146		Chappie	7.0	2015-03-06
10	0.8168334		Jupiter Ascending	5.6	2015-02-27


# 7.4.4.3 -- Complete Name Boost

In [103]:
usersSearch = 'patrick stewart'
query = {
    'query': {
        'function_score': {
            'query': {
                'constant_score': {
                    'filter': {
                        'term': {
                            'names_exact_match': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END
                        }
                    },
                    'boost': 1000.0
                }            
            },          
            'functions': [
            {
                 "gauss": {
                    "release_date": {
                        "origin": "now",
                        "scale": "5500d",
                        "decay": 0.5
                    }
                }
            },
            {
                "field_value_factor": {
                 "field": "vote_average",
                 "modifier": "sqrt"
                }
            }
            
            ]
        }        
    },
    'explain': True,
    'size': 20
}
search(query)

Num	Relevance Score		Movie Title


# Not Shown In Chapter: The Whole Shebang

In [21]:
usersSearch = 'patrick stewart'
query = {
    'query': {
        'bool': {
            'disable_coord': True,
            'should': [
                {'match_phrase': {
                    'title_exact_match': {
                        'query': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END,
                        'boost': 1000,
                    }          
                }},
                {
                   'function_score': {
                        'query': {
                            'constant_score': {
                                'query': {
                                    'match_phrase': {
                                        'names_exact_match': SENTINEL_BEGIN + ' ' + usersSearch + ' ' + SENTINEL_END
                                    }
                                },
                                'boost': 1000.0
                            }            
                        },          
                        'functions': [
                        {
                             "gauss": {
                                "release_date": {
                                    "origin": "now",
                                    "scale": "5500d",
                                    "decay": 0.5
                                }
                            }
                        },
                        {
                            "field_value_factor": {
                             "field": "vote_average",
                             "modifier": "sqrt"
                            }
                        }
                        
                        ]
                    }                    
                },
                {'multi_match': {
                   'query': usersSearch,  #User's query
                    'fields': ['overview', 'title', #C
                               'directors.name', 'cast.name'],
                    'type': 'cross_fields'                                
                 }},
                
            ]
        }
    },
    'size': 5,
    'explain': True
}
search(query)

Num	Relevance Score		Movie Title
1	2764.44		X-Men: Days of Future Past	7.7	2014-05-23
2	2497.8433		The Wolverine	6.4	2013-07-25
3	2433.9927		Ted	6.3	2012-06-29
4	2274.0195		Gnomeo & Juliet	5.9	2011-01-13
5	1962.7107		TMNT	6.0	2007-03-22
