# Taller 5.2  Mongo Map Reduce

Juan Navarro, <jsnavarroa@unal.edu.co>

In [21]:
from pymongo import MongoClient
from pprint import pprint
from bson.code import Code
import pandas as pd

In [None]:
# Open a connection to the MongoDB server running on this machine (port 27017).
client = MongoClient(host='localhost', port=27017)

# Handles for the "bda" database and its "restaurants" collection.
db = client['bda']
collection = db['restaurants']

# Pretty-print a single document to inspect the shape of the stored data.
pprint(collection.find_one())

## Cuenta los restaurantes agrupados por zipcode usando Map reduce

A continuación se realiza la implementación del conteo de restaurantes usando el código del ejemplo y la suma.

In [48]:
# --- Variant 1: count by array length ---------------------------------------
# Map step: for every restaurant document, emit its zipcode as the key and its
# restaurant_id as the value.
mapper = Code("""
                function () {
                    emit(this.address.zipcode, this.restaurant_id);
                }
               """)

# Reduce step: return the number of values received for the key.
# NOTE: the notes further down in this notebook show that this reducer gives
# wrong counts when its own output is fed back into it, because a partial
# result is then counted as a single element.
reducer = Code("""
                function (zipcode, restaurants) {
                    return restaurants.length;
                }
                """)

# --- Variant 2: count by summing ones ---------------------------------------
# Map step: emit the zipcode as the key and the constant 1 as the value.
mapper_sum = Code("""
                function () {
                    emit(this.address.zipcode, 1);
                }
               """)

# Reduce step: add up the values for the key. Summing partial sums yields the
# same total, so feeding earlier results back in does not change the count.
reducer_sum = Code("""
                function (key, values) {
                    return Array.sum(values);
                }
                """)

# Start from an empty frame indexed by '_id' so every variant can be
# outer-joined onto it. `columns` must be a list: recent pandas versions
# reject a set here ("columns cannot be a set").
df = pd.DataFrame(columns=['_id']).set_index('_id')

# Each entry: (result column name, map function, reduce function,
#              name of the output collection written by map-reduce).
variants = (
    ('Count_Length', mapper, reducer, 'restaurants_count'),
    ('Count_Sum', mapper_sum, reducer_sum, 'restaurants_count_sum'),
)

# The loop variables are deliberately NOT called `mapper`/`reducer`: reusing
# those names would overwrite the module-level definitions, so re-running this
# cell would silently execute the sum variant twice.
for name, map_fn, reduce_fn, out in variants:
    # Execute the map reduce; the results are stored in the `out` collection.
    # NOTE(review): Collection.map_reduce was removed in PyMongo 4.0. On newer
    # drivers, run the `mapReduce` command through `db.command(...)` or use an
    # aggregation pipeline instead.
    result = db.restaurants.map_reduce(map_fn, reduce_fn, out=out)

    # Materialize the output documents ({'_id': <zipcode>, 'value': <count>})
    # and label the value column with the variant name before joining, so no
    # suffix handling or positional renaming is needed.
    other = pd.DataFrame(list(result.find())).set_index('_id')
    other = other.rename(columns={'value': name})

    # Outer join keeps every zipcode reported by any variant.
    df = df.join(other, how='outer')

# Show the results: one column per zipcode, one row per variant.
df = df.rename_axis('zipcode', axis='index')
df.transpose()

zipcode,10001,10002,10003,10004,10005,10006,10007,10009,10010,10011,...,11432,11433,11434,11435,11436,11691,11692,11693,11694,11697
Count_Length,38,21,65,19,8,6,8,23,11,39,...,12,40897493,6,14,40824512,2,40550548,2,7,40366356
Count_Sum,75,29,139,25,15,13,16,46,27,90,...,21,1,9,22,1,4,1,3,12,1


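La función reductora del ejemplo no es idempotente, por lo tanto el valor reportado para el conteo es incorrecto: MongoDB puede volver a invocar `reduce` sobre resultados parciales, y `restaurants.length` cuenta entonces esos resultados parciales en lugar de los restaurantes. Además, cuando una llave tiene un solo valor MongoDB no invoca `reduce`, y por eso en `Count_Length` aparecen valores de `restaurant_id` (p. ej. 40897493) en lugar de un 1. Por ejemplo, si se ejecuta el siguiente código en Mongo Shell, el resultado es 2 cuando debería ser 3, porque hay 3 elementos en total.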

```javascript

// Reducer under test: returns how many values it received for the key.
var reducer = function (zipcode, restaurants) {
  return restaurants.length;
};

var myKey = 'myKey';
// Mix one raw value with the already-reduced result of two other values,
// mimicking a re-reduce of partial results.
var valuesIdempotent = [1,
                         reducer(myKey, [ 1, 1 ] )
                       ];
printjson(reducer(myKey, valuesIdempotent)); // 2, should be 3
```

# Cuente los restaurantes que están entre las longitudes -75 y -74 y las latitudes 40 y 42

# Calcule el promedio de los puntajes agrupados por zipcode

# Liste el restaurante top (de acuerdo a su score) de cada tipo de cuisine