In [1]:
import redis 
import json
from numpy import std
from geopy import distance

REDIS_HOST = "localhost"
REDIS_PORT = 6379
REDIS = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)


In [2]:
def create_redis_db(file):
    with open(file, 'r') as json_file:
        flag = 0 
        for line in json_file:
            data = json.loads(line)
            if not isinstance(data['_id'], int):
                continue

            review_string = ''

            reviews = data['reviews']
            for review in reviews : 
                flag += 1 
                try: 
                    review_dictionnary =  {
                        'rating': review['rating'],
                        'source': review['source'],
                    }
                    REDIS.set(flag, json.dumps(review_dictionnary))
                    review_string += str(flag)+','
                except TypeError:
                    continue
            
            REDIS.set(data['category']+','+str(data['_id'])+'_', review_string)

            coordinates = data['location']['coord']['coordinates']
            REDIS.set(data['category']+';'+str(coordinates[0])+';'+str(coordinates[1]), data['name'])

create_redis_db('tourPedia_paris.json')
print('Done')

Done


In [3]:
def average_rating(id_location):
    # id_location is a string

    key = REDIS.keys("*"+id_location+'_')[0]
    reviews = REDIS.get(key).decode().split(',')
    ratings = []
    for id_review in reviews:
        if id_review:
            ratings.append(json.loads(REDIS.get(id_review))['rating'])
    if len(ratings) == 0:
        return None
    return sum(ratings)/len(ratings)

print(average_rating('83408'))

0.5384615384615384


In [4]:
def standard_deviation(id_location):
    # id_location is a string

    key = REDIS.keys("*"+id_location+'_')[0]
    reviews = REDIS.get(key).decode().split(',')
    ratings = []
    for id_review in reviews:
        if id_review:
            ratings.append(json.loads(REDIS.get(id_review))['rating'])
    if len(ratings) == 0:
        return None
    return std(ratings)

print(standard_deviation('83408'))

1.3076923076923077


In [5]:
def near_place(coord_location, category):
    keys = REDIS.keys(category+";*")
    near_places = {}
    for key in keys:
        location_coord = key.decode().split(';')[1:]
        location_coord = ( float(location_coord[0]), float(location_coord[1]))
        if distance.distance(location_coord, coord_location).km < 4: # it's near if it's distance is below 4 kilometers
            near_places[REDIS.get(key).decode()] = distance.distance(location_coord, coord_location).km
    return near_places

coord_eiffel = (2.2944991, 48.8582602)

print(near_place(coord_eiffel, 'restaurant'))

{'Le Central Café': 3.111749900897771, 'Le Monde des Olives': 2.4424568533462767, 'Café di Roma': 1.8398206811418385, 'Les montparnos': 2.9532728414636162, 'AMAP Miam-14': 3.9478491981760455, 'Legrand Thierry': 3.2629064720945387, 'Faye Paris-Gastronomie on Line': 1.806874012917941, "L'Horizon": 3.7805509495846903, 'Le Percier': 2.822459319969775, 'Boucherie Seynat': 1.9788290850909318, 'Bouharati Ahmed': 3.772404315474711, "Les thés d'Emilie": 3.917508819263822, 'EXKi': 1.554200679609736, 'Péniche Le Grand Bleu': 0.3869257819211424, 'Margoton-Boulangerie-Pâtisserie': 2.3670595623093362, 'Sci Garage 4 Rue Saint-Florentin': 3.47638601625639, 'Fromagerie Pergolèse': 2.220242653356966, 'Les Prolongations': 0.9026508434504648, "L'Atelier des Compères": 1.5196638809363536, 'QUALITÉ & CO': 2.1770162958422867, 'OCINQ': 3.839260071828505, "L'Opportun": 3.7405774925947255, 'Les Cormorans': 3.8844923808297045, 'Brasserie Niel': 2.372785985886341, "McDonald's": 1.112490043869443, 'Da Zavola': 1.6

In [6]:
def well_rated(id_location): 
    rating = average_rating(id_location)
    key = REDIS.keys("*"+id_location+'_')[0]
    reviews = REDIS.get(key).decode().split(',')
    number_of_reviews = len(reviews)
    sources = set()
    for id_review in reviews:
        if id_review:
            sources.add(json.loads(REDIS.get(id_review))['source'])
    number_sources = len(sources)

    score = rating
    if number_sources == 3:
        score += 1 # if the three sources are present in the ratings of the location, +1
    if number_of_reviews >= 10: 
        score  += 1 # if there is more than 10 ratings, +1

    if score >= 6 : 
        return str(score) + " : it's a very good score"
    if score <= 6 and score >= 4 : 
        return str(score) + " : it's a quite good score"
    if score <= 2:
        return str(score) + " : it's a very bad score"
    return str(score) + " : it's a bad score"

print(well_rated('86620'))

3.2222222222222223 : it's a bad score
