# Connexion à la base de données 'fcd'

In [8]:
import pandas as pd
import pymongo
import datetime
from datetime import timedelta
import math
import pprint

# Open the MongoDB connection and keep a module-level handle on the `assets`
# collection of the `fcd` database; the query functions below read it as a
# global.
client = pymongo.MongoClient('localhost')
collection = client['fcd']['assets']

print('done')

done


# QUERY 1 

In [9]:
import pprint


# QUERY 1
# Calculer la densité [nombre de points] sur une Zone géographique Z et une période T 

def density_list(lon, lat, rad, datemin, datemax, option, zone, periode):
    """Return the distinct asset ids matching an optional zone and/or period.

    The query runs against the module-level ``collection``.

    Args:
        lon, lat: Centre of the zone, sent as the GeoJSON point ``[lon, lat]``.
            Only used when ``zone`` is truthy.
        rad: Zone radius. It is multiplied by 1000 to obtain the
            ``$maxDistance`` in metres (so presumably kilometres).
        datemin, datemax: Exclusive bounds on ``recorded_at``. They filter the
            query only when ``periode`` is truthy, but are always echoed in
            the result.
        option: Extra filter AND-ed with the rest; ``{}`` means no extra filter.
        zone: When truthy, restrict to points within ``rad`` of the centre.
        periode: When truthy, restrict to points inside the period.

    Returns:
        A dict with ``neighbors_id`` (distinct ``asset_id`` values),
        ``period`` (``[datemin, datemax]``) and ``zone`` (``center`` and
        ``radius``). ``center`` is ``[-1, -1]`` when ``zone`` is falsy.
    """
    # Geographic part of the filter.
    if zone:
        center = [lon, lat]
        geo_filter = {
            "location.geo": {
                "$nearSphere": {
                    "$geometry": {
                        "type": "Point",
                        "coordinates": [lon, lat]
                    },
                    "$minDistance": 0,
                    "$maxDistance": rad*1000  # metres
                }
            }
        }
    else:
        center = [-1, -1]
        geo_filter = {}

    # Temporal part of the filter.
    # NOTE(review): both bounds are strict, so a point recorded exactly at
    # datemin or datemax is excluded.
    if periode:
        time_filter = {'$and': [{'recorded_at': {"$gt": datemin}},
                                {'recorded_at': {"$lt": datemax}}]}
    else:
        time_filter = {}

    # Final query: zone AND period, optionally AND the caller's extra filter.
    query = {'$and': [geo_filter, time_filter]}
    if option != {}:
        query = {'$and': [query, option]}

    neighbors = list(collection.find(query).distinct("asset_id"))

    return {
        'neighbors_id': neighbors,
        'period': [datemin, datemax],
        'zone': {'center': center, 'radius': rad},
    }


# QUERY 2

In [17]:
import pprint

# QUERY 2
# Retourner le nombre de points voisins d'un asset X dans une période T.

def time_gap(current, new):
    """Return the absolute difference between ``current`` and ``new`` over 60.

    With inputs expressed in seconds the result is a gap in minutes.
    """
    return abs(current - new) / 60


def asset_density(id_asset, rad, datemin, datemax):
    """Return the assets seen around one asset during a period.

    The asset's own points inside ``(datemin, datemax)`` are fetched from the
    module-level ``collection`` and sorted chronologically. The middle point
    of that track is used as the centre of a ``density_list`` query that
    excludes the asset itself.

    Args:
        id_asset: Value of ``asset_id`` identifying the asset.
        rad: Search radius, forwarded unchanged to ``density_list``.
        datemin, datemax: Exclusive bounds on ``recorded_at``.

    Returns:
        A dict with ``asset_id``, ``neighbors_id`` (empty list when the asset
        has no point in the period), ``radius`` and ``period``.
    """
    # Points of the asset restricted to the period.
    query = {'$and': [
        {"asset_id": id_asset},
        {'$and': [{'recorded_at': {"$gt": datemin}},
                  {'recorded_at': {"$lt": datemax}}]},
    ]}

    # Chronological order, so that "middle" means middle of the track.
    points = sorted(collection.find(query),
                    key=lambda point: point['recorded_at'])

    neighbors = []
    if points:
        # The previous expression `int(len(tmp)-1/2)` parsed as
        # `len(tmp) - 0.5` and therefore always selected the LAST point.
        # Integer division on (len - 1) gives the intended central point.
        middle = points[(len(points) - 1) // 2]
        coordinates = middle['location']['geo']['coordinates']
        lon = coordinates[0]
        lat = coordinates[1]

        d_list = density_list(lon, lat, rad, datemin, datemax,
                              {"asset_id": {"$ne": id_asset}}, True, True)
        neighbors = d_list['neighbors_id']

    return {
        'asset_id': id_asset,
        'neighbors_id': neighbors,
        'radius': rad,
        'period': [datemin, datemax],
    }


# QUERY 3

In [5]:
# QUERY 3
# Sur la base de tous les assets, retourner le Top – k [e.g. top 10] des zones les plus denses


def top_k_assets(rad, datemin, datemax):
    """Return the ``asset_density`` result of every asset in the collection.

    Each distinct ``asset_id`` of the module-level ``collection`` is passed to
    ``asset_density`` with the given radius and period; non-empty results are
    collected in a list. No ranking or truncation is applied here.
    """
    densities = (
        asset_density(asset_id, rad, datemin, datemax)
        for asset_id in list(collection.distinct("asset_id"))
    )
    return [entry for entry in densities if entry != {}]


# Résumé q2 et q3

In [None]:
def insert_into_summary(col, data):
    """Insert the summary documents ``data`` into the collection ``col``.

    Args:
        col: Object exposing ``insert_many`` (a PyMongo collection in this
            file).
        data: Documents to insert. An empty batch is skipped, because
            ``insert_many`` rejects it with an exception.
    """
    print('insertion...')
    if not data:
        # Nothing was computed: skip the call rather than crash on it.
        print('nothing to insert')
        return
    col.insert_many(data)
    print('insertion done')
    
    
def insert_summary_neighbours_count(client, debut, rad):
    """Store, per 10-minute window, the neighbours of every asset.

    The time span runs from the earliest to the latest ``recorded_at`` found
    after ``debut`` in the module-level ``collection``. It is cut into
    consecutive 10-minute windows; for each window and each asset,
    ``asset_density`` is computed and kept when it has at least one neighbour.
    The kept results are inserted into ``client.fcd.top_asset_density``.

    Args:
        client: MongoDB client used to reach the destination collection.
        debut: Lower bound (exclusive) on ``recorded_at``. The window step is
            added to ``recorded_at`` as a number of seconds, so values are
            presumably epoch timestamps.
        rad: Radius forwarded to ``asset_density``.
    """
    # Ids of all the assets.
    ids_asset = list(collection.distinct("asset_id"))

    # Only the timestamps are needed to find the time span, so project them
    # instead of loading and sorting whole documents.
    stamps = [
        doc['recorded_at']
        for doc in collection.find({'recorded_at': {"$gt": debut}},
                                   {'recorded_at': 1})
    ]
    if not stamps:
        # The previous code indexed an empty list here and raised IndexError.
        print('no record found, nothing to summarise')
        return

    # Window length in seconds.
    delta = datetime.timedelta(minutes=10).total_seconds()

    datemin = min(stamps)
    fin = max(stamps)
    datemax = datemin + delta

    summ = []

    while datemin < fin:

        for id_ in ids_asset:
            d = asset_density(id_, rad, datemin, datemax)

            if d['neighbors_id']:
                summ.append(d)

        datemin = datemax
        datemax = datemin + delta

    print('calculation done')

    if not summ:
        # insert_many refuses an empty batch.
        print('nothing to insert')
        return

    insert_into_summary(client.fcd.top_asset_density, summ)
    

# Build the neighbour summary for everything recorded after 1 March 2020,
# 00:00 (naive datetime, so interpreted in the machine's local time zone),
# with a radius of 10.
date = datetime.datetime(year=2020, month=3, day=1)
timestamp = date.timestamp()

insert_summary_neighbours_count(client, timestamp, 10)
print('\ndone')

# Query4

In [17]:
import math
import datetime

# Query 4
# Retournez le top k des périodes/ zones les plus denses 


# in meters
def distance(lat1, lat2, lon1, lon2):
    """Return the great-circle distance in metres between two points.

    Uses the haversine formula with an Earth radius of 6372800 m. Note the
    argument order: both latitudes first, then both longitudes, in degrees.
    """
    earth_radius = 6372800  # metres
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2
    hav = (math.sin(half_dlat) ** 2
           + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(half_dlon) ** 2)
    return 2 * earth_radius * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))


def top_k_zones(rad, option):
    """Pick zone centres among all stored points and return their densities.

    Every point of the module-level ``collection`` is loaded and sorted by its
    coordinate list. A point becomes a new centre when it lies at least
    ``2 * rad * 1000`` metres from the previously chosen centre (initially
    ``(0, 0)``) and from the centre of every zone already kept. For each new
    centre ``density_list`` is called with the zone filter only; zones with at
    least one asset are kept, without their ``period`` key.

    Args:
        rad: Zone radius, forwarded to ``density_list``.
        option: Extra filter forwarded to ``density_list``.

    Returns:
        The list of kept zone dicts (``neighbors_id`` and ``zone``).
    """
    # All the points of the database, ordered by their coordinates.
    points = sorted(collection.find(),
                    key=lambda point: point['location']['geo']['coordinates'])
    print('sorting done\n')

    kept = []
    last_lon = 0
    last_lat = 0

    for point in points:
        p_lon = point['location']['geo']['coordinates'][0]
        p_lat = point['location']['geo']['coordinates'][1]

        # Still inside the neighbourhood of the last centre: ignore.
        far_from_last = distance(last_lat, p_lat, last_lon, p_lon) >= rad*1000*2
        if not far_from_last:
            continue

        # Overlaps a zone that was already kept: ignore.
        if any(distance(z['zone']['center'][1], p_lat,
                        z['zone']['center'][0], p_lon) < rad*1000*2
               for z in kept):
            continue

        # The point becomes the reference centre, even if its zone turns out
        # to be empty below.
        last_lon = p_lon
        last_lat = p_lat

        found = density_list(last_lon, last_lat, rad, 0, 0, option, True, False)

        if len(found['neighbors_id']) > 0:
            del found['period']
            kept.append(found)

    return kept


def top_k_periods(delta, option, begin):
    """Return, per time window, the assets seen during that window.

    The span between the earliest and latest ``recorded_at`` found after
    ``begin`` in the module-level ``collection`` is cut into consecutive
    windows of ``delta`` minutes. ``density_list`` is called for each window
    with the period filter only; windows with at least one asset are kept,
    without their ``zone`` key.

    Args:
        delta: Window length in minutes; must be positive.
        option: Extra filter forwarded to ``density_list``.
        begin: Lower bound (exclusive) on ``recorded_at``.

    Returns:
        The list of kept window dicts (``neighbors_id`` and ``period``);
        empty when no point is found after ``begin``.

    Raises:
        ValueError: If ``delta`` is not positive (the window would never
            advance).
    """
    # All the points recorded after `begin`, in chronological order.
    base = sorted(collection.find({'recorded_at': {"$gt": begin}}),
                  key=lambda point: point['recorded_at'])
    print('sorting done\n')

    if not base:
        # The previous code indexed the empty list and raised IndexError.
        return []

    # Window length in minutes converted to seconds.
    step = datetime.timedelta(minutes=delta).total_seconds()
    if step <= 0:
        raise ValueError("delta must be a positive number of minutes")

    end = base[-1]['recorded_at']
    datemin = base[0]['recorded_at']
    datemax = datemin + step

    periods = []

    while datemin < end:

        d = density_list(-1, -1, -1, datemin, datemax, option, False, True)

        if d['neighbors_id']:
            del d['zone']
            periods.append(d)

        datemin = datemax
        datemax = datemin + step

    return periods


# Résumé pour top periods

In [18]:
def insert_summary_top_periods(client, begin):
    """Compute the 10-minute period summary and store it.

    Calls ``top_k_periods`` with no extra filter for everything recorded after
    ``begin`` and inserts the result into ``client.fcd.top_periods``.
    """
    periods = top_k_periods(10, {}, begin)
    print('calculation done')

    insert_into_summary(client.fcd.top_periods, periods)
    
    

# Lower bound for the period summary: 3 March 2020, 00:00 (naive datetime,
# so interpreted in the machine's local time zone).
date = datetime.datetime(year=2020, month=3, day=3)
timestamp = date.timestamp()

#insert_summary_top_periods(client, timestamp)
print('\ndone')


done


# Résumé pour top zones

In [19]:
def insert_summary_top_zones(client, begin):
    """Compute the zone summary for several radii and store it.

    ``top_k_zones`` is run for the radii 2, 5, 10, 20 and 30, restricted to
    points recorded after ``begin``; all results are inserted together into
    ``client.fcd.top_zones``.
    """
    recent_only = {'recorded_at': {"$gt": begin}}
    summary = []

    for radius in [2, 5, 10, 20, 30]:
        summary += top_k_zones(radius, recent_only)
        print("rad =", radius, "done")

    print('calculation done')

    insert_into_summary(client.fcd.top_zones, summary)
    
    
# Lower bound for the zone summary: 1 March 2020, 00:00 (naive datetime,
# so interpreted in the machine's local time zone).
date = datetime.datetime(year=2020, month=3, day=1)
timestamp = date.timestamp()

#insert_summary_top_zones(client, timestamp)
print('\ndone')


done


# Query 4_2, top selon la zone et la période

In [20]:
def top_k_zones_periods(rad, delta, begin):
    """Return zone densities computed separately for each time window.

    The span between the earliest and latest ``recorded_at`` found after
    ``begin`` in the module-level ``collection`` is cut into consecutive
    windows of ``delta`` minutes. Inside each window, centres are picked among
    the window's points in the same way as in ``top_k_zones``, and
    ``density_list`` is called for each centre with both the zone and the
    period filters.

    Args:
        rad: Zone radius, forwarded to ``density_list``.
        delta: Window length in minutes; must be positive.
        begin: Lower bound (exclusive) on ``recorded_at``.

    Returns:
        A list of ``density_list`` results (``neighbors_id``, ``period``,
        ``zone``); empty when no point is found after ``begin``.

    Raises:
        ValueError: If ``delta`` is not positive (the window would never
            advance).
    """
    base = sorted(collection.find({'recorded_at': {"$gt": begin}}),
                  key=lambda point: point['recorded_at'])
    print('\nsorting done')

    if not base:
        # The previous code indexed the empty list and raised IndexError.
        return []

    # Window length in minutes converted to seconds.
    step = datetime.timedelta(minutes=delta).total_seconds()
    if step <= 0:
        raise ValueError("delta must be a positive number of minutes")

    end = base[-1]['recorded_at']
    datemin = base[0]['recorded_at']

    zones = []

    while datemin < end:

        d1 = datemin
        d2 = datemin + step
        datemin = d2

        # Points recorded inside the current window.
        window = {'$and': [{'recorded_at': {"$gt": d1}},
                           {'recorded_at': {"$lt": d2}}]}
        # Ordered by their coordinate list.
        points = sorted(collection.find(window),
                        key=lambda point: point['location']['geo']['coordinates'])

        if not points:
            continue

        # The reference centre restarts at (0, 0) for every window.
        lon = 0
        lat = 0

        for point in points:
            p_lon = point['location']['geo']['coordinates'][0]
            p_lat = point['location']['geo']['coordinates'][1]

            # Still inside the neighbourhood of the last centre: ignore.
            if not distance(lat, p_lat, lon, p_lon) >= rad*1000*2:
                continue

            # NOTE(review): `zones` accumulates over all windows, so a centre
            # kept in an earlier window also suppresses nearby points in later
            # windows. Confirm whether this should be per window.
            if any(distance(z['zone']['center'][1], p_lat,
                            z['zone']['center'][0], p_lon) < rad*1000*2
                   for z in zones):
                continue

            lon = p_lon
            lat = p_lat

            # The period flag used to be False: the window was reported in
            # the result but never applied to the query, so every zone was
            # counted over the whole data set.
            zones.append(density_list(lon, lat, rad, d1, d2, {}, True, True))

    return zones


# Résumé pour top zones_periods

In [14]:
def insert_summary_top_zones_periods(client, begin):
    """Compute the zone-and-period summary for several radii and store it.

    ``top_k_zones_periods`` is run with 10-minute windows for the radii 2, 5,
    10, 20 and 30, starting after ``begin``; all results are inserted together
    into ``client.fcd.top_zones_periods``.
    """
    summary = []

    for radius in [2, 5, 10, 20, 30]:
        summary += top_k_zones_periods(radius, 10, begin)
        print("rad =", radius, "done")

    print('calculation done')

    insert_into_summary(client.fcd.top_zones_periods, summary)
    
    
# Lower bound for the zone-and-period summary: 1 March 2020, 00:00 (naive
# datetime, so interpreted in the machine's local time zone).
date = datetime.datetime(year=2020, month=3, day=1)
timestamp = date.timestamp()

#insert_summary_top_zones_periods(client, timestamp)
print('\ndone')


sorting done
rad = 2 done

sorting done
rad = 5 done

sorting done
rad = 10 done

sorting done
rad = 20 done

sorting done
rad = 30 done
calculation done
insertion...
insertion done

done


# Query 5

In [9]:
# QUERY 5
# Return the top-k periods T during which the most vehicles are on the road.

# Extra filter keeping only the points flagged as moving (trips, without
# stay points).
op = dict(moving=1)

#zones = top_k_periods(delta, op)
#pprint.pprint(zones)

print('\ndone')


done


# Résumé pour top periods des voitures en circulation

In [None]:
def insert_summary_top_periods_moving(client, begin):
    """Compute the 10-minute period summary for moving points and store it.

    Calls ``top_k_periods`` with the filter ``{"moving": 1}`` for everything
    recorded after ``begin`` and inserts the result into
    ``client.fcd.top_periods_moving``.
    """
    moving_periods = top_k_periods(10, {"moving": 1}, begin)
    print('calculation done')

    insert_into_summary(client.fcd.top_periods_moving, moving_periods)
    
    

# Lower bound for the moving-vehicles period summary: 3 March 2020, 00:00
# (naive datetime, so interpreted in the machine's local time zone).
date = datetime.datetime(year=2020, month=3, day=3)
timestamp = date.timestamp()

#insert_summary_top_periods_moving(client, timestamp)
print('\ndone')