In [1]:
%matplotlib notebook

In [2]:
import os

os.environ["PYSPARK_PYTHON"]="/usr/local/bin/python3"
os.environ["PYSPARK_DRIVER_PYTHON"]="/usr/local/bin/python3"

In [3]:
import pyspark
sc = pyspark.sql.SparkSession.Builder().getOrCreate()

In [4]:
import json
import numpy as np
import pandas as pd
import seaborn as sn

import matplotlib.pyplot as plt
from IPython.display import display, HTML
import math
import time
import matplotlib.animation as animation
from datetime import datetime, timedelta
import pyspark.sql.functions as F

from utils.emissions_normalizer import EmissionsNormalizer
from utils.knn_predictor import KnnPredictor
from utils.smoother import Smoother
from utils.random_predictor import RandomPredictor

from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [5]:
ANTENNAS_POSITION = [(464259.981343845,6110331.85100085),(463512.015195402,6111004.324434620),(464443.295130103,6111377.26171875),(464629.562194595,6111105.34734669)]
ANTENNAS_NAMES = ["RC1", "RC2", "D1", "D2"] 
X_0 = 462385.503783397
Y_0 = 6109042.35153865

for i in range(4):
    ANTENNAS_POSITION[i] = (ANTENNAS_POSITION[i][0]-X_0, ANTENNAS_POSITION[i][1]-Y_0)

GRID = Polygon(ANTENNAS_POSITION)

In [6]:

FEMALES = [11, 17, 20, 24, 26, 28, 30, 33, 34]
MALES = [10, 14, 21, 22, 23, 25, 27, 29, 31, 32]


#FEMALES = [10, 14, 21, 22, 23, 25, 27, 29, 31, 32] + [11, 17, 20, 24, 26, 28, 30, 33, 34]
#MALES = [10, 14, 21, 22, 23, 25, 27, 29, 31, 32] + [11, 17, 20, 24, 26, 28, 30, 33, 34]

## Entreno el modelo que vamos a usar para predecir

In [7]:
points_recep = sc.read.json('datos/train-test-by-emission.jsonlines/').rdd

In [8]:
normalizer = EmissionsNormalizer()
data = normalizer.normalize(points_recep)
regre_data, regre_target = normalizer.get_regression_dataframes(data)

In [9]:
print(regre_target.head())
print(regre_data.head())

             0            1
0  2854.782012  2192.894689
1  2854.782012  2192.894689
2  2854.782012  2192.894689
3  2854.782012  2192.894689
4  2854.782012  2192.894689
   antenna_0  antenna_1  antenna_2  antenna_3
0          0          0          0         29
1          0          0          0         70
2          0          0          0         28
3          0          0          0         58
4          0          0          0         81


In [10]:
predictor = KnnPredictor()
predictor.fit(regre_data, regre_target)

### Levanto el chekpoint en pandas

In [11]:
birds_data_complete = pd.read_parquet('tmp/checkpoint-cog-7-7.parquet')
birds_data = birds_data_complete.drop(columns=['tag_id','timestamp'])

### Obtengo las predicciones

In [12]:
predictions = predictor.predict(birds_data)
predictions = pd.concat([pd.DataFrame(predictions), pd.DataFrame(birds_data_complete)], axis=1).values

In [13]:
predictions = pd.DataFrame(predictions, columns=['x', 'y', 'recep_0', 'recep_1', 'recep_2', 'recep_3', 'tag', 'time'])

In [52]:
predictions['day'] = predictions.time.str.split(' ').str[0].str.split('-').str[2].astype(int)
predictions['hour'] = predictions.time.str.split(' ').str[1].str.split(':').str[0].astype(int)
predictions['minute'] = predictions.time.str.split(' ').str[1].str.split(':').str[1].astype(int).astype(int)
predictions['second'] = predictions.time.str.split(' ').str[1].str.split(':').str[2].astype(int)
predictions['min_sec'] = (predictions['minute'] * 60 + predictions['second']).div(15).astype(int)

In [53]:
#aux = predictions.head(n=2000).copy()
#aux_u = aux.groupby(['tag','day','hour','min_sec']).apply(lambda x: x.iloc[0]).reset_index(drop=True)

In [54]:
i = 0
date_index_map = {}

for day in range(10,26):
    for hour in range(7,19):
        for min_sec in range(0,int(60*60/15)):
            date_index_map[(day, hour, min_sec)] = i
            i +=1

index_date_map  = {v: k for k, v in date_index_map.items()}

In [55]:
predictions['date_index'] = predictions.apply(lambda x: date_index_map[(x['day'],x['hour'],x['min_sec'])], axis=1)

In [56]:
predictions.head()

Unnamed: 0,x,y,recep_0,recep_1,recep_2,recep_3,tag,time,day,hour,minute,second,min_sec,date_index
0,1452.51,1809.46,0,0,33,0,28,2018-01-10 07:00:01,10,7,0,1,0,0
1,1523.94,1922.76,30,0,0,0,24,2018-01-10 07:00:03,10,7,0,3,0,0
2,1760.4,1879.44,0,0,64,0,28,2018-01-10 07:00:06,10,7,0,6,0,0
3,1842.04,2096.42,0,0,41,0,28,2018-01-10 07:00:11,10,7,0,11,0,0
4,1712.29,1920.26,33,0,0,0,24,2018-01-10 07:00:13,10,7,0,13,0,0


In [57]:
def distance_between(x1, x2, y1, y2):
    return math.hypot(x2-x1, y2-y1)


In [58]:
def found_groups(rows):
    groups = {}
    for b in FEMALES+MALES:
        groups[b] = []
    
    rows_l = rows.values.tolist()
    for i in range(len(rows_l)):
        for j in range(i+1,len(rows_l)):
            distance = distance_between(rows_l[i][0], rows_l[j][0], rows_l[i][1], rows_l[j][1])
            if distance < 300:
                b1 = rows_l[i][6]
                b2 = rows_l[j][6]


                groups[b1].append(b2)
                groups[b2].append(b1)
    #print(groups)

                
    for bird, g in groups.items():
        if len(g) == 0:
            continue
        for adyacent_bird in g:
            if bird == adyacent_bird:
                continue
            groups[adyacent_bird] = list(set(groups[adyacent_bird] + groups[bird]))
            
    visited = {}
    for b in FEMALES+MALES:
        visited[b] = False
    
    res = []
    for start_bird in FEMALES+MALES:
        associated_birds = []
        queue = [start_bird]
        while len(queue) > 0:
            bird = queue.pop()
            if visited[bird]:
                continue
            visited[bird] = True
            associated_birds.append(bird)
            queue = queue + groups[bird]
        

        if len(associated_birds) > 1:
            res.append(associated_birds)

    return res

In [59]:
birds_network = predictions.groupby(['date_index']).apply(found_groups).reset_index()

In [60]:
birds_network

Unnamed: 0,date_index,0
0,0,"[[24, 28]]"
1,1,[]
2,2,[]
3,3,[]
4,4,[]
...,...,...
42227,46075,[]
42228,46076,[]
42229,46077,"[[26, 27]]"
42230,46078,"[[26, 30, 27]]"


In [61]:
birds_network_list = []

for row in birds_network.iterrows():
    for g in row[1][0]:
        day = index_date_map[row[1]['date_index']][0]
        hour = index_date_map[row[1]['date_index']][1]
        min_sec = index_date_map[row[1]['date_index']][2]
        minute = 0
        sec = (min_sec*15)%60
        #if min_sec >= 60:
            #minute = 1
        minute = min_sec*15/60
        if int(minute) != minute:
            minute = int(minute)+1
        minute = int(minute)
        
        if minute < 10:
            minute = f'0{minute}'
        if sec < 10:
            sec = f'0{sec}'
        if hour < 10:
            hour = f'0{hour}'
        
        birds_network_list.append((f'2018-01-{day} {hour}:{minute}:{sec}', row[1]['date_index'], g))

In [62]:
res = pd.DataFrame(birds_network_list, columns=['date', 'date_index', 'group'])

In [63]:
res.to_csv('social_network_matrix_15.csv', index=0)

In [64]:
res

Unnamed: 0,date,date_index,group
0,2018-01-10 07:00:00,0,"[24, 28]"
1,2018-01-10 07:02:15,5,"[24, 28]"
2,2018-01-10 07:05:15,17,"[24, 28]"
3,2018-01-10 07:05:00,20,"[28, 21]"
4,2018-01-10 07:06:15,21,"[24, 28]"
...,...,...,...
39110,2018-01-25 18:58:30,46070,"[34, 29]"
39111,2018-01-25 18:58:45,46071,"[26, 27]"
39112,2018-01-25 18:59:15,46073,"[26, 22]"
39113,2018-01-25 18:60:15,46077,"[26, 27]"


In [169]:
res

Unnamed: 0,date,date_index,group
0,2018-01-10 07:00:00,0,"[24, 28]"
1,2018-01-10 07:03:00,2,"[24, 28]"
2,2018-01-10 07:05:30,3,"[20, 28, 24, 21, 14, 33, 32]"
3,2018-01-10 07:06:00,4,"[20, 28, 24, 21, 14, 33, 32]"
4,2018-01-10 07:08:30,5,"[24, 28, 14, 33, 32]"
...,...,...,...
7944,2018-01-25 18:56:30,7677,"[20, 30, 29, 27, 26, 24, 21, 14, 34]"
7945,2018-01-25 18:57:00,7678,"[20, 29, 21, 34]"
7946,2018-01-25 18:57:00,7678,"[26, 30, 27, 25, 22, 14]"
7947,2018-01-25 18:59:30,7679,"[24, 32]"


In [65]:
res.date.str.split(' ').str[1].str.split(':').str[0].

<pandas.core.strings.StringMethods at 0x7ff149e6f5e0>

In [170]:
len(res[
    (res.group.apply(lambda x: 34 in x and 32 in x)) &
    (res.date.str.split(' ').str[1].str.split(':').str[0].isin(['07','08','09']))
])

253

In [171]:
len(res[
    (res.group.apply(lambda x: 34 in x and 32 in x)) &
    (res.date.str.split(' ').str[1].str.split(':').str[0].isin(['10','11','12']))
])

124

In [172]:
len(res[
    (res.group.apply(lambda x: 34 in x and 32 in x)) &
    (res.date.str.split(' ').str[1].str.split(':').str[0].isin(['13','14','15']))
])

127

In [174]:
len(res[
    (res.group.apply(lambda x: 34 in x and 32 in x)) & 
    (res.date.str.split(' ').str[1].str.split(':').str[0].isin(['16','17','18']))
])

190