In [1]:
%matplotlib notebook

In [2]:
import os

# Point both the Spark workers and the Spark driver at the same Python interpreter.
os.environ.update({
    "PYSPARK_PYTHON": "/usr/local/bin/python3",
    "PYSPARK_DRIVER_PYTHON": "/usr/local/bin/python3",
})

In [3]:
import pyspark
# Reuse the active SparkSession or start a new one. Despite the name `sc`, this is a
# SparkSession (it is used below through `sc.read`), not a SparkContext.
sc = pyspark.sql.SparkSession.Builder().getOrCreate()

In [4]:
import json
import numpy as np
import pandas as pd
import seaborn as sn

import matplotlib.pyplot as plt
from IPython.display import display, HTML
import math
import time
import matplotlib.animation as animation
from datetime import datetime, timedelta
import pyspark.sql.functions as F

from utils.emissions_normalizer import EmissionsNormalizer
from utils.knn_predictor import KnnPredictor
from utils.smoother import Smoother
from utils.random_predictor import RandomPredictor

from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [5]:
# Origin subtracted from every antenna coordinate so positions are relative to (X_0, Y_0).
X_0 = 462385.503783397
Y_0 = 6109042.35153865

# Antenna labels; presumably in the same order as ANTENNAS_POSITION (confirm).
ANTENNAS_NAMES = ["RC1", "RC2", "D1", "D2"]

# (x, y) antenna positions, shifted by the origin above.
ANTENNAS_POSITION = [
    (absolute_x - X_0, absolute_y - Y_0)
    for absolute_x, absolute_y in [
        (464259.981343845, 6110331.85100085),
        (463512.015195402, 6111004.324434620),
        (464443.295130103, 6111377.26171875),
        (464629.562194595, 6111105.34734669),
    ]
]

# Polygon whose vertices are the origin-shifted antenna positions, taken in list order.
GRID = Polygon(ANTENNAS_POSITION)

In [17]:

# Original split of the tags by sex, kept for reference:
#FEMALES = [11, 17, 20, 24, 26, 28, 30, 33, 34]
#MALES = [10, 14, 21, 22, 23, 25, 27, 29, 31, 32]

# Both lists currently hold every tag (the commented MALES list followed by the commented
# FEMALES list), so the FEMALES x MALES matrices built below are square and pair each
# bird with every bird, including itself.
FEMALES = [10, 14, 21, 22, 23, 25, 27, 29, 31, 32] + [11, 17, 20, 24, 26, 28, 30, 33, 34]
MALES = [10, 14, 21, 22, 23, 25, 27, 29, 31, 32] + [11, 17, 20, 24, 26, 28, 30, 33, 34]

## Entreno el modelo que vamos a usar para predecir

In [7]:
# Read the JSON-lines emissions through Spark and keep the underlying RDD.
points_recep = sc.read.json('datos/train-test-by-emission.jsonlines/').rdd

In [8]:
# Normalize the raw emissions and split them into the regression inputs (regre_data)
# and regression targets (regre_target) that the predictor is fitted on below.
normalizer = EmissionsNormalizer()
data = normalizer.normalize(points_recep)
regre_data, regre_target = normalizer.get_regression_dataframes(data)

In [9]:
# Show the first rows of the regression targets, then of the regression inputs.
print(regre_target.head())
print(regre_data.head())

             0            1
0  2854.782012  2192.894689
1  2854.782012  2192.894689
2  2854.782012  2192.894689
3  2854.782012  2192.894689
4  2854.782012  2192.894689
   antenna_0  antenna_1  antenna_2  antenna_3
0          0          0          0         29
1          0          0          0         70
2          0          0          0         28
3          0          0          0         58
4          0          0          0         81


In [10]:
# Fit the position predictor on the regression inputs and targets prepared above.
predictor = KnnPredictor()
predictor.fit(regre_data, regre_target)

### Levanto el checkpoint en pandas

In [11]:
# Load the checkpointed bird emissions. birds_data is the same frame without the
# tag_id and timestamp columns and is what gets passed to the predictor.
birds_data_complete = pd.read_parquet('tmp/checkpoint-cog-7-7.parquet')
birds_data = birds_data_complete.drop(columns=['tag_id','timestamp'])

### Obtengo las predicciones

In [12]:
# Predict a position for every checkpoint row.
predictions = predictor.predict(birds_data)
# Put the predicted columns in front of the checkpoint columns and keep the result as a
# plain array; `predictions` is rebound from the predictor output to this combined array.
# NOTE(review): concat with axis=1 aligns rows on the index, so this presumably relies on
# the checkpoint frame having a default 0..n-1 index — confirm.
predictions = pd.concat([pd.DataFrame(predictions), pd.DataFrame(birds_data_complete)], axis=1).values

## Analizo coocurrencia en regiones uniformes

### Calculo en qué región cayó la predicción

In [13]:
# Width and height of each square grid cell, in the same units as the predicted x / y.
X_REGION_SIZE = 300
Y_REGION_SIZE = 300


def set_prediction_in_regions_classification(predictions):
    """Label every prediction with the uniform grid cell it falls in.

    Parameters
    ----------
    predictions : sequence of 1-D arrays
        One row per emission. Position 0 is x, position 1 is y, and each row must have
        exactly 8 values so that they line up with the column names used below.

    Returns
    -------
    pandas.DataFrame
        The input rows plus a ninth column 'region' holding '<x index>-<y index>',
        where each index is the coordinate divided by the cell size and truncated
        with int().
    """
    columns = ['x', 'y', 'recep_0', 'recep_1', 'recep_2', 'recep_3', 'tag', 'time', 'region']
    grid_predictions = []
    for prediction in predictions:
        # int() truncates toward zero, so coordinates in (-size, size) all get index 0.
        region = '{}-{}'.format(int(prediction[0]/X_REGION_SIZE), int(prediction[1]/Y_REGION_SIZE))
        # Position 8 is just past the 8 existing values, i.e. the label is appended.
        grid_predictions.append(np.insert(prediction, 8, region, axis=0))
    return pd.DataFrame(grid_predictions, columns=columns)


### Obtengo el tiempo que cada posible pareja estuvo junta

In [14]:
# Rebinds `predictions` from the combined array to the region-labelled DataFrame, so
# re-running this cell on its own would pass the DataFrame back into the function.
predictions = set_prediction_in_regions_classification(predictions)

In [18]:
def get_coocurrence_with_window(birds_predictions, window_size):
    """Count, for every (female, male) pair, the time buckets in which they shared a region.

    Each emission is assigned to a bucket (day, hour, min_sec), where min_sec is the
    index of the window_size-second slot inside the hour. A pair co-occurs in a bucket
    when both birds have at least one emission in the same region during it; a bucket
    is counted at most once per pair.

    Side effect: the helper columns 'day', 'hour', 'minute', 'second' and 'min_sec'
    are written into ``birds_predictions``.

    Parameters
    ----------
    birds_predictions : pandas.DataFrame
        Needs 'tag', 'region' and a string 'time' column. The time is parsed by
        splitting on ' ', '-' and ':' (presumably 'YYYY-MM-DD HH:MM:SS' — confirm).
    window_size : number
        Bucket length in seconds.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by FEMALES, columns by MALES (module-level lists); each value is
        the number of shared buckets, 0 for pairs never found together.
    """
    date_part = birds_predictions.time.str.split(' ').str[0]
    clock_part = birds_predictions.time.str.split(' ').str[1].str.split(':')
    # Only the day-of-month is kept, so data spanning several months would share buckets.
    birds_predictions['day'] = date_part.str.split('-').str[2].astype(int)
    birds_predictions['hour'] = clock_part.str[0].astype(int)
    birds_predictions['minute'] = clock_part.str[1].astype(int)
    birds_predictions['second'] = clock_part.str[2].astype(int)
    birds_predictions['min_sec'] = (
        (birds_predictions['minute'] * 60 + birds_predictions['second']).div(window_size).astype(int)
    )

    bucket_columns = ['tag', 'region', 'day', 'hour', 'min_sec']
    # Several emissions of one bird in the same region and bucket are equivalent here;
    # collapsing them before the merge keeps the join small without changing the counts.
    males = birds_predictions[birds_predictions.tag.isin(MALES)][bucket_columns].drop_duplicates()
    females = birds_predictions[birds_predictions.tag.isin(FEMALES)][bucket_columns].drop_duplicates()

    # 'tag' exists on both sides, so the merge names them tag_x (female) and tag_y (male).
    coocurrence = pd.merge(females, males, on=['region', 'day', 'hour', 'min_sec'])
    # A pair can share more than one region within a bucket: keep one row per pair and bucket.
    coocurrence_dict = (
        coocurrence
        .drop_duplicates(subset=['tag_x', 'tag_y', 'day', 'hour', 'min_sec'])
        .groupby(['tag_x', 'tag_y'])
        .size()
        .to_dict()
    )

    matrix_res = [
        [coocurrence_dict.get((female, male), 0) for male in MALES]
        for female in FEMALES
    ]
    return pd.DataFrame(matrix_res, index=FEMALES, columns=MALES)

In [16]:
# Bucket lengths, in seconds, for which the matrices below are computed.
windows = (5,90,120)

In [19]:
# One region co-occurrence matrix per window size, in the same order as `windows`.
results = [get_coocurrence_with_window(predictions, window) for window in windows]

In [21]:
# Write each co-occurrence matrix to a CSV named after its window size.
for window, coocurrence_matrix in zip(windows, results):
    coocurrence_matrix.to_csv(f'matrix_coocurrence_with_window_{window}.csv')

In [22]:
# Render the three co-occurrence matrices one after another.
display(results[0], results[1], results[2])

Unnamed: 0,10,14,21,22,23,25,27,29,31,32,11,17,20,24,26,28,30,33,34
10,3582,21,111,3,17,2,236,118,58,74,469,61,186,138,7,57,48,70,102
14,21,16835,205,182,51,86,203,146,309,59,9,392,406,229,121,263,393,91,150
21,111,205,23645,105,157,21,265,258,354,605,101,448,4416,504,81,189,246,708,323
22,3,182,105,9320,29,81,48,69,116,37,0,279,195,137,133,75,223,63,66
23,17,51,157,29,6475,6,104,29,74,104,13,55,203,956,18,25,65,124,57
25,2,86,21,81,6,3594,4,31,17,15,3,110,52,10,486,14,102,17,25
27,236,203,265,48,104,4,17823,652,163,446,200,351,513,583,16,47,151,479,684
29,118,146,258,69,29,31,652,18465,86,612,146,338,521,393,57,68,133,730,4242
31,58,309,354,116,74,17,163,86,20561,508,30,990,727,919,35,163,2188,561,87
32,74,59,605,37,104,15,446,612,508,37808,68,464,1154,963,26,54,319,6508,609


Unnamed: 0,10,14,21,22,23,25,27,29,31,32,11,17,20,24,26,28,30,33,34
10,361,39,78,11,13,6,114,86,55,82,218,66,119,100,8,42,45,85,95
14,39,1743,281,239,76,104,218,174,325,155,27,469,515,293,177,205,361,199,186
21,78,281,2661,192,134,64,285,285,342,642,63,442,2033,484,130,166,240,619,296
22,11,239,192,1213,53,90,81,95,164,97,6,333,326,202,162,101,184,117,104
23,13,76,134,53,739,15,86,54,105,120,7,82,165,480,18,32,76,124,48
25,6,104,64,90,15,531,16,37,37,33,6,158,108,34,356,24,103,38,47
27,114,218,285,81,86,16,1622,362,176,422,113,238,460,416,50,80,162,482,349
29,86,174,285,95,54,37,362,1726,110,462,100,269,448,387,72,85,133,547,1330
31,55,325,342,164,105,37,176,110,1909,374,52,510,576,498,59,171,739,394,113
32,82,155,642,97,120,33,422,462,374,3598,60,485,1013,855,107,89,206,2884,447


Unnamed: 0,10,14,21,22,23,25,27,29,31,32,11,17,20,24,26,28,30,33,34
10,289,38,67,12,10,5,92,79,48,73,187,67,103,87,10,39,42,73,74
14,38,1411,267,224,76,97,196,161,289,159,23,426,464,279,169,173,316,194,172
21,67,267,2172,186,119,64,268,272,313,596,59,414,1732,454,124,158,220,575,272
22,12,224,186,1016,52,93,84,88,157,105,9,310,316,206,152,96,181,124,92
23,10,76,119,52,606,17,74,47,107,118,6,84,151,422,19,31,79,122,44
25,5,97,64,93,17,456,16,42,34,31,5,145,107,36,316,27,98,33,44
27,92,196,268,84,74,16,1282,316,155,373,97,220,415,376,49,81,150,442,301
29,79,161,272,88,47,42,316,1379,110,410,84,261,411,360,71,75,123,490,1090
31,48,289,313,157,107,34,155,110,1517,343,51,440,507,441,61,147,632,356,105
32,73,159,596,105,118,31,373,410,343,2876,56,447,911,773,114,95,198,2396,404


In [23]:
def get_ocurrence_totals(birds_predictions, window_size, days=range(10, 26), hours=range(7, 19)):
    """Count, for every (female, male) pair, the buckets in which at least one of them emitted.

    Buckets are (day, hour, min_sec) triples, min_sec being the index of the
    window_size-second slot inside the hour. Only buckets inside the observation grid
    (``days`` x ``hours`` x the int(3600 / window_size) slots of an hour) are counted.

    The count is obtained as the size of the union of both birds' bucket sets. This is
    the same quantity as "all grid buckets minus the buckets where neither bird
    emitted", without materialising the full pair-by-bucket grid, and it stays correct
    for a pair that leaves no bucket empty.

    Side effect: the helper columns 'day', 'hour', 'minute', 'second' and 'min_sec'
    are written into ``birds_predictions``.

    Parameters
    ----------
    birds_predictions : pandas.DataFrame
        Needs 'tag' and a string 'time' column. The time is parsed by splitting on
        ' ', '-' and ':' (presumably 'YYYY-MM-DD HH:MM:SS' — confirm).
    window_size : number
        Bucket length in seconds.
    days : iterable of int, optional
        Days of the month that belong to the grid. Defaults to 10..25.
    hours : iterable of int, optional
        Hours of the day that belong to the grid. Defaults to 7..18.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by FEMALES, columns by MALES (module-level lists); each value is
        the number of grid buckets in which the female or the male has an emission.
    """
    date_part = birds_predictions.time.str.split(' ').str[0]
    clock_part = birds_predictions.time.str.split(' ').str[1].str.split(':')
    birds_predictions['day'] = date_part.str.split('-').str[2].astype(int)
    birds_predictions['hour'] = clock_part.str[0].astype(int)
    birds_predictions['minute'] = clock_part.str[1].astype(int)
    birds_predictions['second'] = clock_part.str[2].astype(int)
    birds_predictions['min_sec'] = (
        (birds_predictions['minute'] * 60 + birds_predictions['second']).div(window_size).astype(int)
    )

    buckets_per_hour = int(60 * 60 / window_size)
    # The last slot can fall outside the grid when window_size does not divide 3600.
    inside_grid = (
        birds_predictions['day'].isin(list(days))
        & birds_predictions['hour'].isin(list(hours))
        & (birds_predictions['min_sec'] < buckets_per_hour)
    )
    observed = birds_predictions.loc[inside_grid, ['tag', 'day', 'hour', 'min_sec']].drop_duplicates()

    # Set of occupied buckets for every bird that has at least one emission in the grid.
    buckets_by_tag = {
        tag: set(zip(rows['day'], rows['hour'], rows['min_sec']))
        for tag, rows in observed.groupby('tag', sort=False)
    }

    no_buckets = frozenset()
    ocurrence_matrix = [
        [
            len(buckets_by_tag.get(female, no_buckets) | buckets_by_tag.get(male, no_buckets))
            for male in MALES
        ]
        for female in FEMALES
    ]
    return pd.DataFrame(ocurrence_matrix, index=FEMALES, columns=MALES)

In [24]:
# One matrix of bucket totals per window size, in the same order as `windows`.
results_totals = [get_ocurrence_totals(predictions, window) for window in windows]


In [25]:
# Write each totals matrix to a CSV named after its window size.
for window, totals_matrix in zip(windows, results_totals):
    totals_matrix.to_csv(f'matrix_interval_totals_with_window_{window}.csv')

In [26]:
# Render the three totals matrices one after another.
display(results_totals[0], results_totals[1], results_totals[2])

Unnamed: 0,10,14,21,22,23,25,27,29,31,32,11,17,20,24,26,28,30,33,34
10,3582,19518,26076,12773,9863,7146,20296,21022,23124,40516,4549,29272,46869,35413,11156,10559,23389,45645,21299
14,19518,16835,37326,25001,22060,19894,30197,31070,34408,50078,18822,39911,54900,44256,23669,22570,34446,54527,31871
21,26076,37326,23645,31225,28956,26610,37756,38423,40805,53836,25624,45419,50792,51087,29842,29647,41562,58666,38580
22,12773,25001,31225,9320,15311,12497,25948,26555,28856,44210,12021,34082,50870,39835,16210,16543,28701,49365,26783
23,9863,22060,28956,15311,6475,9943,22625,23438,26060,41936,9112,32296,49068,34329,13982,14021,26043,46992,23532
25,7146,19894,26610,12497,9943,3594,21263,21520,23831,40412,6374,29763,47540,35944,9051,11367,23567,45563,21722
27,20296,30197,37756,25948,22625,21263,17823,31766,34833,50932,19596,40572,55472,45035,25015,24169,35662,55578,32196
29,21022,31070,38423,26555,23438,21520,31766,18465,36819,51038,20242,40971,56671,46104,25111,24044,36513,55487,24699
31,23124,34408,40805,28856,26060,23831,34833,36819,20561,52409,22386,41897,58642,46000,27739,26806,33662,57388,37336
32,40516,50078,53836,44210,41936,40412,50932,51038,52409,37808,39976,57120,69051,60497,43107,44124,53934,53318,51257


Unnamed: 0,10,14,21,22,23,25,27,29,31,32,11,17,20,24,26,28,30,33,34
10,361,1952,2846,1526,1058,879,1825,1924,2100,3810,412,2889,3931,3236,1364,959,2212,4056,1983
14,1952,1743,3726,2621,2256,2125,2738,2892,3186,4524,1919,3715,4545,3960,2522,2159,3240,4683,2958
21,2846,3726,2661,3390,3143,3016,3619,3703,3863,4878,2817,4375,3988,4628,3342,3041,4027,5061,3739
22,1526,2621,3390,1213,1822,1614,2550,2657,2856,4203,1486,3414,4338,3728,2019,1771,2947,4415,2699
23,1058,2256,3143,1822,739,1225,2089,2230,2420,3938,1011,3192,4182,3155,1710,1382,2532,4175,2302
25,879,2125,3016,1614,1225,531,2104,2132,2373,3862,829,3004,4099,3447,1115,1206,2366,4103,2196
27,1825,2738,3619,2550,2089,2104,1622,2734,3026,4456,1784,3708,4475,3860,2532,2093,3148,4666,2796
29,1924,2892,3703,2657,2230,2132,2734,1726,3281,4502,1886,3741,4577,3977,2525,2158,3253,4665,2021
31,2100,3186,3863,2856,2420,2373,3026,3281,1909,4543,2057,3820,4703,3990,2816,2371,2981,4760,3358
32,3810,4524,4878,4203,3938,3862,4456,4502,4543,3598,3781,4976,5461,5085,4100,4000,4725,4225,4575


Unnamed: 0,10,14,21,22,23,25,27,29,31,32,11,17,20,24,26,28,30,33,34
10,289,1578,2314,1262,862,730,1441,1536,1663,3042,332,2316,3096,2562,1108,760,1759,3236,1579
14,1578,1411,2994,2117,1810,1731,2177,2307,2531,3564,1560,2959,3560,3138,2028,1732,2579,3699,2345
21,2314,2994,2172,2747,2538,2467,2903,2953,3089,3852,2302,3479,3146,3646,2704,2449,3221,3988,2973
22,1262,2117,2747,1016,1508,1341,2055,2139,2300,3344,1242,2747,3415,2964,1655,1448,2369,3507,2175
23,862,1810,2538,1508,606,1017,1662,1785,1921,3131,835,2553,3287,2503,1387,1107,2014,3321,1841
25,730,1731,2467,1341,1017,456,1686,1727,1911,3088,709,2418,3242,2750,910,990,1905,3288,1771
27,1441,2177,2903,2055,1662,1686,1282,2162,2375,3509,1416,2929,3502,3033,2016,1642,2477,3677,2210
29,1536,2307,2953,2139,1785,1727,2162,1379,2586,3547,1512,2975,3584,3137,2028,1709,2567,3670,1600
31,1663,2531,3089,2300,1921,1911,2375,2586,1517,3580,1633,3031,3674,3128,2252,1879,2347,3742,2650
32,3042,3564,3852,3344,3131,3088,3509,3547,3580,2876,3026,3922,4248,3986,3255,3166,3719,3329,3597


In [27]:
def get_total_intervals_by_bird(birds_predictions, window_size):
    """Count, for every tag, the distinct time buckets in which it has an emission.

    A bucket is a (day, hour, min_sec) triple, min_sec being the index of the
    window_size-second slot inside the hour.

    Side effect: the helper columns 'day', 'hour', 'minute', 'second' and 'min_sec'
    are written into ``birds_predictions``.

    Parameters
    ----------
    birds_predictions : pandas.DataFrame
        Needs 'tag' and a string 'time' column. The time is parsed by splitting on
        ' ', '-' and ':' (presumably 'YYYY-MM-DD HH:MM:SS' — confirm).
    window_size : number
        Bucket length in seconds.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'tag', with a single column 'total' holding the bucket count.
    """
    date_part = birds_predictions.time.str.split(' ').str[0]
    clock_part = birds_predictions.time.str.split(' ').str[1].str.split(':')
    birds_predictions['day'] = date_part.str.split('-').str[2].astype(int)
    birds_predictions['hour'] = clock_part.str[0].astype(int)
    birds_predictions['minute'] = clock_part.str[1].astype(int)
    birds_predictions['second'] = clock_part.str[2].astype(int)
    birds_predictions['min_sec'] = (
        (birds_predictions['minute'] * 60 + birds_predictions['second']).div(window_size).astype(int)
    )

    # Keep one row per bird and bucket, then count the rows left for each bird.
    one_row_per_bucket = birds_predictions.drop_duplicates(subset=['tag', 'day', 'hour', 'min_sec'])
    return one_row_per_bucket.groupby(['tag']).size().to_frame('total')

In [28]:
# One per-bird bucket count table per window size, in the same order as `windows`.
results_by_bird = [get_total_intervals_by_bird(predictions, window) for window in windows]


In [29]:
# Write each per-bird table to a CSV named after its window size.
for window, totals_by_bird in zip(windows, results_by_bird):
    totals_by_bird.to_csv(f'interval_with_emissions_by_bird_with_window_{window}.csv')

In [30]:
# Render the three per-bird tables one after another.
display(results_by_bird[0], results_by_bird[1], results_by_bird[2])

Unnamed: 0_level_0,total
tag,Unnamed: 1_level_1
10,3582
11,2792
14,16835
17,26871
20,44929
21,23645
22,9320
23,6475
24,32805
25,3594


Unnamed: 0_level_0,total
tag,Unnamed: 1_level_1
10,361
11,310
14,1743
17,2694
20,3814
21,2661
22,1213
23,739
24,3043
25,531


Unnamed: 0_level_0,total
tag,Unnamed: 1_level_1
10,289
11,262
14,1411
17,2167
20,3009
21,2172
22,1016
23,606
24,2416
25,456


In [52]:
## test: this call needs get_coocurrence_with_window_by_distance, which is defined in a later cell of this file
get_coocurrence_with_window_by_distance(predictions, 90, 500)

Unnamed: 0,10,14,21,22,23,25,27,29,31,32
11,249,75,100,11,10,10,133,122,99,90
17,108,613,662,431,127,208,390,429,661,787
20,181,749,2394,505,252,164,724,691,785,1447
24,135,502,760,338,589,60,634,597,717,1226
26,15,230,209,207,28,431,83,126,87,241
28,84,267,232,133,60,36,147,160,225,165
30,78,454,357,231,126,132,268,234,854,398
33,135,412,1015,267,243,101,679,746,632,3181
34,125,310,461,146,109,67,466,1463,185,633


In [53]:
### test: distance-based co-occurrence (90 s window, distance 500) for each 3-hour slice
results = []
for time_range in range(4):
    print(7+time_range*3,7+time_range*3+2)
    hour_in_slice = predictions.hour.astype(int).isin(range(7+time_range*3,7+time_range*3+3))
    predictions_in_time_range = predictions[hour_in_slice].copy()
    slice_matrix = get_coocurrence_with_window_by_distance(predictions_in_time_range, 90, 500)
    results.append(slice_matrix)

7 9
10 12
13 15
16 18


Unnamed: 0,10,14,21,22,23,25,27,29,31,32
11,9,1,4,0,0,0,0,1,0,7
17,4,225,215,181,40,43,147,152,64,195
20,22,267,776,233,106,37,270,242,119,442
24,17,237,296,126,226,25,260,252,67,425
26,3,43,49,39,7,94,25,30,18,46
28,2,65,76,39,15,6,49,38,21,72
30,0,87,69,57,19,20,58,58,49,56
33,24,152,319,102,113,14,255,259,73,826
34,8,143,170,75,56,16,128,505,41,231


In [65]:
# Add up the entry at row position 8, column position 9 over the four time-slice matrices.
sum(results[slice_index].iloc[8, 9] for slice_index in range(4))

633

### New with distances

In [31]:
def get_coocurrence_with_window_by_distance(birds_predictions, window_size, distance):
    """Count, for every (female, male) pair, the time buckets in which they were close.

    Each emission is assigned to a bucket (day, hour, min_sec), where min_sec is the
    index of the window_size-second slot inside the hour. A pair is close in a bucket
    when some female emission and some male emission of that bucket lie at a Euclidean
    distance of at most ``distance``; a bucket is counted at most once per pair.

    Side effect: the helper columns 'day', 'hour', 'minute', 'second' and 'min_sec'
    are written into ``birds_predictions``.

    Parameters
    ----------
    birds_predictions : pandas.DataFrame
        Needs 'tag', 'x', 'y' and a string 'time' column. The time is parsed by
        splitting on ' ', '-' and ':' (presumably 'YYYY-MM-DD HH:MM:SS' — confirm).
    window_size : number
        Bucket length in seconds.
    distance : number
        Largest separation, in the units of x / y, still considered close.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by FEMALES, columns by MALES (module-level lists); each value is
        the number of buckets in which the pair was close, 0 if never.
    """
    date_part = birds_predictions.time.str.split(' ').str[0]
    clock_part = birds_predictions.time.str.split(' ').str[1].str.split(':')
    birds_predictions['day'] = date_part.str.split('-').str[2].astype(int)
    birds_predictions['hour'] = clock_part.str[0].astype(int)
    birds_predictions['minute'] = clock_part.str[1].astype(int)
    birds_predictions['second'] = clock_part.str[2].astype(int)
    birds_predictions['min_sec'] = (
        (birds_predictions['minute'] * 60 + birds_predictions['second']).div(window_size).astype(int)
    )

    position_columns = ['tag', 'x', 'y', 'day', 'hour', 'min_sec']
    # Repeated identical positions of one bird within a bucket add nothing to the result;
    # dropping them first keeps the bucket-wise cross join below smaller.
    males = birds_predictions[birds_predictions.tag.isin(MALES)][position_columns].drop_duplicates()
    females = birds_predictions[birds_predictions.tag.isin(FEMALES)][position_columns].drop_duplicates()

    # Every female emission is paired with every male emission of the same bucket.
    coocurrence = pd.merge(females, males, on=['day', 'hour', 'min_sec'], suffixes=('_female', '_male'))
    separation = (
        (coocurrence['x_female'] - coocurrence['x_male']) ** 2
        + (coocurrence['y_female'] - coocurrence['y_male']) ** 2
    ) ** 0.5
    close_pairs = coocurrence[separation <= distance]

    # Keep one row per pair and bucket, then count the buckets of each pair.
    coocurrence_dict = (
        close_pairs
        .drop_duplicates(subset=['tag_female', 'tag_male', 'day', 'hour', 'min_sec'])
        .groupby(['tag_female', 'tag_male'])
        .size()
        .to_dict()
    )

    matrix_res = [
        [coocurrence_dict.get((female, male), 0) for male in MALES]
        for female in FEMALES
    ]
    return pd.DataFrame(matrix_res, index=FEMALES, columns=MALES)

In [32]:
# Window sizes and distance thresholds used by the distance-based cells below
# (presumably seconds and metres, going by the 'sec' / 'met' output folders — confirm).
# This rebinds `windows`, which an earlier cell set to (5,90,120).
windows = (60,90)
distances = (300, 500)

In [33]:
# Hour of the day of each emission, parsed from the time string; the time-slice cells
# below filter on this column.
predictions['hour'] = predictions.time.str.split(' ').str[1].str.split(':').str[0].astype(int)

In [35]:
# For each 3-hour slice and each distance threshold, compute the distance-based
# co-occurrence matrix of every window size and write it to its own CSV.
for time_range in range(4):
    print(7+time_range*3,7+time_range*3+2)
    hour_in_slice = predictions.hour.astype(int).isin(range(7+time_range*3,7+time_range*3+3))
    predictions_in_time_range = predictions[hour_in_slice].copy()
    for d in distances:
        results = [
            get_coocurrence_with_window_by_distance(predictions_in_time_range, window, d)
            for window in windows
        ]
        for window, distance_matrix in zip(windows, results):
            distance_matrix.to_csv(f'nuevo-romi/Data/{window}sec/{d}met/matrix_coocurrence_with_distance_{d}_with_window_{window}-{7+time_range*3}-{7+time_range*3+2}.csv')


7 9
10 12
13 15
16 18


In [36]:
# For each 3-hour slice, write the per-bird bucket counts of every window size to CSV.
for time_range in range(4):
    print(7+time_range*3,7+time_range*3+2)
    hour_in_slice = predictions.hour.astype(int).isin(range(7+time_range*3,7+time_range*3+3))
    predictions_in_time_range = predictions[hour_in_slice].copy()
    for window in windows:
        totals_by_bird = get_total_intervals_by_bird(predictions_in_time_range, window)
        totals_by_bird.to_csv(f'nuevo-romi/Data/{window}sec/interval_with_emissions_by_bird_with_window_{window}-{7+time_range*3}-{7+time_range*3+2}.csv')
    

7 9
10 12
13 15
16 18


In [37]:
def get_ocurrence_totals_with_time_range(birds_predictions, window_size, days=range(10, 26)):
    """Count, per (female, male) pair, the buckets of a time slice in which either bird emitted.

    Buckets are (day, hour, min_sec) triples, min_sec being the index of the
    window_size-second slot inside the hour. The grid is made of ``days``, the hours
    that actually appear in ``birds_predictions`` and the int(3600 / window_size)
    slots of an hour; emissions outside it are ignored.

    The count is the size of the union of both birds' bucket sets. This is the same
    quantity as "all grid buckets minus the buckets where neither bird emitted", it
    does not assume that the slice holds exactly three distinct hours, and it stays
    correct for a pair that leaves no bucket empty.

    Side effect: the helper columns 'day', 'hour', 'minute', 'second' and 'min_sec'
    are written into ``birds_predictions``.

    Parameters
    ----------
    birds_predictions : pandas.DataFrame
        Emissions of one time slice. Needs 'tag' and a string 'time' column; the
        time is parsed by splitting on ' ', '-' and ':' (presumably
        'YYYY-MM-DD HH:MM:SS' — confirm). An existing 'hour' column is not required,
        it is recomputed from 'time'.
    window_size : number
        Bucket length in seconds.
    days : iterable of int, optional
        Days of the month that belong to the grid. Defaults to 10..25.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by FEMALES, columns by MALES (module-level lists); each value is
        the number of grid buckets in which the female or the male has an emission.
    """
    date_part = birds_predictions.time.str.split(' ').str[0]
    clock_part = birds_predictions.time.str.split(' ').str[1].str.split(':')
    birds_predictions['day'] = date_part.str.split('-').str[2].astype(int)
    birds_predictions['hour'] = clock_part.str[0].astype(int)
    birds_predictions['minute'] = clock_part.str[1].astype(int)
    birds_predictions['second'] = clock_part.str[2].astype(int)
    birds_predictions['min_sec'] = (
        (birds_predictions['minute'] * 60 + birds_predictions['second']).div(window_size).astype(int)
    )

    buckets_per_hour = int(60 * 60 / window_size)
    # Every hour present in the data belongs to the grid, so only the day and the slot
    # index need filtering (the last slot can overflow when window_size does not divide 3600).
    inside_grid = (
        birds_predictions['day'].isin(list(days))
        & (birds_predictions['min_sec'] < buckets_per_hour)
    )
    observed = birds_predictions.loc[inside_grid, ['tag', 'day', 'hour', 'min_sec']].drop_duplicates()

    # Set of occupied buckets for every bird that has at least one emission in the grid.
    buckets_by_tag = {
        tag: set(zip(rows['day'], rows['hour'], rows['min_sec']))
        for tag, rows in observed.groupby('tag', sort=False)
    }

    no_buckets = frozenset()
    ocurrence_matrix = [
        [
            len(buckets_by_tag.get(female, no_buckets) | buckets_by_tag.get(male, no_buckets))
            for male in MALES
        ]
        for female in FEMALES
    ]
    return pd.DataFrame(ocurrence_matrix, index=FEMALES, columns=MALES)

In [38]:
# For each 3-hour slice, write the pair bucket totals of every window size to CSV.
for time_range in range(4):
    print(7+time_range*3,7+time_range*3+2)
    hour_in_slice = predictions.hour.astype(int).isin(range(7+time_range*3,7+time_range*3+3))
    predictions_in_time_range = predictions[hour_in_slice].copy()
    for window in windows:
        slice_totals = get_ocurrence_totals_with_time_range(predictions_in_time_range, window)
        slice_totals.to_csv(f'nuevo-romi/Data/{window}sec/matrix_interval_totals_with_window_{window}-{7+time_range*3}-{7+time_range*3+2}.csv')
    
    

7 9
10 12
13 15
16 18
