In [7]:
%matplotlib qt
import pandas as pd
import json
import numpy as np
import os
import re
import time
from matplotlib import pyplot as plt
import ast
from PIL import Image
import datetime
from collections import defaultdict

In [4]:
# Load the input CSV into `df`.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs
# on the original author's machine — consider a configurable data directory.
df = pd.read_csv('/Users/HP/Desktop/UNI/LM_1/ACN/ACN_project/data/filter_df_gian.csv')

In [6]:
# Number of distinct values in the 'address' column.
df['address'].nunique()

3700

In [17]:
# Parse 'date' and keep only the calendar-date part (datetime.date objects), so the
# column can be compared directly with the datetime.date values used for day filtering.
df['date'] = pd.to_datetime(df['date']).dt.date

In [None]:
def filter_edges_by_window(edge_dict, n, k):
    """
    Groups edges into temporal windows of n minutes and keeps the edges that
    occur strictly more than k times inside a window.

    Edges are treated as undirected: (a, b) and (b, a) are counted as the same
    edge, and every returned edge is a tuple with its endpoints in sorted order.

    Parameters:
        edge_dict (dict): Keys are rounded times as "HH:MM" strings, values are
            lists of edges (each edge is a sequence of ids).
        n (int): Size of the temporal window in minutes. Must be positive.
        k (int): Frequency threshold. An edge is kept only when its count in the
            window is greater than k; a count equal to k is dropped.

    Returns:
        dict: Keys are window labels "HH:MM - HH:MM" (window start, then start of
            the next window) in chronological order; values are lists of edge
            tuples. Windows in which no edge passes the threshold are omitted.

    Raises:
        ValueError: If n is not positive.
    """
    # Fail with a clear message instead of a ZeroDivisionError (n == 0) or
    # nonsensical negative window labels (n < 0).
    if n <= 0:
        raise ValueError(f"n must be a positive number of minutes, got {n!r}")

    # Bucket every edge by the index of the n-minute window its timestamp falls in.
    # The key is called `time_label` so the imported `time` module is not shadowed.
    window_edges = defaultdict(list)
    for time_label, edges in edge_dict.items():
        hours, minutes = map(int, time_label.split(':'))
        window_edges[(hours * 60 + minutes) // n].extend(edges)

    result = {}
    # Walk the windows in chronological order so the output does not depend on
    # the iteration order of edge_dict.
    for window in sorted(window_edges):
        edge_counts = defaultdict(int)
        for edge in window_edges[window]:
            # Sorting the endpoints makes (a, b) and (b, a) share one counter.
            edge_counts[tuple(sorted(edge))] += 1

        # Strict comparison: exactly k occurrences is not enough.
        filtered_edges = [edge for edge, count in edge_counts.items() if count > k]
        if not filtered_edges:
            continue

        # Label the window by its first minute and the first minute of the next one.
        start, end = window * n, (window + 1) * n
        window_range = f"{start // 60:02}:{start % 60:02} - {end // 60:02}:{end % 60:02}"
        result[window_range] = filtered_edges

    return result

def get_distance(position1, position2):
    """
    Return the straight-line distance between two positions, measured on
    components 0 and 2 only.

    The component at index 1 is ignored, so this is a planar distance rather
    than a full 3-D Euclidean distance.
    NOTE(review): presumably index 1 is the vertical axis — confirm against the data.
    """
    delta_first = position1[0] - position2[0]
    delta_third = position1[2] - position2[2]
    return np.sqrt(delta_first ** 2 + delta_third ** 2)

def get_edges_with_window(df, date, radius, n, k):
    """
    Builds proximity edges for a single date and filters them by temporal window.

    For every distinct 'rounded_time' on `date`, each pair of addresses whose
    get_distance() is below `radius` yields one edge. The per-time edge lists
    are then passed to filter_edges_by_window(), whose result is returned.

    Parameters:
        df (pd.DataFrame): DataFrame containing columns 'date', 'rounded_time',
            'address', and 'position'. 'position' holds the string form of a
            coordinate sequence (it is parsed with ast.literal_eval).
        date: Value compared for equality against df['date'].
        radius (float): Two users are connected when their distance is strictly
            smaller than this value.
        n (int): Size of the temporal window in minutes (forwarded to
            filter_edges_by_window).
        k (int): Frequency threshold (forwarded to filter_edges_by_window).

    Returns:
        dict: The result of filter_edges_by_window() for this date's edges.
    """
    # Keep only the rows of the requested date.
    df_filtered = df[df['date'] == date]

    edges_dict = {}
    # The loop variable is `time_label` so the imported `time` module is not shadowed.
    for time_label in df_filtered['rounded_time'].unique():
        df_tmp = df_filtered[df_filtered['rounded_time'] == time_label]

        # One row per address (the first one seen in this time slice), obtained in
        # a single pass instead of re-filtering the slice once per user.
        first_rows = df_tmp.drop_duplicates(subset='address', keep='first')
        unique_ids = first_rows['address'].to_numpy()
        positions = [np.array(ast.literal_eval(raw)) for raw in first_rows['position']]

        # Compare every unordered pair of users exactly once (i < j).
        edges_tmp = []
        for i in range(len(unique_ids)):
            for j in range(i + 1, len(unique_ids)):
                if get_distance(positions[i], positions[j]) < radius:
                    # Tuples, matching what filter_edges_by_window documents.
                    edges_tmp.append((unique_ids[i], unique_ids[j]))

        edges_dict[time_label] = edges_tmp

    return filter_edges_by_window(edges_dict, n, k)

def get_edges_date(df, radius, start_date, end_date, n, k):
    """
    Computes the windowed, frequency-filtered edges for every day of a date range.

    Parameters:
        df (pd.DataFrame): DataFrame containing columns 'date', 'rounded_time', 'address', and 'position'.
        radius (float): Maximum distance to consider two users connected (edge).
        start_date (datetime.date): First day of the range (inclusive).
        end_date (datetime.date): Last day of the range (inclusive).
        n (int): Size of the temporal window in minutes.
        k (int): Frequency threshold forwarded to get_edges_with_window.

    Returns:
        dict: One entry per day of the range; each value is the dictionary
            returned by get_edges_with_window for that day.
    """
    # Every calendar day from start_date through end_date, as date objects.
    days = pd.date_range(start=start_date, end=end_date).date

    # Narrow the frame to the range once, up front.
    rows_in_range = df[df['date'].isin(days)]

    per_day = {}
    for day in days:
        print(f"Processing date: {day}")
        per_day[day] = get_edges_with_window(rows_in_range, day, radius, n, k)

    return per_day


In [18]:
# Define parameters
start_date = datetime.date(2024, 4, 1)
end_date = datetime.date(2024, 4, 2)
radius = 10.0  # Distance threshold passed to get_edges_date as `radius`
n = 10  # Temporal window of 10 minutes
k = 5   # Frequency threshold: an edge is kept only if it occurs more than 5 times in a window

# Call the function
edges_result = get_edges_date(df, radius, start_date, end_date, n, k)

# Inspect results: number of retained edges per window, grouped by date
# NOTE(review): the saved output of this cell contains per-minute "Time = ..." /
# "Active players" / "Edges created" lines that get_edges_with_window does not
# currently print — the output is stale; re-run the notebook to refresh it.
for date, windows in edges_result.items():
    print(f"Date: {date}")
    for window, edges in windows.items():
        print(f"  Window {window}: {len(edges)} edges")


Processing date: 2024-04-01
Time = 00:00
Active players: 162
Edges created: 359

Time = 00:01
Active players: 157
Edges created: 359

Time = 00:02
Active players: 159
Edges created: 349

Time = 00:03
Active players: 162
Edges created: 341

Time = 00:04
Active players: 166
Edges created: 358

Time = 00:05
Active players: 166
Edges created: 362

Time = 00:06
Active players: 169
Edges created: 386

Time = 00:07
Active players: 170
Edges created: 377

Time = 00:08
Active players: 175
Edges created: 377

Time = 00:09
Active players: 173
Edges created: 357

Time = 00:10
Active players: 172
Edges created: 346

Time = 00:11
Active players: 173
Edges created: 370

Time = 00:12
Active players: 175
Edges created: 359

Time = 00:13
Active players: 174
Edges created: 331

Time = 00:14
Active players: 173
Edges created: 344

Time = 00:15
Active players: 173
Edges created: 337

Time = 00:16
Active players: 172
Edges created: 350

Time = 00:17
Active players: 172
Edges created: 357

Time = 00:18
Activ