In [1]:
import matplotlib.pyplot as plt

In [2]:
import numpy as np

In [3]:
dataset='brightkite'

In [4]:
k=3

In [5]:
m_value=6

In [6]:
k_core_path=f'k_cores/{dataset}/{k}'

In [7]:
detected_community_path = f'detected_communities/{dataset}/{k}/{m_value}'

In [8]:
import networkx as nx

In [9]:
detected_community = nx.read_adjlist(detected_community_path)

In [10]:
k_core = nx.read_adjlist(k_core_path)

In [11]:
len(detected_community), len(k_core)

(80, 1212)

In [12]:
from trajectory_pairwise_similarity_dataset import *

In [13]:
trajectory_pairwise_similarity_dataset=TrajectoryPairwiseSimilarityDataset(
    k_core.nodes,
    f'pairwise_similarities/{dataset}'
)

In [14]:
import itertools

In [15]:
connected_component_frozenset_to_pairwise_distances = {
    frozenset(connected_component): np.fromiter(
        (
            trajectory_pairwise_similarity_dataset.pairwise_similarity(first_vertex, second_vertex)
            for first_vertex, second_vertex in itertools.combinations(
                connected_component,
                2
            )
        ),
        float
    )
    for connected_component in nx.connected_components(detected_community)
}

In [16]:
sorted_connected_component_frozensets = sorted(
    connected_component_frozenset_to_pairwise_distances,
    key=lambda k: -np.mean(connected_component_frozenset_to_pairwise_distances[k])
)

In [17]:
k_core_connected_components = list(nx.connected_components(k_core))

In [18]:
for sorted_connected_component_frozenset in sorted_connected_component_frozensets:
    k_core_connected_component_containing_max_connected_component_frozenset = next(
        (
            k_core_connected_component
            for k_core_connected_component in k_core_connected_components
            if sorted_connected_component_frozenset.issubset(k_core_connected_component)
        )
    )
    print(len(k_core_connected_component_containing_max_connected_component_frozenset))

1202
1202
1202
1202
1202
1202
1202
10
1202


In [19]:
import visualization

In [20]:
visualization.visualize_social_network_and_emphasize_communities(
    detected_community
).render_notebook()

In [21]:
nx.nx_agraph.to_agraph(detected_community).draw("detected_community.pdf", prog="dot")

In [22]:
sorted_connected_component_frozensets[0]

frozenset({'10752', '10781', '10966', '10972', '1876', '2880', '2902', '6604'})

In [23]:
visualization.visualize_social_network_and_emphasize_communities(
    detected_community.subgraph(sorted_connected_component_frozensets[0])
).render_notebook()

In [24]:
nx.nx_agraph.to_agraph(detected_community.subgraph(sorted_connected_component_frozensets[0])).draw("selected_connected_component.pdf", prog="dot")

In [25]:
def layerwise_bfs(
    graph,
    start_layer
):
    explored = set(start_layer)
    current_layer = set(start_layer)
    
    while current_layer:
        yield current_layer
        
        next_layer = set()

        for node in current_layer:
            for adjacent_node in graph[node]: 
                if adjacent_node not in explored:
                    explored.add(adjacent_node)
                    next_layer.add(adjacent_node)

        current_layer = next_layer


In [26]:
import vector


def load_trajectory_dataset(trajectory_dataset_path):
    trajectory_dataset = {}
    lines = iter(open(trajectory_dataset_path, 'r'))
    next(lines)
    for line in lines:
        user, latitude_string, longitude_string, timestamp_string = line.split(',')
        latitude = float(latitude_string)
        longitude = float(longitude_string)
        timestamp = float(timestamp_string)
        
        if user in trajectory_dataset:
            trajectory_dataset[user].append((latitude, longitude, timestamp))
        else:
            trajectory_dataset[user] = trajectory = vector.vector(
                initial_shape=(10, 3),
                dtype=float
            )
            trajectory.append((latitude, longitude, timestamp))
    return {
        user: trajectory.get()
        for user, trajectory
        in trajectory_dataset.items()
    }


In [27]:
trajectory_dataset = load_trajectory_dataset('/home/abbas/community_detection/trajectories/brightkite')

In [28]:
trajectory_visualization = visualization.visualize_trajectories(
    [
        [point[:2] for point in trajectory_dataset[user] if point[0] != 0]
        for user in sorted_connected_component_frozensets[0]
    ]
)

trajectory_visualization

In [29]:
trajectory_visualization.save('trajectory_visualization.html')

In [30]:
import datetime

In [31]:
date_to_user_to_locations = {}

for user in sorted_connected_component_frozensets[0]:
    for point in trajectory_dataset[user]:
        if point[0] != 0:
            *coords, timestamp = point
            date = datetime.datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%d')
            if date not in date_to_user_to_locations:
                date_to_user_to_locations[date] = {}
            user_to_locations = date_to_user_to_locations[date]
            if user not in user_to_locations:
                user_to_locations[user] = set()
            locations = user_to_locations[user]
            coords[0] = round(coords[0], 3)
            coords[1] = round(coords[1], 3)
            locations.add((coords[0], coords[1]))

In [32]:
date_to_user_to_locations

{'2009-02-17': {'10781': {(36.0, 138.0)}},
 '2009-03-14': {'10781': {(35.588, 139.368)}},
 '2009-03-15': {'10781': {(35.588, 139.368)}},
 '2009-03-16': {'10781': {(35.656, 139.552),
   (35.666, 139.731),
   (35.685, 139.751)},
  '2902': {(35.666, 139.731)},
  '6604': {(35.666, 139.731)}},
 '2009-03-17': {'10781': {(35.666, 139.731)},
  '2902': {(35.666, 139.731)},
  '6604': {(35.805, 139.602)}},
 '2009-03-23': {'10781': {(35.666, 139.731)},
  '2902': {(35.671, 139.889),
   (35.685, 139.751),
   (35.717, 139.95),
   (35.883, 139.633)}},
 '2009-04-03': {'10781': {(35.666, 139.731)}, '6604': {(35.666, 139.731)}},
 '2009-04-07': {'10781': {(35.666, 139.731)}, '1876': {(35.805, 139.602)}},
 '2009-04-30': {'10781': {(35.666, 139.731)}, '2902': {(35.664, 139.731)}},
 '2009-05-12': {'10781': {(35.666, 139.731)}, '2902': {(35.664, 139.731)}},
 '2009-05-13': {'10781': {(35.666, 139.731)}, '10972': {(35.666, 139.731)}},
 '2009-05-14': {'10781': {(35.666, 139.731)}},
 '2009-05-20': {'10781': {(35.

In [33]:
{
    date: user_to_locations
    for date, user_to_locations in date_to_user_to_locations.items()
    if len(user_to_locations) > 1
}

{'2009-03-16': {'10781': {(35.656, 139.552),
   (35.666, 139.731),
   (35.685, 139.751)},
  '2902': {(35.666, 139.731)},
  '6604': {(35.666, 139.731)}},
 '2009-03-17': {'10781': {(35.666, 139.731)},
  '2902': {(35.666, 139.731)},
  '6604': {(35.805, 139.602)}},
 '2009-03-23': {'10781': {(35.666, 139.731)},
  '2902': {(35.671, 139.889),
   (35.685, 139.751),
   (35.717, 139.95),
   (35.883, 139.633)}},
 '2009-04-03': {'10781': {(35.666, 139.731)}, '6604': {(35.666, 139.731)}},
 '2009-04-07': {'10781': {(35.666, 139.731)}, '1876': {(35.805, 139.602)}},
 '2009-04-30': {'10781': {(35.666, 139.731)}, '2902': {(35.664, 139.731)}},
 '2009-05-12': {'10781': {(35.666, 139.731)}, '2902': {(35.664, 139.731)}},
 '2009-05-13': {'10781': {(35.666, 139.731)}, '10972': {(35.666, 139.731)}},
 '2009-05-20': {'10781': {(35.664, 139.731)},
  '2902': {(35.664, 139.731)},
  '6604': {(35.664, 139.731)}},
 '2009-05-28': {'10781': {(35.666, 139.731)}, '2902': {(35.666, 139.731)}},
 '2009-06-05': {'10781': {(35

In [34]:
date_to_users_to_location = {}

for date, user_to_locations in date_to_user_to_locations.items():
    all_locations = {
        location
        for locations in user_to_locations.values()
        for location in locations
    }
    
    for location in all_locations:
        users = set()
        for user, locations in user_to_locations.items():
            if location in locations:
                users.add(user)
        if date not in date_to_users_to_location:
            date_to_users_to_location[date] = {}
        users_to_location = date_to_users_to_location[date]
        users_to_location[frozenset(users)] = location

In [35]:
date_to_users_to_location

{'2009-02-17': {frozenset({'10781'}): (36.0, 138.0)},
 '2009-03-14': {frozenset({'10781'}): (35.588, 139.368)},
 '2009-03-15': {frozenset({'10781'}): (35.588, 139.368)},
 '2009-03-16': {frozenset({'10781'}): (35.685, 139.751),
  frozenset({'10781', '2902', '6604'}): (35.666, 139.731)},
 '2009-03-17': {frozenset({'6604'}): (35.805, 139.602),
  frozenset({'10781', '2902'}): (35.666, 139.731)},
 '2009-03-23': {frozenset({'2902'}): (35.717, 139.95),
  frozenset({'10781'}): (35.666, 139.731)},
 '2009-04-03': {frozenset({'10781', '6604'}): (35.666, 139.731)},
 '2009-04-07': {frozenset({'1876'}): (35.805, 139.602),
  frozenset({'10781'}): (35.666, 139.731)},
 '2009-04-30': {frozenset({'2902'}): (35.664, 139.731),
  frozenset({'10781'}): (35.666, 139.731)},
 '2009-05-12': {frozenset({'2902'}): (35.664, 139.731),
  frozenset({'10781'}): (35.666, 139.731)},
 '2009-05-13': {frozenset({'10781', '10972'}): (35.666, 139.731)},
 '2009-05-14': {frozenset({'10781'}): (35.666, 139.731)},
 '2009-05-20': 

In [36]:
date_to_users_to_frequented_location = {
    date: users_to_location
    for date, users_to_location in date_to_users_to_location.items()
    if any((len(users) > 1 for users in users_to_location.keys()))
}

date_to_users_to_frequented_location

{'2009-03-16': {frozenset({'10781'}): (35.685, 139.751),
  frozenset({'10781', '2902', '6604'}): (35.666, 139.731)},
 '2009-03-17': {frozenset({'6604'}): (35.805, 139.602),
  frozenset({'10781', '2902'}): (35.666, 139.731)},
 '2009-04-03': {frozenset({'10781', '6604'}): (35.666, 139.731)},
 '2009-05-13': {frozenset({'10781', '10972'}): (35.666, 139.731)},
 '2009-05-20': {frozenset({'10781', '2902', '6604'}): (35.664, 139.731)},
 '2009-05-28': {frozenset({'10781', '2902'}): (35.666, 139.731)},
 '2009-06-05': {frozenset({'6604'}): (35.664, 139.733),
  frozenset({'10781', '6604'}): (35.666, 139.731)},
 '2009-06-17': {frozenset({'2902'}): (35.664, 139.731),
  frozenset({'10781', '6604'}): (35.666, 139.731)},
 '2009-06-19': {frozenset({'10972', '2902', '6604'}): (35.664, 139.731),
  frozenset({'10781', '2902'}): (35.666, 139.731)},
 '2009-06-22': {frozenset({'2902'}): (35.665, 139.728),
  frozenset({'10781', '2902'}): (35.666, 139.731)},
 '2009-07-06': {frozenset({'10966'}): (35.685, 139.75

In [37]:
from collections import Counter

In [38]:
all_frequented_locations_and_frequencies = Counter()

for users_to_location in date_to_users_to_frequented_location.values():
    for users, location in users_to_location.items():
        all_frequented_locations_and_frequencies[location] += len(users)

In [39]:
all_frequented_locations_and_frequencies

Counter({(35.685, 139.751): 3,
         (35.666, 139.731): 146,
         (35.805, 139.602): 1,
         (35.664, 139.731): 45,
         (35.664, 139.733): 81,
         (35.665, 139.728): 2,
         (35.606, 139.622): 2,
         (35.673, 139.732): 2,
         (35.656, 139.795): 2,
         (35.66, 139.695): 2,
         (35.628, 139.739): 4,
         (35.552, 139.78): 1,
         (35.643, 139.536): 3,
         (35.665, 139.732): 3,
         (35.689, 139.692): 1,
         (35.701, 139.746): 2,
         (35.657, 139.914): 1,
         (35.681, 139.766): 1,
         (35.647, 139.71): 4,
         (35.679, 139.711): 1,
         (35.68, 139.768): 1,
         (35.62, 139.729): 1,
         (35.662, 139.697): 1,
         (35.66, 139.73): 1,
         (35.667, 139.731): 3,
         (35.906, 139.624): 1,
         (35.641, 139.755): 1,
         (35.672, 139.766): 2,
         (35.672, 139.76): 1,
         (35.659, 139.701): 3,
         (35.662, 139.7): 2,
         (35.643, 139.712): 1,
         (35.6

In [40]:
all_frequented_locations_and_frequencies.most_common(10)

[((35.666, 139.731), 146),
 ((35.664, 139.733), 81),
 ((35.664, 139.731), 45),
 ((35.628, 139.739), 4),
 ((35.647, 139.71), 4),
 ((35.685, 139.751), 3),
 ((35.643, 139.536), 3),
 ((35.665, 139.732), 3),
 ((35.667, 139.731), 3),
 ((35.659, 139.701), 3)]

In [41]:
all_dates_and_frequencies = Counter()

for date, users_to_location in date_to_users_to_frequented_location.items():
    for users, location in users_to_location.items():
        all_dates_and_frequencies[date] += len(users)

In [42]:
all_dates_and_frequencies

Counter({'2009-03-16': 4,
         '2009-03-17': 3,
         '2009-04-03': 2,
         '2009-05-13': 2,
         '2009-05-20': 3,
         '2009-05-28': 2,
         '2009-06-05': 3,
         '2009-06-17': 3,
         '2009-06-19': 5,
         '2009-06-22': 3,
         '2009-07-06': 3,
         '2009-07-07': 3,
         '2009-07-24': 3,
         '2009-09-16': 8,
         '2009-10-02': 5,
         '2009-10-13': 3,
         '2009-10-26': 3,
         '2009-10-31': 4,
         '2009-11-13': 8,
         '2009-11-24': 3,
         '2009-11-25': 8,
         '2009-12-06': 2,
         '2009-12-07': 3,
         '2009-12-09': 2,
         '2009-12-18': 5,
         '2009-05-15': 3,
         '2009-05-29': 2,
         '2009-06-26': 5,
         '2009-07-08': 3,
         '2009-07-17': 5,
         '2009-08-21': 9,
         '2009-09-07': 3,
         '2009-09-15': 5,
         '2009-10-16': 5,
         '2009-11-19': 8,
         '2009-11-27': 10,
         '2009-12-01': 4,
         '2009-12-21': 6,
         '2

In [43]:
all_dates_and_frequencies.most_common(10)

[('2009-11-27', 10),
 ('2009-08-21', 9),
 ('2009-09-16', 8),
 ('2009-11-13', 8),
 ('2009-11-25', 8),
 ('2009-11-19', 8),
 ('2009-12-21', 6),
 ('2010-01-08', 6),
 ('2010-03-16', 6),
 ('2009-11-05', 6)]

In [44]:
import pandas as pd


def f(k, k_):
    dates = []
    most_common_locations_and_records = {
        point: [] for point, frequency in all_frequented_locations_and_frequencies.most_common(k)
    }
    
    most_common_dates = {date for date, frequencies in all_dates_and_frequencies.most_common(k_) }
    for date, users_to_frequented_location in date_to_users_to_frequented_location.items():
        if date in most_common_dates:
            frequented_location_to_users = dict(
                (v, k) for (k, v) in users_to_frequented_location.items()
            )
            if any((location in frequented_location_to_users for most_common_location in most_common_locations_and_records)):
                dates.append(date)
                for most_common_location, records in most_common_locations_and_records.items():
                    if most_common_location in frequented_location_to_users:
                        most_common_locations_and_records[most_common_location].append(
                            ', '.join(frequented_location_to_users[most_common_location])
                        )
                    else:
                        most_common_locations_and_records[most_common_location].append(
                            ''
                        )
    
    data = {
        'Date': dates
    }
    
    for most_common_location, records in most_common_locations_and_records.items():
        if any(records):
            data[str(most_common_location)] = records
    
    return pd.DataFrame(data)

In [45]:
df = f(5, 20)
df

Unnamed: 0,Date,"(35.666, 139.731)","(35.664, 139.733)","(35.664, 139.731)","(35.628, 139.739)","(35.647, 139.71)"
0,2009-06-19,"10781, 2902",,"10972, 6604, 2902",,
1,2009-09-16,,"10781, 10972, 10966, 6604, 1876, 2902","10781, 6604",,
2,2009-10-02,10781,"6604, 10966","6604, 2902",,
3,2009-11-25,"10781, 2880, 2902",,"6604, 10966, 2880, 10752",,
4,2009-12-18,,,"10781, 10966, 2880, 10752, 6604",,
5,2009-09-15,"10972, 6604, 2902",,"6604, 2902",,
6,2009-12-21,1876,,2902,2880.0,"10972, 2880, 1876"
7,2010-03-16,2880,,"10972, 10966, 2880, 6604",,
8,2009-12-28,2880,2902,"6604, 10966, 2880",,
9,2010-01-19,"2880, 2902",,"10966, 2880, 1876, 10752",,


In [46]:
print(df.to_latex(
    caption=f'The Most Checked-in Locations on the Days with the Most Check-ins',
    label=f'table:the_most_checked_in_locations_on_the_days_with_the_most_check_ins',
    index=False
))

\begin{table}
\centering
\caption{The Most Checked-in Locations on the Days with the Most Check-ins}
\label{table:the_most_checked_in_locations_on_the_days_with_the_most_check_ins}
\begin{tabular}{llllll}
\toprule
      Date & (35.666, 139.731) &                     (35.664, 139.733) &               (35.664, 139.731) & (35.628, 139.739) &  (35.647, 139.71) \\
\midrule
2009-06-19 &       10781, 2902 &                                       &               10972, 6604, 2902 &                   &                   \\
2009-09-16 &                   & 10781, 10972, 10966, 6604, 1876, 2902 &                     10781, 6604 &                   &                   \\
2009-10-02 &             10781 &                           6604, 10966 &                      6604, 2902 &                   &                   \\
2009-11-25 & 10781, 2880, 2902 &                                       &        6604, 10966, 2880, 10752 &                   &                   \\
2009-12-18 &                   &     

In [47]:
most_frequent_points_visualization = visualization.visualize_points(
    [point for point, frequency in all_frequented_locations_and_frequencies.most_common(
        len(df.columns) - 1
    )]
)

most_frequent_points_visualization

In [48]:
most_frequent_points_visualization.save('most_frequent_points_visualization.html')