# Network analysis 2020 project

## Imports

In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import collections
from networkx.algorithms import community

## Read data

* [Real Datasets for Spatial Databases: Road Networks and Points of Interest Dataset](https://www.cs.utah.edu/~lifeifei/SpatialDataset.htm) / 4. City of San Joaquin County (TG) Road Network
    * TG Road Network's Nodes (Node ID, Normalized X Coordinate, Normalized Y Coordinate)  
    * TG Road Network's Edges (Edge ID, Start Node ID, End Node ID, L2 Distance)
* [San Joaquin County, CA Geographic Information Systems](http://www.sjmap.org/GISDataDownload.htm) / GIS Data Download
    * Points Of Interest
    * Streets

In [None]:
# Road network: build the graph from the space-separated edge list, then
# attach the node coordinates as node attributes.
edge_columns = ["edge_id", "start_node_id", "end_node_id", "l2_distance"]
edges = pd.read_csv('TG.cedge', sep=" ", names=edge_columns)
G = nx.from_pandas_edgelist(edges, source="start_node_id", target="end_node_id", edge_attr=True)

node_columns = ["node_id", "x_coordinate", "y_coordinate"]
nodes = pd.read_csv('TG.cnode', sep=" ", names=node_columns)
# (node_id, {"x_coordinate": ..., "y_coordinate": ...}) pairs, the form
# add_nodes_from accepts for nodes with attributes.
data = nodes.set_index('node_id').to_dict(orient='index').items()
G.add_nodes_from(data)

print("Nodes count: {}".format(G.number_of_nodes()))
print("Edges count: {}".format(G.number_of_edges()))

# GIS layers, both reprojected to the same longitude/latitude CRS.
crs = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
points_of_interest = gpd.read_file("PointsOfInterest/PointsOfInterest.shp").to_crs(crs)
# Drop bogus points such as "POINT (165.9984561648179 -90)": keep only
# points whose x coordinate is below -120.
has_valid_x = points_of_interest.geometry.x < -120
points_of_interest = points_of_interest[has_valid_x]
streets = gpd.read_file("Streets/Streets.shp").to_crs(crs)

print("Points of interest count: {}".format(len(points_of_interest)))

## Merge and plot graphs

In [None]:
# Parameters
# Offsets and scale factors passed to transform_x_coord / transform_y_coord
# when overlaying the road graph on the GIS layers.
# NOTE(review): values look hand-tuned to align the two datasets - confirm.
x_fix = -121.58
y_fix = 37.555
scale_x = 0.000066
scale_y = 0.000072

# Functions
def transform_x_coord(x, x_fix, scale_x):
    """Scale an x coordinate by ``scale_x`` and shift it by ``x_fix``."""
    scaled = x * scale_x
    return scaled + x_fix

def transform_y_coord(y, y_fix, scale_y):
    """Flip a y coordinate, scale it by ``scale_y`` and shift it by ``y_fix``.

    The axis is mirrored (``y`` is negated) and then moved up by
    ``10000 * scale_y``, so an input of 10000 maps to ``y_fix``.
    """
    # NOTE(review): 10000 is presumably the extent of the normalized
    # coordinates in the dataset - confirm against the data source.
    mirrored = y * -1 * scale_y
    return mirrored + y_fix + 10000 * scale_y

def draw_road_graph(graph, min_edge_length=80, figure=None, figsize=(10,8), dpi=100, x_fix=0.0, y_fix=0.0, scale_x=1.0, scale_y=1.0, scatter_nodes=None):
    """Plot the edges of a road graph and optionally highlight some nodes.

    Parameters:
        graph: networkx graph whose nodes carry ``x_coordinate`` and
            ``y_coordinate`` attributes and whose edges carry ``l2_distance``.
        min_edge_length: edges with ``l2_distance`` below this are not drawn.
        figure: if falsy, a new pyplot figure is created with ``figsize`` and
            ``dpi``. Otherwise no figure is created and drawing goes to
            pyplot's current axes (the passed object is not used directly).
        figsize, dpi: size and resolution of the figure created here.
        x_fix, y_fix, scale_x, scale_y: forwarded to ``transform_x_coord`` /
            ``transform_y_coord`` for every plotted coordinate.
        scatter_nodes: iterable of node IDs drawn as scatter points on top of
            the edges; ``None`` (the default) highlights nothing.

    The function ends with ``plt.show()``.
    """
    # Avoid a shared mutable default argument.
    if scatter_nodes is None:
        scatter_nodes = []

    if not figure:
        plt.figure(figsize=figsize, dpi=dpi)

    # Read from the graph that was passed in, not a module-level global.
    node_attrs = graph.nodes
    for start_id, end_id, edge_data in graph.edges(data=True):
        if edge_data["l2_distance"] < min_edge_length:
            continue
        start_node = node_attrs[start_id]
        end_node = node_attrs[end_id]
        x_coordinates = [
            transform_x_coord(start_node["x_coordinate"], x_fix, scale_x),
            transform_x_coord(end_node["x_coordinate"], x_fix, scale_x),
        ]
        y_coordinates = [
            transform_y_coord(start_node["y_coordinate"], y_fix, scale_y),
            transform_y_coord(end_node["y_coordinate"], y_fix, scale_y),
        ]
        plt.plot(x_coordinates, y_coordinates, 'k-', lw=1)

    scatter_x = []
    scatter_y = []
    for node_id in scatter_nodes:
        node = node_attrs[node_id]
        scatter_x.append(transform_x_coord(node["x_coordinate"], x_fix, scale_x))
        scatter_y.append(transform_y_coord(node["y_coordinate"], y_fix, scale_y))

    plt.scatter(scatter_x, scatter_y, alpha=0.5, s=2)
    plt.show()

# Plot graphs
# Overlay all three layers on one set of axes: streets at the bottom,
# points of interest above them, then the road graph drawn by
# draw_road_graph with the alignment parameters.
fig, ax = plt.subplots(figsize=(10, 8), dpi=100)
ax.set_aspect('equal')
points_of_interest.plot(ax=ax, color="red", markersize=5, zorder=2)
streets.plot(ax=ax, zorder=1)
draw_road_graph(
    G,
    figure=fig,
    x_fix=x_fix,
    y_fix=y_fix,
    scale_x=scale_x,
    scale_y=scale_y,
)

## Degree distribution

In [None]:
# Count how many nodes have each degree.
# Only the degrees are collected here: unpacking into a variable called
# `nodes` would overwrite the node DataFrame loaded in the read-data cell.
degrees = [degree for _, degree in G.degree()]
count = collections.Counter(degrees)

# x holds the distinct degrees, y the number of nodes with that degree.
x, y = map(list, zip(*count.items()))
print('Degrees: {}'.format(x))
print('Degree frequency: {}'.format(y))

fig, ax = plt.subplots()
ax.margins(x=0.08, y=0.08)
ax.bar(x, y)
ax.set_title('Degree distribution of the graph')
ax.set_xlabel('Degree')
ax.set_ylabel('Count')

# Label each bar with its count, right-aligned to a width of 4 characters.
for degree, freq in zip(x, y):
    ax.text(degree - .4, freq + 100, str(freq).rjust(4), color='black')

## Community detection

Uses Clauset-Newman-Moore greedy modularity maximization. See [documentation](https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.modularity_max.greedy_modularity_communities.html).

In [18]:
# Materialize the detected communities so they can be indexed below.
# NOTE(review): per the NetworkX docs each community is a frozenset of nodes
# and the result is ordered largest first - verify for the installed version.
c = [*community.greedy_modularity_communities(G)]

In [None]:
# Draw the road graph once for each of the first eight communities,
# highlighting that community's nodes.
for community_index in range(8):
    draw_road_graph(G, scatter_nodes=c[community_index])

In [None]:
# Count, for every node, how many points of interest have that node as
# their nearest node (straight-line distance in transformed coordinates).
import math  # unused here; kept in case later cells rely on this import

node_ids = list(G.nodes())
near_poi_count = dict.fromkeys(node_ids, 0)

poi_x = points_of_interest.geometry.x
poi_y = points_of_interest.geometry.y
poi_coor = list(zip(poi_x, poi_y))

# The node coordinates do not depend on the POI, so transform them once
# instead of once per (POI, node) pair. G.nodes[...] is used because the
# G.node alias was removed in NetworkX 2.4.
node_x = np.array(
    [transform_x_coord(G.nodes[node]["x_coordinate"], x_fix, scale_x) for node in node_ids],
    dtype=float,
)
node_y = np.array(
    [transform_y_coord(G.nodes[node]["y_coordinate"], y_fix, scale_y) for node in node_ids],
    dtype=float,
)

if node_ids:
    for coor in poi_coor:
        # Squared distance is enough to find the minimum (sqrt is monotonic).
        # argmin returns the first minimum, i.e. the first node in iteration
        # order on ties.
        squared_dist = (node_x - coor[0]) ** 2 + (node_y - coor[1]) ** 2
        nearest_node = node_ids[int(np.argmin(squared_dist))]
        near_poi_count[nearest_node] += 1

# Print near_poi_count in descending order of count
print(dict(sorted(near_poi_count.items(), key=lambda item: item[1], reverse=True)))

In [118]:
# Store each node's POI count on the graph as the node attribute
# 'near_poi_count'.
nx.set_node_attributes(G, values=near_poi_count, name='near_poi_count')