# Import Dependencies

In [1]:
import getpass
import math
import os
import time

import deepgraph as dg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
from mpl_toolkits.basemap import Basemap

%matplotlib inline

plt.style.use("ggplot")

# Selecting Data From the Database

In [None]:
# Connection settings are read from the environment so that no secret is stored
# in the notebook. The non-secret defaults point at the development database.
# NOTE: the password that used to be committed in this cell should be rotated.
DB_NAME = os.environ.get("GEOCENTER_DB_NAME", "geocenterdev")
DB_USER = os.environ.get("GEOCENTER_DB_USER", "vcchow")
DB_HOST = os.environ.get("GEOCENTER_DB_HOST", "cslvm74.csc.calpoly.edu")
# Fall back to an interactive prompt when the variable is unset or empty.
DB_PASSWORD = os.environ.get("GEOCENTER_DB_PASSWORD") or getpass.getpass(
    "Password for {}@{}: ".format(DB_USER, DB_HOST))

# Values bound into the query by psycopg2 (never formatted into the SQL text).
query_params = {
    "author": "bigalxyz123",  # user whose nodes form side A of each pair
    "min_year": 2016,         # only node versions with a timestamp year above this
    "radius": 500,            # ST_DWithin distance, in the units of the points' CRS
    "row_limit": 500000,      # hard cap on the number of pairs fetched
}

# For every recent node of the chosen author (A), find the recent nodes of
# other authors (B) that lie within the radius, together with their distance.
query = """
SELECT
  A.id,
  A.version,
  B.id,
  B.version,
  B.author,
  ST_X(A.point),
  ST_Y(A.point),
  ST_X(B.point),
  ST_Y(B.point),
  ST_Distance(A.point, B.point) as dist
FROM (SELECT
        C.id,
        C.version,
        C.point,
        D.timestamp,
        D.author
      FROM (node C
        JOIN osm_entity D ON C.id = D.id AND C.version = D.version)
      WHERE author = %(author)s AND extract(YEAR FROM D.timestamp) > %(min_year)s) A
  JOIN (SELECT
          N.id,
          N.version,
          N.point,
          O.author
        FROM (node N
          JOIN osm_entity O ON N.id = O.id AND N.version = O.version)
        WHERE extract(YEAR FROM O.timestamp) > %(min_year)s and author != %(author)s) B ON ST_DWithin(A.point, B.point, %(radius)s)
where A.id != B.id limit %(row_limit)s;
"""

# Create the connection with the database and always release it, even when the
# query fails; the cursor is closed by its own context manager.
connection = psycopg2.connect(dbname=DB_NAME,
                              user=DB_USER,
                              host=DB_HOST,
                              password=DB_PASSWORD)
try:
    with connection.cursor() as cur:
        cur.execute(query, query_params)
        queried_data = cur.fetchall()
finally:
    connection.close()

# Creating the Deep Graph

In [None]:
# Load the query result into a pandas DataFrame; the names below are assigned
# positionally, in the order of the SELECT list of the query.
# NOTE(review): the query selects ST_X(...) before ST_Y(...) for both points,
# while the names here put "lat" before "lon". ST_X is the X coordinate, which
# is the longitude for lon/lat data -- confirm the CRS of the points and whether
# these labels (and every later use of B_lat / B_lon) should be swapped.
pair_columns = [
    "A_id", "A_version",
    "B_id", "B_version", "B_author",
    "A_lat", "A_lon",
    "B_lat", "B_lon",
    "Distance",
]
youth_data = pd.DataFrame(queried_data, columns=pair_columns)

In [None]:
# Preview the first rows to check that the values line up with the column names.
youth_data.head()

In [None]:
# Work on a random subset to keep edge creation fast. The sample size is capped
# at the number of available rows (DataFrame.sample raises when asked for more
# rows than exist) and the seed is fixed so that re-running the notebook gives
# the same subset.
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42
subset_youth_data = youth_data.sample(n=min(SAMPLE_SIZE, len(youth_data)),
                                      random_state=SAMPLE_SEED)

In [None]:
# Wrap the sampled rows in a DeepGraph; presumably the frame becomes the node
# table that later cells read through graph.v.
graph = dg.DeepGraph(subset_youth_data)

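Below I define the functions that decide how nodes are connected by edges. I calculate the distance between each pair of points from their lat and lon values, and then keep only the pairs of users that are close to one another (the goal is within 90 m; the selector below currently uses a threshold of 200).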

In [None]:
def great_circle(B_lat_s, B_lat_t, B_lon_s, B_lon_t):
    """Connector: distance between the two nodes of each candidate pair.

    Passes the four arguments straight to ``dg.functions.great_circle_dist``
    and hands back its result unchanged under the name ``dist``.

    NOTE(review): presumably DeepGraph matches the argument names to the
    ``B_lat`` / ``B_lon`` node columns (``_s`` = source, ``_t`` = target) and
    takes the relation name from the variable on the final line -- confirm in
    the DeepGraph documentation before renaming or restructuring anything.
    """
    dist = dg.functions.great_circle_dist(B_lat_s, B_lat_t, B_lon_s, B_lon_t)
    return dist

I tried to tighten the selection criterion (i.e. use a smaller distance threshold), but this produced some errors. I will try to fix this by the next meeting.

In [None]:
def dist_selector(dist, sources, targets):
    """Selector: keep only the pairs whose absolute ``dist`` is at most 200.

    ``sources`` and ``targets`` are filtered with the same boolean mask, so
    the two outputs stay aligned element for element.
    """
    within_range = np.abs(dist) <= 200
    sources = sources[within_range]
    targets = targets[within_range]
    return sources, targets

In [None]:
# Build the edges of the graph: great_circle is the connector and
# dist_selector is the selector applied to the candidate pairs.
graph.create_edges(connectors=great_circle, selectors=dist_selector)

In [None]:
# Display graph.e -- presumably the edge table filled in by create_edges.
graph.e

In [None]:
# Scatter plot of the B_lat / B_lon columns with the edges drawn on top;
# points are coloured by B_id and half transparent.
obj = graph.plot_2d(
    "B_lat",
    "B_lon",
    edges=True,
    kwds_scatter=dict(c=graph.v.B_id, alpha=0.5),
)


In [None]:
# Basemap window: the bounding box of the B_lon / B_lat columns, padded by 40
# on the lower-left longitude corner and by 20 on the other three sides.
kwds_basemap = dict(
    llcrnrlon=graph.v.B_lon.min() - 40,
    urcrnrlon=graph.v.B_lon.max() + 20,
    llcrnrlat=graph.v.B_lat.min() - 20,
    urcrnrlat=graph.v.B_lat.max() + 20,
    resolution='i',
)

# Scatter styling: size-1 markers without outlines, coloured by B_id.
kwds_scatter = dict(s=1, c=graph.v.B_id, edgecolors='none')

In [None]:
# Mapping the nodes (with their edges) on a Basemap projection.
# NOTE(review): DeepGraph documents plot_map's first two positional arguments
# as (lon, lat), yet "B_lat" is passed first here -- confirm the intended order
# together with how the B_lat / B_lon columns were labelled.
objs = graph.plot_map("B_lat", "B_lon",
                       edges = True,
                       kwds_basemap = kwds_basemap,
                       kwds_scatter = kwds_scatter)

objs['m'].drawcoastlines(linewidth=.3)
# Take the node count from the graph itself: the previously hard-coded "100"
# did not match the number of rows the graph is built from.
objs['ax'].set_title("Map of {} Nodes".format(len(graph.v)));

I wanted to build a graph that connects each user to other users based on their nodes. In future exploration, I will add time to the analysis: picking users based on when they started creating nodes, and then calculating both how far they are from other users and how close together in time their edits were made.