# Untangling Counties Ver 4

### Import libraries

In [95]:
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import colorlover as cl
colors = [[i / 255 for i in j] for j in cl.to_numeric(cl.scales['5']['qual']['Dark2'])]

### Functions

### Import data

In [96]:
county_shapes = pd.read_csv("./ny_county_shapes.csv").set_index("county_name")
squares = pd.read_csv("./ny_map_county_squares.csv")
squares["square_id"] = ny_map_county_squares.index
for i in ["lat", "lng"]:
    squares["new_{}_rank".format(i)] = squares["test_{}_rank".format(i)]

In [97]:
len(squares)

2300

In [98]:
squares.head()

Unnamed: 0,county_name,lat,lng,state,lng_rank,lat_rank,nyc,min_lng_rank,min_lat_rank,max_lng_rank,...,sqrt_mult,mid_lng_rank,mid_lat_rank,scaled_lng_rank,scaled_lat_rank,test_lat_rank,test_lng_rank,square_id,new_lat_rank,new_lng_rank
0,Montgomery,42.920886,-74.476076,NY,54.0,43.0,False,52.0,41.0,57.0,...,5.224579,54.5,42.5,51.38771,45.61229,12820.304341,16099.91708,0,12820.304341,16099.91708
1,Chautauqua,42.162278,-79.666456,NY,3.0,30.0,False,3.0,28.0,8.0,...,6.386439,5.5,32.0,-12.966097,17.227123,8944.398378,894.439838,1,8944.398378,894.439838
2,Rensselaer,42.862532,-73.661899,NY,62.0,42.0,False,61.0,36.0,65.0,...,9.693078,63.0,39.5,52.306922,66.232695,12522.157729,18485.08998,2,12522.157729,18485.08998
3,St Lawrence,44.379747,-75.392025,NY,45.0,68.0,False,41.0,63.0,53.0,...,3.236011,47.0,70.5,38.527978,59.909972,20273.969656,13416.597566,3,20273.969656,13416.597566
4,Delaware,41.987215,-74.984937,NY,49.0,27.0,False,45.0,25.0,54.0,...,3.238251,49.5,30.0,47.380874,17.285246,8049.95854,14609.184017,4,8049.95854,14609.184017


#### Finding neighbors

In [99]:
def get_angle(df, coord_pat, coord_list = ["lat", "lng"], entity_list = ["x", "y"]):
    return np.arctan2(
        df[coord_pat.format(coord_list[1], entity_list[1])] - df[coord_pat.format(coord_list[1], entity_list[0])],
        df[coord_pat.format(coord_list[0], entity_list[1])] - df[coord_pat.format(coord_list[0], entity_list[0])]) / np.pi

def get_neighbors(squares, dist = 1):
    neighbors = squares[["square_id", "lat_rank", "lng_rank"]]
    neighbors["dummy"] = 1
    neighbors = neighbors.merge(right = neighbors, on = ["dummy"], how = "left")
    neighbors = neighbors[
        ~(neighbors.square_id_x == neighbors.square_id_y) &
        (np.abs(neighbors.lng_rank_x - neighbors.lng_rank_y) <= dist) & 
        (np.abs(neighbors.lat_rank_x - neighbors.lat_rank_y) <= dist)]
    neighbors["angle"] = get_angle(neighbors, coord_pat = "{}_rank_{}")
    return neighbors.drop(columns = ["dummy"])

In [100]:
neighbors = get_neighbors(squares, dist = 4)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [101]:
neighbors["dummy"] = 1
num_neighbors = neighbors.groupby(
    ["square_id_x"],
    as_index = False)["dummy"].sum().rename(
    columns = {"square_id_x": "square_id", "dummy": "num_neighbors"})

In [102]:
squares_w_num_neighbors = squares.merge(right = num_neighbors, on = ["square_id"], how = "left")

In [103]:
squares_w_num_neighbors[pd.isnull(squares_w_num_neighbors.num_neighbors)]

Unnamed: 0,county_name,lat,lng,state,lng_rank,lat_rank,nyc,min_lng_rank,min_lat_rank,max_lng_rank,...,mid_lng_rank,mid_lat_rank,scaled_lng_rank,scaled_lat_rank,test_lat_rank,test_lng_rank,square_id,new_lat_rank,new_lng_rank,num_neighbors


In [119]:
def join_neigbors(squares, neighbors, select_cols):
    return squares[["square_id"] + select_cols].rename(
        columns = {"square_id": "square_id_x"}).merge(
        right = neighbors,
        on = ["square_id_x"],
        how = "right").merge(
        right = squares[["square_id"] + select_cols].rename(
            columns = {"square_id": "square_id_y"}),
        on = ["square_id_y"],
        how = "left")    

def update_pos(squares, neighbors, dist_mult = 10):
    for i in ["lat", "lng"]:
        squares["prop_{}_rank".format(i)] = squares["new_{}_rank".format(i)] + dist_mult * 2 * (np.random.rand() - 0.5)
    prop_neighbors = join_neigbors(
        squares,
        neighbors,
        select_cols = ["{}_{}_rank".format(i, j) for i in ["prop", "new"] for j in ["lat", "lng"]] + ["sqrt_mult"])

In [120]:
check_angles = join_neigbors(squares, neighbors, ["new_{}_rank".format(i) for i in ["lat", "lng"]])

In [121]:
check_angles["new_angle"] = get_angle(check_angles, "new_{}_rank_{}")

In [122]:
np.sum(np.abs(check_angles.angle - check_angles.new_angle))

7.6777556534679547e-11

In [123]:
update_pos(squares, neighbors)