In [1]:
import os
import math
import numpy as np
import pandas as pd
from scipy.special import comb, perm
from scipy.stats import binom
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn import preprocessing
from matplotlib import animation
from mpl_toolkits.mplot3d import Axes3D, proj3d

In [2]:
# extract "close" and "far" data frame
proj_df = pd.read_csv("data_proj_414.csv")
close_df = proj_df.loc[proj_df["Close"]>0,["X","Y","Close"]]
far_df = proj_df.loc[proj_df["Far"]>0,["X","Y","Far"]]
frames = [close_df, far_df]
both_df = pd.concat(frames)

# extract data frame of each person
# Potter_df = proj_df.loc[(proj_df["Potter"]>0) & (proj_df["Trip"]<4),["X","Y"]]
# Weasley_df = proj_df.loc[(proj_df["Weasley"]>0)& (proj_df["Trip"]<4),["X","Y"]]
# Granger_df = proj_df.loc[(proj_df["Granger"]>0)& (proj_df["Trip"]<4),["X","Y"]]

# plt.scatter(Potter_df["X"],Potter_df["Y"],color = "tomato",alpha=0.6)
# plt.scatter(Weasley_df["X"],Weasley_df["Y"],color = "dodgerblue",alpha=0.6)
# plt.scatter(Granger_df["X"],Granger_df["Y"],color = "forestgreen",alpha=0.6)

In [3]:
# extract the density of the Tayes
# 1. divide the map into grids
ran_x = 100
ran_y = 100 # since the farest observation of "close" is around 88
grid_size = 1
grid_num = int(ran_x / grid_size)
close_df["Grid_X"] = (close_df["X"]/grid_size).apply(np.ceil)
close_df["Grid_Y"] = (close_df["Y"]/grid_size).apply(np.ceil)

# Used to calculate the expectation of #Tayes in the intersection area
def intersection(x1, y1, x2, y2, count1, count2):
    r = 1
    dist = math.sqrt(math.pow((x1-x2),2) + math.pow((y1-y2),2))
    if dist == 0: # avoid intersection with self 
        return 0
    if dist > r: # two circles are unoverlapped
        return 0
    area = math.pi
    alpha = math.acos(pow(dist,2)/(2*dist*r))
    area3 = alpha * 2 - math.sin(alpha)*dist # intersection area
    ceil = min(count1,count2)
    if ceil == 0: # there is no observed Tayes in two circles
        return 0
    else:
        p3 = area3 / area # probability of landing in intersection area
        prob = [0]
        px = 0
        for x in range(0,ceil+1):
            prob.append(binom.pmf(k=x, n=count1, p=p3)*binom.pmf(k=x, n=count2, p=p3))
        expectation = 0
        for i in range(1,ceil+1):
            expectation += i * prob[i] # the expected value of Tayes in area3
        expectation /= sum(prob)
        inter_density = expectation / area3
    return inter_density

def density(Grid_X, Grid_Y, close_df):
    grid_df = close_df.loc[(close_df["Grid_X"]==Grid_X) & (close_df["Grid_Y"]==Grid_Y)]
    num = grid_df.shape[0] # number of observation within this grid
    density = 0
    if num == 0:
        return 0
    elif num == 1:
        density = grid_df["Close"] / math.pi
    elif num > 1:
        for i in range(0,num-1):
            for j in range(i,num):
                x1 = grid_df.iloc[i,0]
                y1 = grid_df.iloc[i,1]
                x2 = grid_df.iloc[j,0]
                y2 = grid_df.iloc[j,1]
                count1 = grid_df.iloc[i,2]
                count2 = grid_df.iloc[j,2]
                density += intersection(x1, y1, x2, y2, count1, count2)
        density /= (math.factorial(num-1)*1.7576)
    return density

# max(proj_df["Y"])
# max(proj_df["X"])

In [4]:
# # K-means
# kmeans = KMeans(n_clusters=30, init='k-means++', random_state=98)
# label = kmeans.fit_predict(close_df)

# #Getting the Centroids
# centroids = kmeans.cluster_centers_

# #Getting unique labels
# u_labels = np.unique(label)+1

# for i in u_labels:
#     plt.scatter(close_df[label == i]["X"] , close_df[label == i]["Y"], cmap = "jet",alpha = 0.5)

# plt.scatter(centroids[:,0] , centroids[:,1] , s = 50, color = 'k')
# plt.legend()
# plt.show()

In [5]:
density_data = np.zeros(shape=(10000,3))
density_df = pd.DataFrame(density_data, columns = ["Grid_X", "Grid_Y", "Density"])
for i in range(0,100):
    for j in range(0,100):
        density_df.iloc[i*100+j,0] = i+1
        density_df.iloc[i*100+j,1] = j+1
        
# for i in close_df["Grid_X"].unique():
#     # update the value of density
#     temp_df = density_df.loc[density_df["Grid_X"]==i]
#     print(temp_df)
    
temp_df = density_df.loc[density_df["Grid_X"]==47]
for j in range(0,100):
    temp_df.iloc[j,2] = density(i,j+1,close_df)
print(temp_df)
print(temp_df.iloc[0,2])

Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: [X, Y, Close, Grid_X, Grid_Y]
Index: []
Empty DataFrame
Columns: 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

In [6]:
close_df["Grid_X"].unique()

array([35., 37., 38., 39., 47., 46., 48., 49., 51., 52., 50., 36., 53.,
       45., 44., 60., 59., 57., 58., 55., 54., 56., 65., 67., 68., 64.,
       66., 17., 21., 19., 41., 16., 43., 40., 42., 20., 82., 18., 81.,
       84., 83., 85., 86., 34., 33., 32., 63., 62., 69., 78., 77., 80.,
       79., 31., 76., 75., 22., 61., 74., 73.])