In [102]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from pathlib import Path

In [2]:
def poisson(r, fix_num):
    # Choose up to k points around each reference point as candidates for a new
    # sample point
    k = 30

    # Minimum distance between samples

    width, height = 10, 10

    # Cell side length
    a = r/np.sqrt(2)
    # Number of cells in the x- and y-directions of the grid
    nx, ny = int(width / a) + 1, int(height / a) + 1

    # A list of coordinates in the grid of cells
    coords_list = [(ix, iy) for ix in range(nx) for iy in range(ny)]
    # Initilalize the dictionary of cells: each key is a cell's coordinates, the
    # corresponding value is the index of that cell's point's coordinates in the
    # samples list (or None if the cell is empty).
    cells = {coords: None for coords in coords_list}

    def get_cell_coords(pt):
        """Get the coordinates of the cell that pt = (x,y) falls in."""

        return int(pt[0] // a), int(pt[1] // a)

    def get_neighbours(coords):
        """Return the indexes of points in cells neighbouring cell at coords.

        For the cell at coords = (x,y), return the indexes of points in the cells
        with neighbouring coordinates illustrated below: ie those cells that could 
        contain points closer than r.

                                         ooo
                                        ooooo
                                        ooXoo
                                        ooooo
                                         ooo

        """

        dxdy = [(-1,-2),(0,-2),(1,-2),(-2,-1),(-1,-1),(0,-1),(1,-1),(2,-1),
                (-2,0),(-1,0),(1,0),(2,0),(-2,1),(-1,1),(0,1),(1,1),(2,1),
                (-1,2),(0,2),(1,2),(0,0)]
        neighbours = []
        for dx, dy in dxdy:
            neighbour_coords = coords[0] + dx, coords[1] + dy
            if not (0 <= neighbour_coords[0] < nx and
                    0 <= neighbour_coords[1] < ny):
                # We're off the grid: no neighbours here.
                continue
            neighbour_cell = cells[neighbour_coords]
            if neighbour_cell is not None:
                # This cell is occupied: store this index of the contained point.
                neighbours.append(neighbour_cell)
        return neighbours

    def point_valid(pt):
        """Is pt a valid point to emit as a sample?

        It must be no closer than r from any other point: check the cells in its
        immediate neighbourhood.

        """

        cell_coords = get_cell_coords(pt)
        for idx in get_neighbours(cell_coords):
            nearby_pt = samples[idx]
            # Squared distance between or candidate point, pt, and this nearby_pt.
            distance2 = (nearby_pt[0]-pt[0])**2 + (nearby_pt[1]-pt[1])**2
            if distance2 < r**2:
                # The points are too close, so pt is not a candidate.
                return False
        # All points tested: if we're here, pt is valid
        return True

    def get_point(k, refpt):
        """Try to find a candidate point relative to refpt to emit in the sample.

        We draw up to k points from the annulus of inner radius r, outer radius 2r
        around the reference point, refpt. If none of them are suitable (because
        they're too close to existing points in the sample), return False.
        Otherwise, return the pt.

        """
        i = 0
        while i < k:
            rho, theta = np.random.uniform(r, 2*r), np.random.uniform(0, 2*np.pi)
            pt = refpt[0] + rho*np.cos(theta), refpt[1] + rho*np.sin(theta)
            if not (0 <= pt[0] < width and 0 <= pt[1] < height):
                # This point falls outside the domain, so try again.
                continue
            if point_valid(pt):
                return pt
            i += 1
        # We failed to find a suitable point in the vicinity of refpt.
        return False

    # Pick a random point to start with.
    pt = (np.random.uniform(0, width), np.random.uniform(0, height))
    samples = [pt]
    # Our first sample is indexed at 0 in the samples list...
    cells[get_cell_coords(pt)] = 0
    # ... and it is active, in the sense that we're going to look for more points
    # in its neighbourhood.
    active = [0]

    nsamples = 1
    # As long as there are points in the active list, keep trying to find samples.
    while active:
        # choose a random "reference" point from the active list.
        idx = np.random.choice(active)
        refpt = samples[idx]
        # Try to pick a new point relative to the reference point.
        pt = get_point(k, refpt)
        if pt:
            # Point pt is valid: add it to the samples list and mark it as active
            samples.append(pt)
            nsamples += 1
            active.append(len(samples)-1)
            cells[get_cell_coords(pt)] = len(samples) - 1
            if len(samples) == fix_num:
                break
        else:
            # We had to give up looking for valid points near refpt, so remove it
            # from the list of "active" points.
            active.remove(idx)
    
#     plt.scatter(*zip(*samples), color='r', alpha=0.6, lw=0)
#     plt.xlim(0, width)
#     plt.ylim(0, height)
#     plt.axis('off')
#     plt.show()
    
    return samples

In [113]:
poisson(0.3)

736

In [115]:
radius_dict

{9: 2.89,
 11: 2.62,
 10: 2.74,
 12: 2.52,
 13: 2.46,
 14: 2.25,
 15: 2.29,
 16: 2.17,
 17: 2.06,
 18: 2.01,
 19: 2.0,
 20: 1.93,
 21: 1.83,
 22: 1.8499999999999999,
 23: 1.81,
 24: 1.75,
 25: 1.72,
 26: 1.67,
 27: 1.63,
 29: 1.6199999999999999,
 30: 1.56,
 28: 1.6099999999999999,
 31: 1.53,
 32: 1.51,
 33: 1.48,
 34: 1.46,
 36: 1.4,
 38: 1.3699999999999999,
 37: 1.38,
 39: 1.3599999999999999,
 41: 1.33,
 42: 1.31,
 44: 1.26,
 45: 1.23,
 46: 1.28,
 47: 1.25,
 48: 1.19,
 50: 1.22,
 49: 1.2,
 51: 1.18,
 52: 1.16,
 55: 1.14,
 54: 1.13,
 57: 1.1199999999999999,
 59: 1.0999999999999999,
 61: 1.08,
 64: 1.07,
 65: 1.05,
 66: 1.04,
 67: 1.03,
 69: 1.0,
 73: 0.9899999999999998,
 76: 0.98,
 74: 0.9699999999999998,
 77: 0.96,
 79: 0.9500000000000002,
 78: 0.94,
 80: 0.9300000000000002,
 82: 0.9199999999999999,
 86: 0.8900000000000001,
 85: 0.8999999999999999,
 91: 0.8599999999999999,
 92: 0.8700000000000001,
 95: 0.8500000000000001,
 98: 0.8399999999999999,
 102: 0.8300000000000001,
 104: 0.8199

In [114]:
radius_dict = {}
# for i in range(150):
#     the_tmp_sum = 0
#     print(i)
#     r = 9.9 - 0.1*i
#     for j in range(3):
#         the_tmp_sum += poisson(r)
#     key = round(the_tmp_sum/3)
#     radius_dict[key] = r
print('---')
for i in range(270):
    print(i)
    the_tmp_sum = 0
    r = 3 - 0.01*i
    for j in range(3):
        the_tmp_sum += poisson(r)
    key = round(the_tmp_sum/3)
    radius_dict[key] = r

---
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269


In [116]:
data = {
    'point_num': [],
    'r': []
}
for k, v in radius_dict.items():
    data['point_num'].append(k)
    data['r'].append(v)
    
df = pd.DataFrame.from_dict(data)
df = df.sort_values(by='point_num')
df = df.reset_index(drop=True)
df.to_csv('poisson_radius.csv')

In [3]:
df = pd.read_csv('poisson_radius.csv', index_col=0, dtype={'point_num': np.int32, 'r': np.float32})

In [4]:
df

Unnamed: 0,point_num,r
0,9,2.89
1,10,2.74
2,11,2.62
3,12,2.52
4,13,2.46
...,...,...
111,533,0.35
112,568,0.34
113,598,0.33
114,630,0.32


In [109]:
def good_data_two_categories(samples, category_number, points_number, delta):
#     samples.sort(key=lambda x:x[1], reverse=True)
    if sum(list(zip(*samples[:int((len(samples))/2)]))[1]) > sum(list(zip(*samples[int((len(samples))/2):]))[1]):
        A = samples[:int((len(samples))/2)]
        B = samples[int((len(samples))/2):]
    else:
        A = samples[int((len(samples))/2):]
        B = samples[:int((len(samples))/2)]
    A.sort(key=lambda x:x[1], reverse=True)
    B.sort(key=lambda x:x[1], reverse=True)
    cnt = 0
    while (True):
        if cnt > 10000:
            break
        cnt += 1
        avg_A = sum(list(zip(*A))[1])/len(A)
        avg_B = sum(list(zip(*B))[1])/len(B)
        if abs((avg_A-avg_B)-delta) < 0.05:
            break
        if (avg_A-avg_B) > delta:
            cnt_2 = 0
            while (True):
                cnt_2 += 1
                add_index = int(cnt_2/20)
                if add_index >= int(len(A)/2):
                    add_index = int(len(A)/2)
                select_index_A = random.randint(0, int(len(A)/2)-add_index)
                select_index_B = random.randint(int(len(B)/2)+add_index, len(B)-1)
                if A[select_index_A][1] > B[select_index_B][1]:
                    tmp = A[select_index_A]
                    A[select_index_A] = B[select_index_B]
                    B[select_index_B] = tmp
                    break
            A.sort(key=lambda x:x[1], reverse=True)
            B.sort(key=lambda x:x[1], reverse=True)
        else:
            cnt_2 = 0
            while (True):
                cnt_2 += 1
                add_index = int(cnt_2/20)
                if add_index >= int(len(A)/2):
                    add_index = int(len(A)/2)
                select_index_A = random.randint(int(len(A)/2)+add_index, len(A)-1)
                select_index_B = random.randint(0, int(len(B)/2)-add_index)
                if A[select_index_A][1] < B[select_index_B][1]:
                    tmp = A[select_index_A]
                    A[select_index_A] = B[select_index_B]
                    B[select_index_B] = tmp
                    break
            A.sort(key=lambda x:x[1], reverse=True)
            B.sort(key=lambda x:x[1], reverse=True)
    print(avg_A, avg_B, avg_A-avg_B)
    return A+B
def get_samples(radius, category_number, points_number, total_points, delta, filename):
    while (True):
        samples = poisson(radius, total_points)
        if len(samples) == total_points:
            break
    samples = good_data_two_categories(samples, category_number, points_number, delta)
    cat_index = [0, 1]
    random.shuffle(cat_index)
    cat = []
    for cat_i in cat_index:
        for i in range(points_number):
            cat.append(cat_i)
    
    my_data = {'x': list(zip(*samples))[0], 'y': list(zip(*samples))[1], 'ca': cat}
    df_data = pd.DataFrame.from_dict(my_data)
    df_data = df_data.sort_values(by=['ca', 'y'])
    df_data = df_data.reset_index(drop=True)
    Path("./delta_"+str(delta)).mkdir(parents=True, exist_ok=True)
    df_data.to_csv("./delta_"+str(delta)+'/'+filename, index=False)
    

In [112]:
category_numbers = [2]
points_numbers = [15]

for category_number in category_numbers:
    for points_number in points_numbers:
        total_point = category_number*points_number
        select_radius = -1
        for idx in df.index.to_list()[:-1]:
            a = df.loc[idx]['point_num']
            b = df.loc[idx+1]['point_num']
            if total_point > a and total_point <= b:
                select_radius = df.loc[idx+1]['r']
                break
        for delta in [5.0]:
            for cnt in range(20):
                get_samples(select_radius, category_number, points_number, total_point, delta, str(cnt)+'.csv')

7.793071363236464 2.8003875344570486 4.992683828779416
7.527283043832351 2.5175742010907527 5.009708842741598
7.618143680806657 2.6550948457615644 4.963048835045093
7.5299484083397115 2.5597018247460275 4.970246583593684
7.731058283066517 2.723799704718244 5.007258578348273
7.574257299750907 2.559069509489581 5.015187790261326


In [96]:
np.linspace(0.5, 5, 10)

array([0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])

In [17]:
int(3.6)

3

In [35]:
random.randint(0,0)

0

In [55]:
[0 for i in range(15)]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [73]:
a = [0, 1]
random.shuffle(a)
print(a)

[0, 1]
