In [1]:
import torch
import itertools
from sklearn import metrics
import math
import time
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment

In [2]:
def accuracy(y_true, y_pred):
    """Clustering accuracy under the best one-to-one label matching.

    Builds the contingency table between ``y_true`` and ``y_pred``, finds the
    assignment of true classes to predicted clusters that maximises the number
    of matched samples, and returns that matched count divided by the total
    number of samples in the table.
    """
    table = metrics.cluster.contingency_matrix(y_true, y_pred)
    # Maximise the matched count directly instead of minimising its negation.
    rows, cols = linear_sum_assignment(table, maximize=True)
    matched = table[rows, cols].sum()
    total = np.sum(table)
    return matched / total

In [3]:
def GMM_without_Outlier(N,p,m,sigma,w):
    """Sample a labelled mixture of ``m`` isotropic Gaussian clusters.

    The global torch RNG is re-seeded with 1 on every call, so the same
    arguments always produce the same data.

    Parameters
    ----------
    N : int
        Total number of samples to draw.
    p : int
        Dimension of each sample.
    m : int
        Number of clusters to generate.
    sigma : indexable of length >= m
        ``sigma[i]`` scales the standard-normal noise added around the
        centre of cluster ``i``.
    w : torch.Tensor
        Multinomial probabilities used to split the ``N`` samples among
        the clusters.

    Returns
    -------
    x : torch.Tensor, shape (N, p)
        Samples, stored cluster by cluster in increasing label order.
    y : torch.Tensor, shape (N, 1)
        Cluster index of each row of ``x`` (stored as floats).
    """
    torch.manual_seed(1)
    count = torch.distributions.multinomial.Multinomial(N, w).sample()
    count = count.type(torch.int64)
    y = torch.zeros((N, 1))
    x = torch.zeros((N, p))
    start = 0
    for i in range(m):
        size = int(count[i])
        stop = start + size
        # Cluster centre is drawn first, then the noise, to keep the RNG
        # consumption order stable.
        pos = torch.randn(1, p)
        # Use the `sigma` argument (not a module-level variable) so the
        # function does not depend on notebook state.
        obs = pos + sigma[i] * torch.randn(size, p)
        x[start:stop, :] = obs
        y[start:stop, :] = i
        start = stop
    return x, y

In [4]:
def scrlm(x,n,m,rho,F,device,seed):
    """Pick up to ``m`` cluster centres from a random subsample of ``x``.

    Each subsampled point is scored by the sum, over all rows of ``x``, of
    ``min(d**2 / (p * rho**2) - F, 0)`` where ``d`` is the Euclidean distance
    to that row. Candidates are visited from lowest (most negative) score
    upward; after a candidate is selected, every remaining candidate whose
    scaled squared distance to it is below ``F`` is discarded.

    Parameters
    ----------
    x : torch.Tensor, shape (N, p)
        Data matrix.
    n : int
        Subsample size; at most ``N`` points are used.
    m : int
        Maximum number of centres to return.
    rho : float
        Scale used to normalise squared distances (divided by ``p * rho**2``).
    F : float
        Truncation level of the loss.
    device : torch.device
        Kept for interface compatibility; all work is done on ``x.device``.
    seed : int
        Seed passed to ``torch.manual_seed`` before drawing the subsample.

    Returns
    -------
    torch.Tensor, shape (k, p) with ``k <= m``
        The selected rows of ``x``. ``k`` is 0 when no subsampled point has a
        score strictly below ``-F``.
    """
    torch.manual_seed(seed)
    N, p = x.shape
    scale = p * rho**2

    # Draw the permutation with the default (CPU) generator so it depends only
    # on `seed`, then move the indices next to the data: indexing a CPU tensor
    # with device-resident indices is an error.
    nsub = torch.randperm(N)[:n].to(x.device)

    loss = torch.cdist(x[nsub, :], x)
    loss = (loss**2) / scale - F
    loss[loss > 0] = 0
    # Taking the row sums directly (rather than adding into a length-n buffer)
    # also works when n exceeds N.
    sloss = torch.sum(loss, 1)

    idx = torch.argsort(sloss)
    # A point always contributes about -F for itself, so demand strictly less.
    idx = idx[sloss[idx] < -F]

    sel = []
    while len(idx) and len(sel) < m:
        i = idx[0]
        sel.append(i)
        a = x[nsub[idx], :]
        b = x[nsub[i], :]
        dist = torch.sqrt(((a - b)**2).sum(axis=1))
        dist = (dist**2) / scale - F
        # Keep only candidates outside the selected centre's radius; this also
        # removes the centre itself (its value is -F).
        idx = idx[dist >= 0]

    if not sel:
        # No sufficiently dense point: return an empty set of centres.
        return x[:0]
    result = torch.stack(sel)
    return x[nsub[result], :]

In [5]:
# Sweep the dimension p at fixed sample size N = 2**5.
# Creates the results CSV (overwriting any existing file) and writes the header.
with open('GMM_bound_p_vs_N.csv', 'w', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Configuration shared by every p in the sweep.
    N = 2**5
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(5, 50))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

5 100
6 97
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100


In [6]:
# Sweep the dimension p at fixed sample size N = 2**6; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**6
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(5, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

5 0
6 100
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [7]:
# Sweep the dimension p at fixed sample size N = 2**7; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**7
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(5, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

5 100
6 100
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 85
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [8]:
# Sweep the dimension p at fixed sample size N = 2**8; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**8
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(5, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

5 97
6 100
7 100
8 96
9 100
10 100
11 100
12 99
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [9]:
# Sweep the dimension p at fixed sample size N = 2**9; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**9
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(5, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

5 39
6 100
7 100
8 3
9 17
10 100
11 1
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [10]:
# Sweep the dimension p at fixed sample size N = 2**10; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**10
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 20
3 0
4 100
5 100
6 100
7 100
8 100
9 100
10 71
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [11]:
# Sweep the dimension p at fixed sample size N = 2**11; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**11
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 0
3 0
4 0
5 100
6 100
7 100
8 100
9 100
10 100
11 37
12 100
13 100
14 100
15 0
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [12]:
# Sweep the dimension p at fixed sample size N = 2**12; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**12
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 0
3 0
4 100
5 0
6 0
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [13]:
# Sweep the dimension p at fixed sample size N = 2**13; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**13
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 0
3 22
4 0
5 100
6 16
7 69
8 93
9 100
10 7
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [14]:
# Sweep the dimension p at fixed sample size N = 2**14; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**14
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 1
3 32
4 92
5 90
6 54
7 100
8 100
9 76
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [15]:
# Sweep the dimension p at fixed sample size N = 2**15; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**15
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 0
3 0
4 100
5 0
6 100
7 66
8 98
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 10
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [16]:
# Sweep the dimension p at fixed sample size N = 2**16; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**16
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 0
3 0
4 12
5 100
6 100
7 100
8 51
9 86
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100


In [17]:
# Sweep the dimension p at fixed sample size N = 2**17; results are appended to the CSV.
with open('GMM_bound_p_vs_N.csv', 'a', newline='') as csvfile:
    fieldnames = ['N','n','m','a','rho','F','seed','accuracy','time','p','count','p_','count_']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Configuration shared by every p in the sweep.
    N = 2**17
    m = 3
    a = 0.7
    w_min = a / m
    w_max = 1.3 / m
    n = math.ceil(m / 0.7 * (math.log(m) + math.log(4 / 0.01)))  # subsample size
    run = 100  # number of seeds tried per p
    F = 2.5
    rho = 0.5
    std = torch.linspace(1 / 16, 0.25, m)  # per-cluster noise scale
    w = torch.linspace(w_min, w_max, m)    # mixture weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    p_list = list(range(1, 60))

    for p in p_list:
        count = 0  # seeds that reach accuracy exactly 1 for this p
        x, y = GMM_without_Outlier(N, p, m, std, w)
        x = x.to(device)
        y = y.to(device)
        writer.writerow({'N': N, 'n': n, 'm': m, 'a': a, 'rho': rho, 'F': F})
        for j in range(run):
            start_time2 = time.time()
            center = scrlm(x, n, m, rho, F, device, j)
            d = torch.cdist(center, x)
            d = (d**2) / (p * rho**2) - F
            # Nearest centre per point; a positive value means the point lies
            # outside every centre's radius, so it is labelled -1.
            score, label = torch.min(d, dim=0)
            label[score > 0] = -1
            end_time2 = time.time()
            t1 = end_time2 - start_time2
            acc = accuracy(y.cpu(), label.cpu())
            if acc == 1:
                count += 1
            writer.writerow({'seed': j, 'accuracy': acc, 'time': t1})
        writer.writerow({'p': p, 'count': count})
        print(p, count)
        # Extra marker row when every seed succeeded.
        if count == run:
            writer.writerow({'p_': p, 'count_': count})

1 0
2 0
3 0
4 92
5 100
6 100
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100
