In [1]:
import math
from math import floor
import random
from collections import defaultdict
import itertools

In [2]:
#Returns a Vector from a list

class Vector(object):

    def __init__(self, components):
        if not isinstance(components, tuple) and not isinstance(components, list):
            raise ValueError("Components must be a tuple or list.")
        if len(components) == 0:
            raise ValueError("Components must be at least 1-dimensional.")
        self.dimensions = len(components)
        self.components = components

    def __getitem__(self, key):
        return self.components[key]

    def __str__(self):
        return self.components.__str__()

    def __len__(self):
        return self.dimensions

In [3]:
# Measures square of euclidian distance for a nD vector

def cheap_dist(v1, v2):
    comps1 = v1.components
    comps2 = v2.components
    max_dimensions = max(v1.dimensions, v2.dimensions)
    dist = 0.0
    for i in range(0, max_dimensions):
        dist += math.pow(v1[i]-v2[i] , 2)
    return dist

In [4]:
# Measures euclidian distance for a nD vector

def dist(v1, v2):
    return math.sqrt(cheap_dist(v1,v2))

In [5]:
#example :


pts = [
        Vector([4,3,16,5,5]),
        Vector([230,423,150,2,180]),
        Vector([123,500,400,302,405]),
        Vector([6,4,10,8,105]),
        Vector([123,432,432,8,451]),
        Vector([14,113,116,51,51])
    ]

In [6]:
dist(pts[0],pts[1])

525.4198321342658

In [7]:
n  = len(pts)
mat_n = [[-1 for j in range(n)] for i in range(n)]

In [8]:
for i in range(len(mat_n)):
    for j in range (i,len(mat_n)):
        mat_n[i][j] = int(dist(pts[i],pts[j]))
        print("Distance between Point{} and Point{} is = {}".format(i+1,j+1,mat_n[i][j]))
        

Distance between Point1 and Point1 is = 0
Distance between Point1 and Point2 is = 525
Distance between Point1 and Point3 is = 810
Distance between Point1 and Point4 is = 100
Distance between Point1 and Point5 is = 755
Distance between Point1 and Point6 is = 162
Distance between Point2 and Point2 is = 0
Distance between Point2 and Point3 is = 469
Distance between Point2 and Point4 is = 500
Distance between Point2 and Point5 is = 405
Distance between Point2 and Point6 is = 403
Distance between Point3 and Point3 is = 0
Distance between Point3 and Point4 is = 766
Distance between Point3 and Point5 is = 306
Distance between Point3 and Point6 is = 656
Distance between Point4 and Point4 is = 0
Distance between Point4 and Point5 is = 703
Distance between Point4 and Point6 is = 167
Distance between Point5 and Point5 is = 0
Distance between Point5 and Point6 is = 612
Distance between Point6 and Point6 is = 0


In [9]:
mat_n

[[0, 525, 810, 100, 755, 162],
 [-1, 0, 469, 500, 405, 403],
 [-1, -1, 0, 766, 306, 656],
 [-1, -1, -1, 0, 703, 167],
 [-1, -1, -1, -1, 0, 612],
 [-1, -1, -1, -1, -1, 0]]

In [82]:
def dist_matrix(points) : 
    n  = len(points)
    max_dist = 0.0
    min_dist = 10
    dist_mat = [[-1 for j in range(n)] for i in range(n)]
    for i in range(len(dist_mat)):
        for j in range (i,len(dist_mat)):
            dist_mat[i][j] = dist(points[i],points[j])
            if dist_mat[i][j] >= max_dist and dist_mat[i][j] > 0.0:
                max_dist = dist_mat[i][j]
            elif dist_mat[i][j] <= min_dist:
                min_dist = dist_mat[i][j]
                
    return dist_mat,max_dist,min_dist
    
    

In [83]:
g,h,p = dist_matrix(dataset)

In [85]:
p

0.0

In [12]:
#data load

import numpy as np
import pandas as pd
from csv import reader

In [13]:
!ls

closest.py	  iris.ipynb   README.md       vectors.py
iris-dataset.csv  __pycache__  Untitled.ipynb


In [14]:
filename = input("Enter file name :")

Enter file name :iris-dataset.csv


In [25]:
with open(filename, newline='') as iris:
    data = list(reader(iris, delimiter=','))

In [26]:
data

[['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'],
 ['4.9', '3.0', '1.4', '0.2', 'Iris-setosa'],
 ['4.7', '3.2', '1.3', '0.2', 'Iris-setosa'],
 ['4.6', '3.1', '1.5', '0.2', 'Iris-setosa'],
 ['5.0', '3.6', '1.4', '0.2', 'Iris-setosa'],
 ['5.4', '3.9', '1.7', '0.4', 'Iris-setosa'],
 ['4.6', '3.4', '1.4', '0.3', 'Iris-setosa'],
 ['5.0', '3.4', '1.5', '0.2', 'Iris-setosa'],
 ['4.4', '2.9', '1.4', '0.2', 'Iris-setosa'],
 ['4.9', '3.1', '1.5', '0.1', 'Iris-setosa'],
 ['5.4', '3.7', '1.5', '0.2', 'Iris-setosa'],
 ['4.8', '3.4', '1.6', '0.2', 'Iris-setosa'],
 ['4.8', '3.0', '1.4', '0.1', 'Iris-setosa'],
 ['4.3', '3.0', '1.1', '0.1', 'Iris-setosa'],
 ['5.8', '4.0', '1.2', '0.2', 'Iris-setosa'],
 ['5.7', '4.4', '1.5', '0.4', 'Iris-setosa'],
 ['5.4', '3.9', '1.3', '0.4', 'Iris-setosa'],
 ['5.1', '3.5', '1.4', '0.3', 'Iris-setosa'],
 ['5.7', '3.8', '1.7', '0.3', 'Iris-setosa'],
 ['5.1', '3.8', '1.5', '0.3', 'Iris-setosa'],
 ['5.4', '3.4', '1.7', '0.2', 'Iris-setosa'],
 ['5.1', '3.7', '1.5', '0.4', 'Iri

In [27]:
pp = []
for x in data[0][:-1] :
    pp.append(float(x))

In [28]:
pp

[5.1, 3.5, 1.4, 0.2]

In [29]:
dataset = []
for i in range(len(data)):
    temp = []
    for x in data[i][:-1]:
        temp.append(float(x))
    vec = Vector(temp)
    dataset.append(vec)

In [30]:
dataset

[<__main__.Vector at 0x7fad95d07080>,
 <__main__.Vector at 0x7fad95d070b8>,
 <__main__.Vector at 0x7fad95d070f0>,
 <__main__.Vector at 0x7fad95d07128>,
 <__main__.Vector at 0x7fad95d07160>,
 <__main__.Vector at 0x7fad95d07198>,
 <__main__.Vector at 0x7fad95d071d0>,
 <__main__.Vector at 0x7fad95d07208>,
 <__main__.Vector at 0x7fad95d07240>,
 <__main__.Vector at 0x7fad95d07278>,
 <__main__.Vector at 0x7fad95d072b0>,
 <__main__.Vector at 0x7fad95d072e8>,
 <__main__.Vector at 0x7fad95d07320>,
 <__main__.Vector at 0x7fad95d07358>,
 <__main__.Vector at 0x7fad95d07390>,
 <__main__.Vector at 0x7fad95d073c8>,
 <__main__.Vector at 0x7fad95d07400>,
 <__main__.Vector at 0x7fad95d07438>,
 <__main__.Vector at 0x7fad95d07470>,
 <__main__.Vector at 0x7fad95d074a8>,
 <__main__.Vector at 0x7fad95d074e0>,
 <__main__.Vector at 0x7fad95d07518>,
 <__main__.Vector at 0x7fad95d07550>,
 <__main__.Vector at 0x7fad95d07588>,
 <__main__.Vector at 0x7fad95d075c0>,
 <__main__.Vector at 0x7fad95d075f8>,
 <__main__.V

In [31]:
dist(dataset[148],dataset[149])

0.7681145747868608

In [32]:
dataset

[<__main__.Vector at 0x7fad95d07080>,
 <__main__.Vector at 0x7fad95d070b8>,
 <__main__.Vector at 0x7fad95d070f0>,
 <__main__.Vector at 0x7fad95d07128>,
 <__main__.Vector at 0x7fad95d07160>,
 <__main__.Vector at 0x7fad95d07198>,
 <__main__.Vector at 0x7fad95d071d0>,
 <__main__.Vector at 0x7fad95d07208>,
 <__main__.Vector at 0x7fad95d07240>,
 <__main__.Vector at 0x7fad95d07278>,
 <__main__.Vector at 0x7fad95d072b0>,
 <__main__.Vector at 0x7fad95d072e8>,
 <__main__.Vector at 0x7fad95d07320>,
 <__main__.Vector at 0x7fad95d07358>,
 <__main__.Vector at 0x7fad95d07390>,
 <__main__.Vector at 0x7fad95d073c8>,
 <__main__.Vector at 0x7fad95d07400>,
 <__main__.Vector at 0x7fad95d07438>,
 <__main__.Vector at 0x7fad95d07470>,
 <__main__.Vector at 0x7fad95d074a8>,
 <__main__.Vector at 0x7fad95d074e0>,
 <__main__.Vector at 0x7fad95d07518>,
 <__main__.Vector at 0x7fad95d07550>,
 <__main__.Vector at 0x7fad95d07588>,
 <__main__.Vector at 0x7fad95d075c0>,
 <__main__.Vector at 0x7fad95d075f8>,
 <__main__.V

In [42]:
g,h,p = dist_matrix(dataset)

In [44]:
p

7.059036761485238

In [34]:
p

0.7681145747868608

In [73]:
%matplotlib inline
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

In [88]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

%matplotlib inline
warnings.filterwarnings("ignore")


In [91]:
df = pd.read_csv('iris-dataset.csv', names = ['f1','f2','f3','f4','t'])
df.head()

Unnamed: 0,f1,f2,f3,f4,t
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [92]:
df = df.drop("t", axis=1)

In [93]:
df.head()

Unnamed: 0,f1,f2,f3,f4
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [103]:
m=[]
m= df.max()

In [104]:
m

f1    7.9
f2    4.4
f3    6.9
f4    2.5
dtype: float64

In [107]:
l=[]
l= df.min()

In [108]:
l

f1    4.3
f2    2.0
f3    1.0
f4    0.1
dtype: float64

In [177]:
k=(m-l)/500

In [178]:
k

f1    0.0072
f2    0.0048
f3    0.0118
f4    0.0048
dtype: float64

In [179]:
k[0]

0.007200000000000001

In [180]:
#a = [[0 for x in range(columns)] for y in range(rows)]
mat = [[-1 for i in range(4)] for j in range(150)]

In [181]:
mat

[[-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1, -1],
 [-1, -1, -1

In [182]:
original = df.as_matrix(columns=None)

In [205]:
original[5]

array([5.4, 3.9, 1.7, 0.4])

In [184]:
#for gene 1 only

In [185]:

for i in range(150):
    for j in range (4):
        if original[i][j] >= original[0][j] - k[j]  and original[i][j] <= original[i][0] + k[j]:
            mat[i][j] = 1
        

In [186]:
mat

[[1, 1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, -1, 1],
 [-1, -1, 1, 1],
 [-1, 1, 1, 1],
 [1, 1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, -1],
 [1, 1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, -1],
 [-1, -1, -1, -1],
 [1, 1, -1, 1],
 [1, 1, 1, 1],
 [1, 1, -1, 1],
 [1, 1, 1, 1],
 [1, 1, 1, 1],
 [1, 1, 1, 1],
 [1, -1, 1, 1],
 [1, 1, 1, 1],
 [-1, 1, -1, 1],
 [1, -1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, 1],
 [1, 1, 1, 1],
 [1, -1, 1, 1],
 [-1, -1, 1, 1],
 [-1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, 1, 1, -1],
 [1, 1, 1, 1],
 [-1, -1, 1, -1],
 [-1, -1, -1, 1],
 [1, 1, -1, 1],
 [-1, -1, 1, -1],
 [-1, -1, -1, 1],
 [1, -1, 1, 1],
 [-1, 1, -1, 1],
 [-1, -1, -1, 1],
 [-1, -1, -1, 1],
 [-1, 1, 1, 1],
 [1, 1, 1, 1],
 [-1, -1, 1, 1],
 [1, 1, 1, 1],
 [-1, -1, 1, 1],
 [1, 1, 1, 1],
 [-1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [-1, -1, 1, 1],
 [1, -1, 1, 1],
 [1, -1, 1, 1],
 [-1, -1, 1, 1]

In [187]:
#for all genes

In [188]:

distance1 = [[[(str(i)+" "+str(j)+" "+str(k)) for k in range(4)] for j in range(5)] for i in range(2)]

In [189]:
distance1

[[['0 0 0', '0 0 1', '0 0 2', '0 0 3'],
  ['0 1 0', '0 1 1', '0 1 2', '0 1 3'],
  ['0 2 0', '0 2 1', '0 2 2', '0 2 3'],
  ['0 3 0', '0 3 1', '0 3 2', '0 3 3'],
  ['0 4 0', '0 4 1', '0 4 2', '0 4 3']],
 [['1 0 0', '1 0 1', '1 0 2', '1 0 3'],
  ['1 1 0', '1 1 1', '1 1 2', '1 1 3'],
  ['1 2 0', '1 2 1', '1 2 2', '1 2 3'],
  ['1 3 0', '1 3 1', '1 3 2', '1 3 3'],
  ['1 4 0', '1 4 1', '1 4 2', '1 4 3']]]

In [190]:
dist = [[[0 for k1 in range(4)] for j in range(150)] for i in range(150)]

In [191]:
#k1 traverses through each gene as g1, g2,..
#i traverses through individual gene as g11, g12 or correlation with g1 
#j is for the different parameters

for k1 in range (150):    
    for i in range(150):
        for j in range (4):
            if original[i][j] >= original[k1][j] - k[j]  and original[i][j] <= original[i][0] + k[j]:
                dist[k1][i][j] = 1
 


In [204]:
dist[0][1]

[0, 0, 1, 1]

In [206]:
tot_clu =[]
for i in range(len(dist)) :
    point_clu = []
    for j in range(len(dist[i])):
        if np.sum(dist[i][j]) == 4 :
            point_clu.append(original[j])
    tot_clu.append(point_clu)




In [211]:
len(tot_clu[2])

48