# Data analysis basic examples

In [1]:
import numpy
import sys
# Print arrays in full (no "..." summarisation) and allow very wide rows,
# so each matrix row stays on a single output line.
numpy.set_printoptions(threshold=sys.maxsize, linewidth=500)

In [2]:
# Open the edge-list CSV for reading; the parsing cell iterates over this handle.
csv_path = "data/KarateClub.csv"
data_file = open(csv_path, "r")

In [3]:
# Parse edges from the data file: each line holds one ";"-separated record.

edges = []

# The context manager closes the file handle once every line has been read.
with data_file:
    for line in data_file:
        record = line.strip()
        if not record:
            # Skip blank lines (e.g. a trailing empty line); they would
            # otherwise become [""] and break the integer conversion later.
            continue
        edges.append(record.split(";"))

In [4]:
# Turn the parsed string records into an int32 array (one row per record).
edge_strings = numpy.array(edges)
edges = edge_strings.astype(numpy.int32)

## Adjacency list

In [5]:
# Build an undirected adjacency list: node -> list of neighbouring nodes.
adjacency_dic = {}
for edge in edges:
    # Cast NumPy scalars to plain ints so keys and neighbours print as
    # "1 -> [2, 3]" regardless of how the installed NumPy formats scalars.
    first, second = int(edge[0]), int(edge[1])

    # Register the edge in both directions; setdefault creates the
    # neighbour list the first time a node is seen.
    adjacency_dic.setdefault(first, []).append(second)
    adjacency_dic.setdefault(second, []).append(first)

# Print the nodes in ascending order together with their neighbours.
for key in sorted(adjacency_dic):
    print("{} -> {}".format(key, adjacency_dic[key]))

1 -> [2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 18, 20, 22, 32]
2 -> [1, 3, 4, 8, 14, 18, 20, 22, 31]
3 -> [1, 2, 4, 8, 9, 10, 14, 28, 29, 33]
4 -> [1, 2, 3, 8, 13, 14]
5 -> [1, 7, 11]
6 -> [1, 7, 11, 17]
7 -> [1, 5, 6, 17]
8 -> [1, 2, 3, 4]
9 -> [1, 3, 31, 33, 34]
10 -> [3, 34]
11 -> [1, 5, 6]
12 -> [1]
13 -> [1, 4]
14 -> [1, 2, 3, 4, 34]
15 -> [33, 34]
16 -> [33, 34]
17 -> [6, 7]
18 -> [1, 2]
19 -> [33, 34]
20 -> [1, 2, 34]
21 -> [33, 34]
22 -> [1, 2]
23 -> [33, 34]
24 -> [26, 28, 30, 33, 34]
25 -> [26, 28, 32]
26 -> [24, 25, 32]
27 -> [30, 34]
28 -> [3, 24, 25, 34]
29 -> [3, 32, 34]
30 -> [24, 27, 33, 34]
31 -> [2, 9, 33, 34]
32 -> [1, 25, 26, 29, 33, 34]
33 -> [3, 9, 15, 16, 19, 21, 23, 24, 30, 31, 32, 34]
34 -> [9, 10, 14, 15, 16, 19, 20, 21, 23, 24, 27, 28, 29, 30, 31, 32, 33]


## Adjacency matrix

In [6]:
# Prepare the adjacency matrix.
# One row/column per node id from 0 up to the largest id found in `edges`.
# The size is stored in `matrix_size` rather than `len`, so the built-in
# len() is not shadowed for the rest of the session.
matrix_size = edges.max() + 1
matrix = numpy.zeros((matrix_size, matrix_size))

In [7]:
# Fill the adjacency matrix symmetrically: every edge (a, b) sets both
# matrix[a, b] and matrix[b, a] to 1.
first_nodes = edges[:, 0]
second_nodes = edges[:, 1]
matrix[first_nodes, second_nodes] = 1
matrix[second_nodes, first_nodes] = 1

In [8]:
# Print the adjacency matrix without row 0 and column 0.
adjacency_view = matrix[1:, 1:]
print(adjacency_view)

[[0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0.]
 [1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

### Both axes (rows and columns) represent node numbers. Values in the matrix mark adjacency: a value of 1 means the two nodes are adjacent.

## Incidence matrix

In [9]:
# Prepare the incidence matrix: one row per edge, one column per node id
# from 0 up to the largest id found in `edges`.
edge_count = edges.shape[0]
node_columns = edges.max() + 1
incidence_matrix = numpy.zeros((edge_count, node_columns))

In [10]:
# Fill the incidence matrix.
# Row i belongs to edge i; the columns of both endpoints of that edge are
# set to 1. Indexed assignment handles all edges at once, the same way the
# adjacency matrix is filled, instead of looping edge by edge in Python.
edge_rows = numpy.arange(edges.shape[0])
incidence_matrix[edge_rows, edges[:, 0]] = 1
incidence_matrix[edge_rows, edges[:, 1]] = 1
    

In [11]:
# Print the incidence matrix without column 0.
incidence_view = incidence_matrix[:, 1:]
print(incidence_view)

[[1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

### The x axis (columns) represents node numbers and the y axis (rows) represents edge numbers. A value of 1 means the node is an endpoint of the edge.
### Each row therefore marks the two nodes connected by the given edge.

In [12]:
#