### Summer Workshop
#### July 6, 2018
##### Author: Dhananjay Bhaskar <dhananjay_bhaskar@brown.edu>

### Functions
- Also called methods, procedures

In [36]:
# Takes 3 arguments, returns a tuple with 2 values
def myfunction(x, y, z):
    return (x + y, y + z)

In [37]:
myfunction(3, 4, 1)

(7, 5)

In [38]:
# Takes a list argument
# Returns sum of list
def summation(num_list):
    return sum(num_list)

In [39]:
summation([1, 2.2, 3, 4, 55])

65.2

### Classes
Create a class to represent points in 3D
- Classes are used to define complex data structures
- A class usually represents a single concept/idea

In [40]:
import numpy as np

class Point3D:
    
    # Constructor
    def __init__(self, x, y, z):
        self.x = x
        self.y = y
        self.z = z
        self.vec = [x,y,z]
        
    def print_vec(self):
        print repr(self.vec)
        
    def compute_dist(self, other_point):
        pt1 = np.array(self.vec)
        pt2 = np.array(other_point.vec)
        return np.linalg.norm(pt1-pt2)

In [41]:
# Create Point3D objects
p1 = Point3D(1, 3, 5)
p2 = Point3D(2, 5, 7)

In [42]:
# Call print_vec function
p1.print_vec()
p2.print_vec()

[1, 3, 5]
[2, 5, 7]


In [43]:
# Compute distance between points
p1.compute_dist(p2)

3.0

In [44]:
# Another way to compute distance
Point3D.compute_dist(p1, p2)

3.0

### Libraries/Modules

In [45]:
import string
from graphviz import Graph

In [46]:
# Create undirected graph
word_graph = Graph()

In [47]:
# Parse all words in file, separated by space
with open('../miniwordlist.txt') as f:
    words = f.read().split()

In [48]:
len(words)

1657

In [49]:
# Create nodes using graphviz
for word in words:
    word_graph.node(word)

In [50]:
# Assume strings a and b have equal length
# Compute number of letters the strings differ by
def hamming_dist(a, b):
    res = 0
    for i in range(len(a)):
        res += a[i] != b[i]
    return res

In [51]:
hamming_dist('rat', 'rag')

1

In [52]:
hamming_dist('rat', 'pan')

2

In [57]:
# Aside: ordering of elements in a set does not matter
set([1,6]) == set([6,1])

True

In [53]:
%%time
# Create edges for first 200 words in list
# TODO: find a more efficient implementation
num_edges = 0

pairs_seen = list()

for word1 in words[:200]:
    for word2 in words[:200]:
        
        pair = set([word1, word2])
        if pair in pairs_seen:
            continue
        else:
            pairs_seen.append(pair)
        
        if hamming_dist(word1, word2) == 1:
            num_edges += 1
            word_graph.edge(word1, word2)

CPU times: user 11.1 s, sys: 92 ms, total: 11.2 s
Wall time: 11.1 s


In [54]:
print num_edges

392


In [55]:
# Write file
outfile = open('graph.gv', "w")
outfile.write(word_graph.source)
outfile.close()

In [56]:
# Use a Linux/Bash command
import os
os.system("neato -x -Tpdf graph.gv > graph.pdf")

0