# Visualize Database
- Following the article at "http://www.austintaylor.io/d3/python/pandas/2016/02/01/create-d3-chart-python-force-directed/"
- Attempting to visualize the dvdrentals sample postgres database

## The Network Structure
- A dictionary with two lists, nodes and links.
- links contains the relationships between nodes
- nodes contains each individual node

```json
{
  "nodes":  [
    { "name": "desktop", "group":  1},
    { "name": "desktop/apples.txt", "group":  1},
    { "name": "desktop/pineapple/apples.txt", "group":  1},
    { "name": "desktop/bananas.txt", "group":  1}
  ],

  "links":  [
    { "source":  1,  "target":  0,  "value":  5555 },
    { "source":  2,  "target":  0,  "value":  1 },
    { "source":  3,  "target":  0,  "value": 1 }
  ]
}
```

## Setup

### Modules

In [1]:
import os
import pandas
import json

### Postgres SQL code generating the csv below

```SQL

-- Drop the staging table if it exists, so the script can be re-run from scratch

DROP TABLE IF EXISTS
	test_data;

-- Create new table containing column and table information from dvdrentals database

SELECT 
	*
INTO 
	test_data 
FROM (
SELECT
	table_schema,
	table_name,
	column_name
FROM
	information_schema.columns
WHERE
	table_schema = 'public' AND
	table_name IN ('actor','address','category','city','country',
				   'customer','film','film_actor','film_category','inventory',
				   'language','payment','rental','staff','store')
) AS subset;

-- Export to disk
-- NOTE: COPY ... TO '<file>' is written by the database server process, so the
-- path must be writable by the server; use psql's \copy for a client-side export.
COPY
	(SELECT * FROM test_data)
TO
	'/users/danielcorcoran/desktop/github_repos/python_nb_networks/raw data/dvdrentals_column_data.csv'
DELIMITER
	','
CSV HEADER;

```

### Data File Path 

In [2]:
# Absolute, machine-specific location of the CSV exported by the SQL above;
# adjust it to wherever the file lives on your machine.
data_path = (
    "c:/users/daniel/desktop/python_nb_networks/raw data/"
    "dvdrentals_column_data.csv"
)

### Read in data, add columns

In [3]:
# Load the exported schema/table/column listing into a DataFrame.
raw_data = pandas.read_csv(filepath_or_buffer=data_path)

In [4]:
# Preview the first five rows of the raw listing.
raw_data.head(n=5)

Unnamed: 0,table_schema,table_name,column_name
0,public,staff,staff_id
1,public,staff,first_name
2,public,staff,last_name
3,public,staff,address_id
4,public,staff,email


## Manipulate raw data into desired format

In [5]:
# Split the listing into the two edge sets of the network. Each subset is an
# explicit copy: the following cells rename and add columns on these frames,
# and doing that on a slice of raw_data triggers pandas' SettingWithCopyWarning
# (and leaves it ambiguous whether raw_data itself is modified).
subset1 = raw_data[["table_schema", "table_name"]].copy()  # schema -> table
subset2 = raw_data[["table_name", "column_name"]].copy()  # table -> column

In [6]:
# Preview the first five schema -> table pairs.
subset1.head(n=5)

Unnamed: 0,table_schema,table_name
0,public,staff
1,public,staff
2,public,staff
3,public,staff
4,public,staff


In [7]:
# Preview the first five table -> column pairs.
subset2.head(n=5)

Unnamed: 0,table_name,column_name
0,staff,staff_id
1,staff,first_name
2,staff,last_name
3,staff,address_id
4,staff,email


In [8]:
# Give both edge sets the same generic column names so they can be stacked.
for frame in (subset1, subset2):
    frame.columns = ["source", "target"]

In [9]:
# Tag every row with the kind of structure its target represents, plus the
# colour and size the visualisation should use for that kind of node.
for frame, structure, colour_code, node_size in (
    (subset1, "table", "#4ABBDB", 6),
    (subset2, "column", "#FFAA00", 4),
):
    frame["structure"] = structure
    frame["colour_code"] = colour_code
    frame["node_size"] = node_size

In [10]:
# Stack the two edge sets row-wise into a single edge table.
edge_sets = [subset1, subset2]
data = pandas.concat(edge_sets, axis=0)

In [11]:
# Preview the first five rows of the combined edge table.
data.head(n=5)

Unnamed: 0,source,target,structure,colour_code,node_size
0,public,staff,table,#4ABBDB,6
1,public,staff,table,#4ABBDB,6
2,public,staff,table,#4ABBDB,6
3,public,staff,table,#4ABBDB,6
4,public,staff,table,#4ABBDB,6


In [12]:
# Show (rows, columns) of the combined edge table before duplicates are dropped.
data.shape

(172, 5)

In [13]:
# Remove repeated rows in place, keeping the first occurrence of each
# (the schema -> table pair is repeated once per column in the raw listing).
data.drop_duplicates(keep="first", inplace=True)

In [14]:
# Renumber the rows 0..n-1 in place, discarding the old index labels, so the
# rows can later be addressed by position.
data.index = range(len(data))

## Build Json

### Helper Functions

In [15]:
def get_unique_nodes_from_list(list_of_dictionaries, seeking_key = "name"):
    """Return the value stored under `seeking_key` in each dictionary.

    The values are returned in input order, one per dictionary. Despite the
    function's name, no de-duplication is performed here; the result is only
    unique if the input dictionaries already hold unique values.

    Raises KeyError if any dictionary lacks `seeking_key`.
    """
    return [entry[seeking_key] for entry in list_of_dictionaries]

### Set source and target column headers

In [16]:
# Names of the columns in `data` that hold each link's two endpoints.
source_header, target_header = "source", "target"

### Convert items to strings

In [17]:
# Node names are compared as text, so force the source column to strings.
source_as_text = data[source_header].astype(str)
data[source_header] = source_as_text

In [18]:
# Same conversion for the target column.
target_as_text = data[target_header].astype(str)
data[target_header] = target_as_text

## Process

In [19]:
# Accumulators for the two halves of the network JSON.
nodes_list, links_list = [], []

# Number of rows (edges) to process.
row_count = len(data)

### Iterate through each row in the dataset

In [20]:
# Build the network: one node per distinct name, one link per row of `data`.
#
# `node_positions` maps a node name to its position in `nodes_list`, so each
# membership test and index lookup is a dictionary hit instead of rebuilding
# and scanning the full list of names twice per row.
node_positions = {}
for position, existing_node in enumerate(nodes_list):
    # setdefault keeps the first position if a name was already listed twice
    node_positions.setdefault(existing_node["name"], position)

# Iterate the columns in lockstep; this walks rows by position and therefore
# does not depend on the frame's index labels being 0..n-1.
for source_text, target_text, html_colour, node_size in zip(
    data[source_header],
    data[target_header],
    data["colour_code"],
    data["node_size"],
):
    # Sizes are stored as strings in the exported JSON.
    size = str(node_size)

    # Register the target first and the source second (this order determines
    # the node indices). Checking the live mapping for each name means a row
    # whose source equals its target adds only one node, not two.
    # NOTE: nodes are identified by name alone, so equal names coming from
    # different rows share a single node, which keeps the colour and size of
    # the first row that introduced it.
    for node_name in (target_text, source_text):
        if node_name not in node_positions:
            node_positions[node_name] = len(nodes_list)
            nodes_list.append({"name": node_name, "colour": html_colour, "size": size})

    # Links refer to their endpoints by position in nodes_list.
    links_list.append({"source": node_positions[source_text],
                       "target": node_positions[target_text],
                       "value": 10})

### Create dictionary storing links_list and nodes_list together

In [21]:
# Bundle both lists under the keys the force-directed chart expects.
json_data = dict(links=links_list, nodes=nodes_list)

## Export Json

### Convert python dictionary to json string

In [22]:
# Serialise the network to a JSON string with sorted keys and 1-space indent.
json_dump = json.dumps(
    json_data,
    sort_keys=True,
    indent=1,
)

### Export to 'json/dvdrental_database_network_2.json' to be used in index.html

In [23]:
# Write the JSON string to disk. The context manager closes the file even if
# the write raises, which the manual open()/close() pair did not guarantee.
# The "json" directory must already exist relative to the working directory.
with open("json/dvdrental_database_network_2.json", "w") as json_out:
    json_out.write(json_dump)