In [2]:
# Import packages
%matplotlib inline
import matplotlib 
import matplotlib.pyplot as plt
from matplotlib import cm as CM
from matplotlib import mlab as ML
import pandas as pd
import numpy as np



# Exploring the Data

## First we answer basic questions about our data

#### Question: What is the data?

In [3]:
# Read in the binned synapse data. The path is relative to the notebook's
# working directory.
df = pd.read_csv('../output.csv')

# Show the dtype pandas inferred for each column. The parenthesised
# single-argument print form runs unchanged on Python 2 and Python 3 and
# emits the same text as the old print statement.
print("Data types:")
print(df.dtypes)

Data types:
cx          int64
cy          int64
cz          int64
unmasked    int64
synapses    int64
dtype: object


#### Conclusion: 
"cx", "cy", and "cz" denote the unique location of a "bin", which corresponds to an arbitrary volume of cortical tissue. "synapses" is an integer count of the number of synapses found within that bin. 

"unmasked" indicates how much of the bin (denoted by cx, cy, and cz) was deemed suitable for finding a synapse. Each bin comprises many individual voxels of the EM image. A synapse could technically be found at any given voxel. However, a subset of these voxels was pre-determined (not by us) to contain material that is not a synapse (i.e. cell bodies). These voxels were "masked". Thus the "unmasked" voxels make up the portion of each bin in which a synapse may reside. 

#### Question: How many NaN, Inf, or other "bad" data values are there?

In [5]:
# Element-wise boolean masks over df, one per kind of suspect value.
isNan = df.isnull()
isInf = np.isinf(df)
isNeg = df < 0

# Report every kind of bad value the same way: a per-column count of flagged
# cells, followed by the number of rows that contain at least one flagged cell.
# The single-argument print(...) form runs on both Python 2 and Python 3 and
# keeps the trailing " \n" that the original print statements emitted.
for badKind, badMask in (("nan", isNan), ("inf", isInf), ("negative", isNeg)):
    print("Number of {} values by column:".format(badKind))
    print("{} \n".format(badMask.sum()))
    # any(axis=1) counts each affected row exactly once. Summing the per-row
    # totals (sum(1).sum()) would count flagged cells instead, overstating the
    # row count whenever a single row holds several bad values.
    nBadRows = badMask.any(axis=1).sum()
    print("Number of rows with {} values: {} \n".format(badKind, nBadRows))

Number of nan values by column:
cx          0
cy          0
cz          0
unmasked    0
synapses    0
dtype: int64 

Number of rows with nan values: 0 

Number of inf values by column:
cx          0
cy          0
cz          0
unmasked    0
synapses    0
dtype: int64 

Number of rows with inf values: 0 

Number of negative values by column:
cx          0
cy          0
cz          0
unmasked    0
synapses    0
dtype: int64 

Number of rows with negative values: 0 



#### Conclusion:
There are no NaN, Inf, or negative values in our data set, so by these checks it contains no "bad" data.

#### Question: What is the "size" of our data set?
   * how many synapses are there?
   * how many bins are there?

In [6]:
# Total synapse count: add up the per-bin counts in the "synapses" column.
nSyn = df['synapses'].sum()
# Formatting into one string keeps the output identical to the old print
# statement while remaining valid syntax on both Python 2 and Python 3.
print("There are {} total synapses in the data.".format(nSyn))

There are 7704178 total synapses in the data.


In [7]:
# The notebook treats each row of df as one bin, so the row count is the
# number of bins. len(df) is used rather than df['synapses'].count() because
# Series.count() skips NaN entries and would under-count the bins if any
# synapse value were missing.
nBins = len(df)
# Single-string print is valid on both Python 2 and Python 3 and emits the
# same text as the original print statement.
print("There are {} total 3D bins.".format(nBins))

There are 61776 total 3D bins.
