# BayesDB analysis of fish bouts in relation to paramecia characteristics

## 1. Ingesting the data

### a. Launching BayesDB

In [1]:
# Project-local helper; its implementation is not part of this notebook.
from csv_analysis import invert_all_bouts
import pandas as pd
# Directory prefix for this dataset. The trailing slash matters because the
# path below is built by plain string concatenation.
drct = '042318_6/'
csv_file = drct + 'huntingbouts_filt_5bout1.csv'
# Read the filtered hunting-bout CSV into a DataFrame.
data = pd.read_csv(csv_file)
# NOTE(review): presumably writes 'huntbouts_inverted.csv' into `drct` (a file
# of that name is loaded into BayesDB further down) -- confirm in csv_analysis.
invert_all_bouts(data, drct)


In [2]:
# Load the jupyter_probcomp IPython extension.
# NOTE(review): presumably this is what registers the %bayesdb, %sql, %bql,
# %mml and %vizgpm magics used in the following cells -- confirm.
%load_ext jupyter_probcomp.magics

session_id: jovyan@nightcrawler2-notebook_2018-07-29T03:19:31.024974_B


In [3]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): presumably the matching inline mode for jupyter_probcomp's own
# visualizations -- confirm against the extension's documentation.
%vizgpm inline

<IPython.core.display.Javascript object>

In [4]:
# Delete any database file left over from a previous run so this session
# starts from a fresh .bdb file (-f: no error if the file does not exist).
!rm -f 042318_6/bdb_hunts_inverted.bdb
# Open (create) the BayesDB file for this session.
# NOTE(review): -j presumably enables multiprocess analysis -- confirm.
%bayesdb -j 042318_6/bdb_hunts_inverted.bdb

u'Loaded: 042318_6/bdb_hunts_inverted.bdb'

### b. Loading and printing the data from a `.csv` file

In [5]:
# Drop any earlier copy of the table so this cell can be re-run, then create
# the table from the inverted-bouts CSV.
%sql DROP TABLE IF EXISTS bout_table;
%bql CREATE TABLE bout_table FROM '042318_6/huntbouts_inverted.csv' 

In [6]:
# Preview the ingested table (at most 25 rows).
%sql SELECT * FROM bout_table LIMIT 25;

Unnamed: 0,Hunt ID,Bout Number,Bout Az,Bout Alt,Bout Dist,Bout Delta Pitch,Bout Delta Yaw,Para Az,Para Alt,Para Dist,Para Az Velocity,Para Alt Velocity,Para Dist velocity,Postbout Para Az,Postbout Para Alt,Postbout Para Dist,Strike Or Abort,Inferred,Avg Para Velocity,Percent Nans in Bout
0,6.0,0.0,1.483392,0.35354,176.875575,0.208564,0.915569,1.429832,0.516095,352.929936,-0.007099,-0.001299,-1.457911,0.383256,0.572648,190.367265,1.0,1.0,1.955152,0.0
1,6.0,1.0,1.78356,0.441421,61.640767,0.336001,0.444115,0.63747,0.632546,173.94979,0.004821,-0.004269,-1.182665,-0.182772,0.340967,152.797455,1.0,0.0,1.955152,0.0
2,9.0,0.0,0.506706,-0.059321,53.007906,-0.133398,0.228283,0.323264,0.18923,281.018411,-0.055263,-0.004879,-4.54383,0.085878,0.306153,242.040221,2.0,0.0,3.464648,0.272727
3,9.0,1.0,0.622967,0.1855,40.279539,0.036006,0.159684,0.084501,0.328278,243.871697,-0.002494,-0.000956,0.474774,-0.12796,0.27989,216.75682,2.0,0.0,3.464648,0.0
4,9.0,2.0,0.011075,0.067245,51.069587,-0.01329,-0.009786,0.123433,0.263269,212.646078,-0.001821,-0.004629,-0.596787,0.116984,0.249567,168.922531,2.0,0.0,3.464648,0.0
5,9.0,3.0,0.387899,0.005546,65.568656,0.023563,0.168884,0.117435,0.245743,167.145417,0.001698,-0.0013,-2.282867,-0.285102,0.165855,123.127227,2.0,0.0,3.464648,0.0
6,9.0,4.0,0.408054,-0.082912,73.441835,-0.017405,0.186945,0.302955,0.094228,113.713507,-0.007594,-0.018352,-0.128023,-0.014317,-0.011313,68.569031,2.0,1.0,3.464648,0.0
7,19.0,0.0,1.745153,0.068519,68.030867,0.133533,0.572979,0.959816,0.405993,250.547543,-0.008977,0.002937,-1.182338,0.137396,0.429475,208.467732,1.0,0.0,2.579972,0.0
8,19.0,1.0,0.226221,0.339171,37.712165,0.045224,0.055071,0.088988,0.426451,206.439055,-0.008272,-0.001612,-0.082692,-0.019877,0.384856,178.601206,1.0,0.0,2.579972,0.0
9,19.0,2.0,0.188983,0.384293,31.361674,0.021267,0.062052,0.02146,0.384433,177.92089,0.001165,0.000561,-1.093454,-0.03494,0.368936,157.546408,1.0,0.0,2.579972,0.0


In [7]:
# Convert placeholder values (empty strings and the literal text 'nan') into
# SQL NULL so they are treated as missing data rather than as real values.
%bql .nullify bout_table ''
%bql .nullify bout_table 'nan'

Nullified 0 cells
Nullified 0 cells


## 2. Automatically learning a CrossCat probabilistic model

### a. Defining an analysis population

In [8]:
# Show the statistical type BayesDB guesses for each column, with its reason.
%bql GUESS SCHEMA FOR bout_table

Unnamed: 0,column,stattype,num_distinct,reason
0,Hunt ID,nominal,10.0,There are fewer than 20 distinct numerical va...
1,Bout Number,nominal,9.0,There are fewer than 20 distinct numerical va...
2,Bout Az,numerical,54.0,There are at least 20 unique numerical values...
3,Bout Alt,numerical,54.0,There are at least 20 unique numerical values...
4,Bout Dist,numerical,54.0,There are at least 20 unique numerical values...
5,Bout Delta Pitch,numerical,54.0,There are at least 20 unique numerical values...
6,Bout Delta Yaw,numerical,54.0,There are at least 20 unique numerical values...
7,Para Az,numerical,54.0,There are at least 20 unique numerical values...
8,Para Alt,numerical,54.0,There are at least 20 unique numerical values...
9,Para Dist,numerical,54.0,There are at least 20 unique numerical values...


In [9]:
%%mml
DROP POPULATION IF EXISTS bout_population;
CREATE POPULATION bout_population FOR bout_table WITH SCHEMA (
    GUESS STATTYPES OF (*);
    IGNORE "Bout Number", "Strike Or Abort";
)

### b. Creating and analyzing a probabilistic model (automatically)

In [10]:
# Define the model generator for the population (CrossCat, per the section
# heading above).
%mml CREATE GENERATOR huntbouts_crosscat FOR bout_population;

In [11]:
# Initialize an ensemble of 100 models; IF NOT EXISTS skips any that are
# already present, so the cell is safe to re-run.
%mml INITIALIZE 100 MODELS IF NOT EXISTS FOR huntbouts_crosscat;

In [12]:
%mml ANALYZE huntbouts_crosscat FOR 100 ITERATIONS (OPTIMIZED);
# NOTE(review): the (OPTIMIZED) flag is enabled on the line above even though
# an earlier run with it reportedly gave odd results. Confirm the flag is
# intended; otherwise remove it and re-run the analysis.

Completed: 100 iterations in 2.914693 seconds.
Completed: 100 iterations in 3.909709 seconds.
Completed: 100 iterations in 2.558937 seconds.
Completed: 100 iterations in 4.167481 seconds.
Completed: 100 iterations in 2.278650 seconds.
Completed: 100 iterations in 4.776017 seconds.
Completed: 100 iterations in 2.597652 seconds.
Completed: 100 iterations in 2.459723 seconds.
Completed: 100 iterations in 4.017897 seconds.
Completed: 100 iterations in 2.305793 seconds.
Completed: 100 iterations in 4.823508 seconds.
Completed: 100 iterations in 2.775622 seconds.
Completed: 100 iterations in 2.820183 seconds.
Completed: 100 iterations in 4.607478 seconds.
Completed: 100 iterations in 2.500575 seconds.
Completed: 100 iterations in 4.700339 seconds.
Completed: 100 iterations in 3.460482 seconds.
Completed: 100 iterations in 2.647906 seconds.
Completed: 100 iterations in 2.179149 seconds.
Completed: 100 iterations in 4.646113 seconds.
Completed: 100 iterations in 2.530871 seconds.
Completed: 10