# BayesDB analysis of fish bouts in relation to paramecia characteristics

## 1. Ingesting the data

### a. Launching BayesDB

In [14]:
from csv_analysis import invert_all_bouts
import pandas as pd
# Directory that holds the hunt-bout CSV for this analysis.
# (Alternative dataset directory, currently inactive: '042318_6/')
drct = 'wik_bdb/'
csv_file = '{0}all_huntbouts_rev_strikes.csv'.format(drct)
# Read the hunt bouts into a DataFrame.
data = pd.read_csv(csv_file)
# Optional step, currently disabled:
# invert_all_bouts(data, drct)


In [15]:
# Load the jupyter_probcomp.magics IPython extension. The %vizgpm, %bayesdb,
# %sql, %bql and %mml magics used in this notebook presumably come from it
# (TODO confirm against the extension's documentation).
%load_ext jupyter_probcomp.magics

The jupyter_probcomp.magics extension is already loaded. To reload it, use:
  %reload_ext jupyter_probcomp.magics


In [16]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the same "inline" setting to the vizgpm magic; presumably this makes its
# visualisations render inside the notebook as well (TODO confirm).
%vizgpm inline

<IPython.core.display.Javascript object>

In [17]:
# Earlier database variants, kept for reference (inactive):
#!rm -f wik_bdb/bdb_hunts_inverted.bdb
#%bayesdb -j wik_bdb/bdb_hunts_inverted.bdb

#!rm -f wik_bdb/bdb_hunts_marr.bdb
#%bayesdb -j wik_bdb/bdb_hunts_marr.bdb

# Active database for this notebook.
# WARNING: `rm -f` deletes any existing .bdb file at this path before it is
# opened, so re-running this cell loses everything previously stored in it.
!rm -f wik_bdb/bdb_hunts_strikes.bdb
%bayesdb -j wik_bdb/bdb_hunts_strikes.bdb



u'Loaded: wik_bdb/bdb_hunts_strikes.bdb'

### b. Loading and printing the data from a `.csv` file

In [18]:
# Drop any existing bout_table first so this cell can be re-run.
%sql DROP TABLE IF EXISTS bout_table;
# Alternative input (inactive): the inverted-bouts CSV.
#%bql CREATE TABLE bout_table FROM 'wik_bdb/huntbouts_inverted.csv' 
# Create bout_table from the strikes CSV.
%bql CREATE TABLE bout_table FROM 'wik_bdb/all_huntbouts_rev_strikes.csv' 

In [19]:
# Preview the table contents: all columns, at most 25 rows.
%sql SELECT * FROM bout_table LIMIT 25;

Unnamed: 0,Bout Number,Bout Az,Bout Alt,Bout Dist,Bout Delta Pitch,Bout Delta Yaw,Para Az,Para Alt,Para Dist,Para Az Velocity,Para Alt Velocity,Para Dist Velocity
0,0.0,-1.517935,0.209603,79.683312,0.154864,-0.597033,-0.977032,0.62918,354.291281,-0.018158,0.248046,63.812992
1,1.0,-0.091744,-0.041809,36.699616,-0.00372,-0.025881,-0.162832,0.654781,305.528267,-0.16741,-0.006146,59.091504
2,2.0,-0.696157,1.136643,99.484831,0.394846,-0.325807,-0.179093,0.818221,281.527199,-0.009887,-0.040281,40.084665
3,3.0,0.076014,0.09679,44.498897,0.006143,-0.006103,0.073963,0.21028,134.098036,-0.201641,-0.130749,28.201272
4,4.0,-0.091702,-0.448835,37.549377,-0.230556,-0.013962,0.108441,0.299536,77.905007,-1.06672,-0.747382,56.823983
5,0.0,-0.765298,0.13486,46.688688,0.016697,-0.213122,-0.442195,0.487576,78.876586,0.516194,-1.271139,91.357699
6,0.0,0.381012,-0.009323,106.672823,0.050132,0.288992,0.461436,0.400363,372.395718,0.05693,-0.152271,18.417169
7,1.0,0.249314,0.245233,108.092195,0.157152,0.182348,0.198364,0.499294,282.973273,0.002776,-0.207885,22.815677
8,2.0,-0.177434,0.370098,71.89619,0.098214,-0.097365,-0.119681,0.392236,171.041294,-0.113498,-0.777826,-23.743168
9,3.0,-0.032516,-0.027963,41.116158,0.007806,0.015926,0.039726,0.164327,102.885812,0.144669,-1.242611,58.623923


In [20]:
# Nullify placeholder values in bout_table: first the empty string, then the
# literal text 'nan'. Each command reports how many cells it nullified;
# presumably matching cells are replaced by NULL (TODO confirm).
%bql .nullify bout_table ''
%bql .nullify bout_table 'nan'

Nullified 0 cells
Nullified 0 cells


## 2. Automatically learning a CrossCat probabilistic model

### a. Defining an analysis population

In [21]:
# Ask BayesDB to guess a statistical type for each column of bout_table. The
# result lists, per column, the guessed stattype, the number of distinct values
# and the reason for the guess.
%bql GUESS SCHEMA FOR bout_table

Unnamed: 0,column,stattype,num_distinct,reason
0,Bout Number,nominal,25.0,There are fewer than 20 distinct numerical va...
1,Bout Az,numerical,1402.0,There are at least 20 unique numerical values...
2,Bout Alt,numerical,1402.0,There are at least 20 unique numerical values...
3,Bout Dist,numerical,1402.0,There are at least 20 unique numerical values...
4,Bout Delta Pitch,numerical,1399.0,There are at least 20 unique numerical values...
5,Bout Delta Yaw,numerical,1402.0,There are at least 20 unique numerical values...
6,Para Az,numerical,1402.0,There are at least 20 unique numerical values...
7,Para Alt,numerical,1402.0,There are at least 20 unique numerical values...
8,Para Dist,numerical,1402.0,There are at least 20 unique numerical values...
9,Para Az Velocity,numerical,1400.0,There are at least 20 unique numerical values...


In [22]:
%%mml
DROP POPULATION IF EXISTS bout_population;
CREATE POPULATION bout_population FOR bout_table WITH SCHEMA (GUESS STATTYPES OF (*);
                          IGNORE "Bout Number";)

In [None]:
%%mml
DROP POPULATION IF EXISTS bout_population;
CREATE POPULATION bout_population FOR bout_table WITH SCHEMA (GUESS STATTYPES OF (*);
                          IGNORE "Hunt ID", "Bout Number", "Strike Or Abort", "Inferred", "Percent Nans in Bout", 
                                "Cluster Membership", "Avg Para Velocity", "Postbout Para Az", "Postbout Para Alt", 
                                "Postbout Para Dist";)

### b. Creating and analyzing a probabilistic model (automatically)

In [23]:
# Create the generator huntbouts_crosscat for bout_population.
%mml CREATE GENERATOR huntbouts_crosscat FOR bout_population;

In [24]:
# Initialize 50 models for the generator. IF NOT EXISTS presumably makes a
# re-run skip models that were already initialized (TODO confirm).
%mml INITIALIZE 50 MODELS IF NOT EXISTS FOR huntbouts_crosscat;

In [25]:
# Run 100 analysis iterations on huntbouts_crosscat.
# NOTE: the (OPTIMIZED) flag is enabled on this run, although an earlier run
# with it reportedly gave weird results -- sanity-check the fit before relying
# on it, or drop the flag to compare.
%mml ANALYZE huntbouts_crosscat FOR 100 ITERATIONS (OPTIMIZED);

Completed: 100 iterations in 22.051533 seconds.
Completed: 100 iterations in 25.010735 seconds.
Completed: 100 iterations in 21.015184 seconds.
Completed: 100 iterations in 20.247600 seconds.
Completed: 100 iterations in 27.024532 seconds.
Completed: 100 iterations in 37.410548 seconds.
Completed: 100 iterations in 30.212003 seconds.
Completed: 100 iterations in 25.125406 seconds.
Completed: 100 iterations in 30.727558 seconds.
Completed: 100 iterations in 24.992102 seconds.
Completed: 100 iterations in 20.142974 seconds.
Completed: 100 iterations in 29.104442 seconds.
Completed: 100 iterations in 15.223624 seconds.
Completed: 100 iterations in 49.329785 seconds.
Completed: 100 iterations in 24.577059 seconds.
Completed: 100 iterations in 27.063289 seconds.
Completed: 100 iterations in 24.465906 seconds.
Completed: 100 iterations in 66.622996 seconds.
Completed: 100 iterations in 28.874128 seconds.
Completed: 100 iterations in 21.521279 seconds.
Completed: 100 iterations in 21.307270 s