## Compare the last sessions of 70645, 70644 and 71343 with the rest of the deep layers / layer II (arch) recordings

In [1]:
# ts fresh stuff

Load dataset for layer2 (superficial inhibited Dave's arch mice) from hard disk. 

In [2]:
# the following two lines indicate that external functions are auto-reloaded as soon as they change. 
%load_ext autoreload
%autoreload 2
# Print statements 
from __future__ import print_function # Python 2.x

In [3]:
# General stuff:
import sys
import argparse
import os
import json
import numpy as np
import math
import psycopg2
import cPickle
import numpy as np
import pandas as pd
from datetime import date
from tqdm import tqdm_notebook

# Plotting:
from matplotlib import pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
import matplotlib as mpl
%matplotlib inline

# External functions from subfolder /database_helpers. 
# as soon as you change something in there and press save, it will auto reload on next execution.
from database_helpers.psql_start import *
from database_helpers.create_tables import *
from database_helpers.write2tables import *
from postgres_analysis_helpers.general import *
from postgres_analysis_helpers.get_data import *
from tsfresh import extract_features

from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute


# register pickle type to retrieve binary data from database
psycopg2.extensions.register_type(psycopg2.extensions.new_type(psycopg2.BINARY.values, 'BINARY-PICKLE', cast_pickle))

Loaded analysis helpers: General
Loaded postgres_analysis_helpers -> general
Loaded postgres_analysis_helpers -> get_data


In [4]:
db_status = test_connect()
if db_status == False:
    print('Grrr... no database connection could be established.')
else:
    print('Yippiyeah! Database connection is established!')

Connecting to the PostgreSQL database...
Yippiyeah! Database connection is established!


### Make an empty dataframe and call the retrieval function

In [5]:
# if you want to save
export_path_pickle  = r"C:\work\python\klusta_analysis\postgres_notebooks\export_dataframes"
export_path_pickle = "/".join(export_path_pickle.split("\\"))
#base_dataframe.to_pickle(export_path_pickle + "/70645_70644_71343.pkl")

In [6]:
deep_layer_confirmed = pd.read_pickle(export_path_pickle + "/70645_70644_71343.pkl")

In [7]:
deep_layer_confirmed.head(2)

Unnamed: 0,tetrode_no,animal_id,n_drive_user,session_ts,cluster_no,session_name,masked_ratemap,informationcontent,gridstats_ellipse_3,hdpeakrate,...,phase_stats_var,rayleigh_p,spike_trig_lfp,mean_freq,spike_no,mean_wf,maxima_wf,std_wf,lfp_session,theta_freq
0,8,70645,horsto,2017-02-19 10:30:13,2,19022017s1,"[[0.00567601919074, 0.0054057056967, 0.0051521...",0.262193,13.7871,0.341274,...,3.43028,0.000474072,spike_trig_LFP_avg spike_trig_LFP_strong...,0.21266,383,"{0: [-0.00522193211488, 0.318537859008, 0.4046...","{0: 20.5483028721, 1: 51.0861618799, 2: 20.214...","{0: [12.0560039489, 11.9957714897, 11.90215456...",time eeg0 eeg1 eeg2 eeg3 eeg_...,8.76667
1,8,70644,horsto,2017-03-29 09:00:50,2,29032017s1,"[[--, --, --, --, --, --, 0.0211151713571, --,...",0.947181,22.4575,8.09462,...,5.46125,0.000291955,spike_trig_LFP_avg spike_trig_LFP_strong...,0.762698,916,"{0: [-2.01965065502, -2.97598253275, -4.481441...","{0: 33.903930131, 1: 17.6462882096, 2: 3.95305...","{0: [4.64288513865, 4.79668185163, 4.989583527...",time eeg0 eeg1 eeg2 eeg3 eeg_...,8.26667


In [8]:
deep_layer_confirmed.columns

Index([u'tetrode_no', u'animal_id', u'n_drive_user', u'session_ts',
       u'cluster_no', u'session_name', u'masked_ratemap',
       u'informationcontent', u'gridstats_ellipse_3', u'hdpeakrate',
       u'gridstats_ellipse_1', u'gridstats_ellipse_5', u'gridstats_ellipse_4',
       u'gridstats_ellipsetheta', u'numfields', u'meandirection',
       u'stabilityhalf', u'thetaindex', u'meanrate', u'coherence',
       u'peakdirection', u'gridstats_orientation_3', u'fieldmain', u'sparsity',
       u'gridscore', u'gridstats_orientation_2', u'gridstats_orientation_1',
       u'borderscore', u'peakrate', u'mvl', u'gridstats_ellipse_2',
       u'informationrate', u'meanrateoutsidefields', u'selectivity',
       u'speedscore', u'gridstats_spacing_2', u'gridstats_spacing_3',
       u'gridstats_spacing_1', u'bins_angle_center', u'tc_stats_mvl',
       u'tc_stats_mean', u'tc_stats_var', u'hist_angle_smooth', u'theta_freq',
       u'hist_isi', u'bin_edges_isi', u'isi_stats_percent_bursts',
       u'isi_

"Deep layer confirmed" contains dataset comprising 70645,70644,71343

In [9]:
deep_layer_confirmed.tail(2)

Unnamed: 0,tetrode_no,animal_id,n_drive_user,session_ts,cluster_no,session_name,masked_ratemap,informationcontent,gridstats_ellipse_3,hdpeakrate,...,phase_stats_var,rayleigh_p,spike_trig_lfp,mean_freq,spike_no,mean_wf,maxima_wf,std_wf,lfp_session,theta_freq
822,6,71343,horsto,2017-02-20 17:18:35,9,20022017s1,,0.67897,11.1775,0.16548,...,,,,,,,,,time eeg0 eeg1 eeg2 eeg3 eeg_...,9.2
823,5,71343,horsto,2017-02-17 11:34:41,13,17022017s1,,0.196932,9.18037,1.05454,...,,,,,,,,,time eeg0 eeg1 eeg2 eeg3 eeg_...,8.33333


In [10]:
deep_layer_confirmed.dropna(subset=['tetrode_no','cluster_no'],inplace=True)
deep_layer_confirmed.drop_duplicates(subset=['session_ts','animal_id','tetrode_no','cluster_no'],inplace=True)

In [11]:
deep_layer_confirmed.sort_values('session_ts',ascending=True, inplace=True)
deep_layer_confirmed.reset_index(drop=True,inplace=True)

#### ts fresh

In [12]:
# load superficial layer dataset

In [13]:
lfp_cells_sup = pd.read_pickle(export_path_pickle + "/lfp_cells_superficial.pkl")

In [14]:
lfp_cells_sup.animal_id.unique()

array(['70114', 'T4743', '70110', 'T4935', '70113'], dtype=object)

### take only the last 5 sessions for 70645, 70644 and 71343

In [15]:
_70645_last = deep_layer_confirmed[deep_layer_confirmed.animal_id == '70645']
_70645_last.drop_duplicates(subset=['session_ts'],inplace=True) 
_70645_last.reset_index(drop=True,inplace=True)
_70645_last = _70645_last[[x in _70645_last.session_ts.unique()[-5:] for x in _70645_last.session_ts]]
    
_70644_last = deep_layer_confirmed[deep_layer_confirmed.animal_id == '70644']
_70644_last.drop_duplicates(subset=['session_ts'],inplace=True) 
_70644_last.reset_index(drop=True,inplace=True)
_70644_last = _70644_last[[x in _70644_last.session_ts.unique()[-5:] for x in _70644_last.session_ts]]

_71343_last = deep_layer_confirmed[deep_layer_confirmed.animal_id == '71343']
_71343_last.drop_duplicates(subset=['session_ts'],inplace=True) 
_71343_last.reset_index(drop=True,inplace=True)
_71343_last = _71343_last[[x in _71343_last.session_ts.unique()[-5:] for x in _71343_last.session_ts]]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return func(*args, **kwargs)


In [16]:
lfp_cells_sup.animal_id.unique()

array(['70114', 'T4743', '70110', 'T4935', '70113'], dtype=object)

### edit dataframe structure ... 

In [80]:
mouse_labels = dict()
mouse_labels['70645'] = 0
mouse_labels['70644'] = 1
mouse_labels['71343'] = 2

mouse_labels['70114'] = 3
mouse_labels['T4743'] = 4
mouse_labels['70110'] = 5
mouse_labels['T4935'] = 6
mouse_labels['70113'] = 7

In [81]:
mouse_labels

{'70110': 5,
 '70113': 7,
 '70114': 3,
 '70644': 1,
 '70645': 0,
 '71343': 2,
 'T4743': 4,
 'T4935': 6}

In [103]:
max_snippet = 5000 # this corresponds to 4 seconds sampled at 250 Hz
time_axis = np.arange(0,max_snippet)/250.

#labels = []

counter = 0 
for no,df in tqdm_notebook(enumerate([_70645_last,_70644_last,_71343_last,lfp_cells_sup])):
    for i in tqdm_notebook(xrange(len(df))):
        
        #if df.iloc[i].animal_id in ['70645','70644','71343']:
        #    label_col = np.zeros(max_snippet) # '0' for deep layers 
        #else:
        #    label_col = np.zeros(max_snippet) + 1 # '1' for sup layers

        #labels.append(label_col)

        #id_column = np.array(((df.iloc[i].animal_id + " ")* (max_snippet-1)).split(" "))
        #print(df.iloc[i].animal_id)
        id_column = np.array(np.zeros(max_snippet) + mouse_labels[df.iloc[i].animal_id],dtype=int)
        
        
        moving = df.lfp_session.iloc[i].eeg_mean.values[df.lfp_session.iloc[i].speed > 5][:max_snippet]
        still = df.lfp_session.iloc[i].eeg_mean.values[df.lfp_session.iloc[i].speed < 5][:max_snippet]

        # pad values if there are not enough:
        moving = np.lib.pad(moving, (0, max_snippet - len(moving)), 'wrap')
        still = np.lib.pad(still, (0, max_snippet - len(still)), 'wrap')

        
        #lfp_temp = pd.DataFrame({'time': time_axis, 'moving': moving,'still':still,'id':id_column})
        lfp_temp = pd.DataFrame({'time': time_axis, 'moving': moving,'id':id_column})

        if counter == 0:
            lfp_for_tsfresh = pd.DataFrame(lfp_temp)      
        else:
            lfp_for_tsfresh = pd.concat([lfp_for_tsfresh,lfp_temp])

        counter += 1




In [104]:
#labels =[item for sublist in labels for item in sublist]

In [105]:
lfp_for_tsfresh.id.unique()

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

In [106]:
lfp_for_tsfresh.dropna(inplace=True,how='any')
lfp_for_tsfresh = lfp_for_tsfresh[pd.isnull(lfp_for_tsfresh.id) == False]
lfp_for_tsfresh = lfp_for_tsfresh[np.isfinite(lfp_for_tsfresh.moving) == True]
lfp_for_tsfresh.reset_index(drop=True,inplace=True)

In [107]:
len(lfp_for_tsfresh)

265000

In [108]:
#filter out bad labels:
#labels = np.array(labels,dtype=np.int64)[[len(x) > 0 for x in lfp_for_tsfresh.id.values]]
#lfp_for_tsfresh = lfp_for_tsfresh[[len(x) > 0 for x in lfp_for_tsfresh.id.values]]

In [109]:
len(lfp_for_tsfresh),len(labels)

(265000, 8)

In [110]:
lfp_for_tsfresh.id.unique()

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

In [111]:
lfp_for_tsfresh.head(4)

Unnamed: 0,id,moving,time
0,0,0.0,0.0
1,0,0.0,0.004
2,0,-34.25,0.008
3,0,-46.5,0.012


### start by extracting features

In [112]:
lfp_for_tsfresh.id.unique()

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

In [113]:
labels = [0,0,0,1,1,1,1,1]
len(labels)

8

In [114]:
labels = pd.Series(labels)

In [115]:
labels

0    0
1    0
2    0
3    1
4    1
5    1
6    1
7    1
dtype: int64

In [116]:
print('starting ts fresh....')
extracted_features = extract_features(lfp_for_tsfresh, column_id="id", column_sort="time")
impute(extracted_features)

starting ts fresh....


Feature Extraction:   0%|                                                                                                                                                                                             | 0/8 [00:00<?, ?it/s]


MemoryError: 

In [101]:
labels

0    0
1    0
2    0
3    1
4    1
5    1
6    1
7    1
dtype: int64

In [99]:
features_filtered = select_features(extracted_features, labels)

export_path_pickle  = r"C:\work\python\klusta_analysis\postgres_notebooks\export_dataframes"
export_path_pickle = "/".join(export_path_pickle.split("\\"))
features_filtered.to_pickle(export_path_pickle + "/features_filtered.pkl")
print('finished!')

Feature Selection: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 222/222 [00:04<00:00, 46.05it/s]


finished!


In [100]:
features_filtered

0
1
2
6
7
3
5
4
