In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.io as sio
sns.set()
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier,  AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import plotly
import plotly.graph_objs as go

# Configure Plotly to be rendered inline in the notebook, so that the
# plotly.offline.iplot(...) figures built in later cells show up in cell output.
plotly.offline.init_notebook_mode()

## Load files
Data files were created via query code located at 
https://github.com/corinneteeter/multipatch_analysis/tree/autoencoder/analyses/autoencoder

Embeddings are generated via https://github.com/AllenInstitute/synphys

In [None]:
# loading files from allen file system mat directory
# mount_point    = '/Users/fruity/Remote-AI-root/allen/aibs' #Rohan's mount point
# mount_point    = '/allen/aibs' #Corinne's mount point
# data_dir       = mount_point + 'mat/Corinne/autoencoder/'
#embeddings_dir = mount_point+'mat/Corinne/autoencoder/embeddings/'

#Load Corinne's data file:
#dat_df=pd.read_csv(data_dir+'ae_data_50hz_sequential_10_24_2019.csv', sep='#',low_memory=False)

#Load embeddings:
#mat = sio.loadmat(embeddings_dir+'TEST_cv_0_pd_0-2_bs_1000_ld_2_ne_10000_ri_0-summary.mat', \
#                  squeeze_me=True)

In [None]:
# Load the inputs from files sitting next to the notebook.
# The UPDATE csv has the classes saved within it; the earlier export
# 'ae_data_50hz_sequential_10_24_2019.csv' did not have pre_class and post_class.
dat_df = pd.read_csv(
    'ae_data_50hz_sequential_10_24_2019UPDATE.csv',
    sep='#',
    low_memory=False,
)

# Embedding summary .mat files; squeeze_me=True drops singleton array dimensions.
embed2D = sio.loadmat(
    'TEST_cv_0_pd_0-2_bs_1000_ld_2_ne_10000_ri_0-summary.mat',
    squeeze_me=True,
)
embed3D = sio.loadmat(
    'TEST-3D_cv_0_pd_0-2_bs_1000_ld_3_ne_10000_ri_0-summary.mat',
    squeeze_me=True,
)

# Show which columns the data file provides.
dat_df.keys()

In [None]:
# Separate out the annotations: they are taken to be the first 11 columns of
# the data file. Note this count might change for different data files.
annotation_columns = dat_df.columns[0:11]
anno_df = dat_df[annotation_columns]
print('anno_df: ', anno_df.columns.values)

In [None]:
# This cell is only needed if this is not assigned in the original file.  Now this is implemented by most
# query calls at
# https://github.com/corinneteeter/multipatch_analysis/blob/autoencoder/analyses/autoencoder/lib.py
#
# # make some new categories to deal with unknown cre_lines
# anno_df['pre_class']=anno_df['pre_cre']
# anno_df['post_class']=anno_df['post_cre']
# anno_df['pre_class'].loc[anno_df['species'] == 'human'] = 'human'
# anno_df['post_class'].loc[anno_df['species'] == 'human'] = 'human'

# anno_df['pre_class'].loc[(anno_df['pre_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['pre_layer'] == '2/3')] = '2/3'
# anno_df['post_class'].loc[(anno_df['post_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['post_layer'] == '2/3')] = '2/3'

# anno_df['pre_class'].loc[(anno_df['pre_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['pre_layer'] == '4')] = '4'
# anno_df['post_class'].loc[(anno_df['post_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['post_layer'] == '4')] = '4'

# anno_df['pre_class'].loc[(anno_df['pre_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['pre_layer'] == '6')] = '6'
# anno_df['post_class'].loc[(anno_df['post_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['post_layer'] == '6')] = '6'

# anno_df['pre_class'].loc[(anno_df['pre_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['pre_layer'] == '5')] = '5'
# anno_df['post_class'].loc[(anno_df['post_cre'] == 'unknown') & (anno_df['species'] == 'mouse') & (anno_df['post_layer'] == '5')] = '5'

In [None]:
# List the distinct post_class labels present in the annotations.
post_class_labels = anno_df['post_class']
post_class_labels.unique()

In [None]:
# Annotation combinations: string labels of the form "<pre>_<post>".
pre_post_class = anno_df['pre_class'].map(str) + '_' + anno_df['post_class'].map(str)
pre_post_ex = anno_df['pre_ex'].map(str) + '_' + anno_df['post_ex'].map(str)
anno_df = anno_df.assign(pre_post_class=pre_post_class, pre_post_ex=pre_post_ex)

# Numeric ids for the combined labels (pandas category codes).
anno_df = anno_df.assign(
    num_pre_post_class=pre_post_class.astype('category').cat.codes,
    num_pre_post_ex=pre_post_ex.astype('category').cat.codes,
)

In [None]:
# Put annotations and embedding coordinates into one DataFrame so it is easy
# to exclude data from all of them at once.
df = anno_df

# Print the shapes of both embeddings and of the annotation table.
print(embed2D['zX'].shape)
print(embed3D['zX'].shape)
print(df.shape)

# DataFrame.join aligns on the index, so the embedding rows are matched to the
# annotation rows by position in the default integer index.
embedding_2d = pd.DataFrame(embed2D['zX'], columns=['z2Da', 'z2Db'])
embedding_3d = pd.DataFrame(embed3D['zX'], columns=['z3Da', 'z3Db', 'z3Dc'])
df = df.join(embedding_2d).join(embedding_3d)
df.keys()

# Look at classification of presynaptic neuron based on excitation

In [None]:
# Keep only rows whose pre_ex label is not 'U' (unknown excitation), then add
# a numeric category code of pre_ex for use as a plotting colour.
df_ex = (
    df.loc[df['pre_ex'] != 'U']
    .assign(num_pre_ex=lambda frame: frame['pre_ex'].astype('category').cat.codes)
)

### 2D embedding

In [None]:
# 2D embedding coordinates of the rows with known pre_ex.
X2D = df_ex[['z2Da', 'z2Db']].values

# Scatter the embedding, one small dot per row, coloured by the pre_ex code.
fig, ax = plt.subplots(figsize=(15, 15))
ax.scatter(X2D[:, 0], X2D[:, 1], s=1, c=df_ex['num_pre_ex'].values, cmap='viridis')
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)

In [None]:
# Classification target: numeric pre_ex code of each row in df_ex.
y = df_ex.loc[:, 'num_pre_ex'].values

In [None]:
# Train a Random Forest on the 2D embedding to see how well the pre_ex class
# can be classified from it.
y = df_ex['num_pre_ex'].values

# Stratify on y so train and test keep the same class proportions;
# random_state fixes the split and the forest so a re-run reproduces the numbers.
X_train, X_test, y_train, y_test = train_test_split(X2D, y, stratify=y, random_state=0)
clf = RandomForestClassifier(max_depth=None, min_samples_split=2, random_state=0).fit(X_train, y_train)
# Alternative model: clf = LogisticRegression().fit(X_train, y_train)
y_predict = clf.predict(X_test)
print('RF accuracy', clf.score(X_test, y_test))

# Baseline = accuracy of always predicting the most frequent class in the test
# set, whichever code that class happens to have.
_, class_counts = np.unique(y_test, return_counts=True)
print('base line accuracy', class_counts.max() / float(len(y_test)))
print('confusion', metrics.confusion_matrix(y_test, y_predict))

#### Look in 3D

In [None]:
# 3D embedding coordinates of the rows with known pre_ex; shown as cell output.
X3D = df_ex.loc[:, ['z3Da', 'z3Db', 'z3Dc']].values
X3D

In [None]:
# Marker styling: colour each point by its numeric pre_ex code and show a colour bar.
marker_style = dict(
    size=5,
    color=df_ex['num_pre_ex'].values,
    colorscale='viridis',
    colorbar=dict(),
)

# One 3D scatter trace over the three columns of X3D.
trace = go.Scatter3d(
    x=X3D[:, 0],
    y=X3D[:, 1],
    z=X3D[:, 2],
    mode='markers',
    marker=marker_style,
)

# Layout with all four margins set to zero.
layout = go.Layout(margin=dict(l=0, r=0, b=0, t=0))

data = [trace]
plot_figure = go.Figure(data=data, layout=layout)

# Render the figure inline.
plotly.offline.iplot(plot_figure)

# Look at cre_lines

In [None]:
# Summary statistics of every numeric column, per combined pre/post class code.
rows_by_class = df.groupby('num_pre_post_class')
rows_by_class.describe()


In [None]:
# TODO: update this to represent the small number of cre-lines; the selection
# below is not right yet (it still only filters on pre_ex != 'U').
df_cre = df[df['pre_ex'] != 'U']

# Assign numeric category for plotting.
# NOTE(review): this now updates df_cre, the frame created just above, rather
# than re-assigning df_ex -- confirm this is the intended target.
df_cre = df_cre.assign(num_pre_ex=(df_cre['pre_ex']).astype('category').cat.codes)


In [None]:
# Scatter the full 2D embedding, coloured by the combined pre/post class code.
# The coordinates come straight from the loaded embedding file: X2D is a plain
# array restricted to the df_ex rows, so it has no 'zX' key and would not line
# up with anno_df.
# NOTE(review): assumes embed2D['zX'] has one row per anno_df row, in the same
# order -- confirm against the shapes printed when df was built.
Z2D = embed2D['zX']

plt.figure(figsize = (15,15))
plt.scatter(Z2D[:,0], Z2D[:,1], s=1, c=anno_df['num_pre_post_class'].values, cmap='viridis')
ax = plt.gca()
ax.set_xlim(-7,7)
ax.set_ylim(-7,7)

# Covariance matrix of the embedding dimensions (np.cov expects variables in rows).
print(np.cov(np.transpose(Z2D)))

In [None]:
# Cross-validated decision tree predicting the combined pre/post excitation
# code from the embedding coordinates.
# NOTE(review): features are taken from the 2D embedding (embed2D); use
# embed3D['zX'] instead to score the 3D embedding -- confirm which was intended.
X = embed2D['zX']
y = anno_df['num_pre_post_ex'].values
clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2,
     random_state=0)

# Mean accuracy over 5 cross-validation folds.
scores = cross_val_score(clf, X, y, cv=5)
scores.mean()

In [None]:
# 3D embedding coordinates of every row in df (this rebinds X3D).
X3D = df[['z3Da', 'z3Db', 'z3Dc']].values
x_coords, y_coords, z_coords = X3D[:, 0], X3D[:, 1], X3D[:, 2]

# 3D scatter trace coloured by the combined pre/post class code, with a colour bar.
trace = go.Scatter3d(
    x=x_coords,
    y=y_coords,
    z=z_coords,
    mode='markers',
    marker={
        'size': 5,
        'color': df['num_pre_post_class'].values,
        'colorscale': 'viridis',
        'colorbar': {},
    },
)

# Layout with zero margins on every side.
layout = go.Layout(margin=dict(l=0, r=0, b=0, t=0))

data = [trace]
plot_figure = go.Figure(data=data, layout=layout)

# Render the figure inline.
plotly.offline.iplot(plot_figure)

In [None]:
# Fit the current classifier `clf` on all of X and y (no held-out data).
clf.fit(X=X, y=y)

In [None]:
# Boosted random forest on a stratified train/test split of X, y.
# random_state is fixed on the split, the base forest and the booster so that
# re-running the notebook reproduces the same score.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)
clf = AdaBoostClassifier(
    RandomForestClassifier(max_depth=None, min_samples_split=2, random_state=0),
    random_state=0,
).fit(X_train, y_train)

# Keep the test-set predictions for inspection instead of discarding them.
y_predict = clf.predict(X_test)

# Mean accuracy on the held-out test set.
clf.score(X_test, y_test)