In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# !pip install umap-learn
import umap

### ORN responses

In [None]:
# Load the response table (Data S1) straight from the samuellab/Larval-ORN
# GitHub repository and display it.
res = pd.read_csv(
    'https://raw.githubusercontent.com/samuellab/Larval-ORN/master/'
    'Figure2/data/Data%20S1.csv'
)
res

In [None]:
# Average the rows of each (Odor, Concentration) condition.
# numeric_only=True keeps this working on pandas >= 2.0, where averaging a
# non-numeric column raises TypeError instead of silently dropping it
# (presumably relevant for Exp_ID -- confirm its dtype).
res_ave = (
    res.groupby(['Odor', 'Concentration'])
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# Number of averaged conditions recorded at each concentration.
res_ave['Concentration'].value_counts()

In [None]:
# Number of averaged conditions (i.e. concentrations) available per odour.
res_ave['Odor'].value_counts()

### odour origin info 

In [None]:
# Odour source annotations; this table is in Supplement 3 of the paper.
# The file is looked up under the current user's ~/Downloads rather than one
# specific user's absolute home path, so the notebook runs on other machines.
source = pd.read_csv(Path.home() / 'Downloads' / 'Si2019_odour_sources.csv')
source.head()

In [None]:
# How many response rows have an odour listed in the source table (True)
# versus not listed (False).
res['Odor'].isin(source['odorant']).value_counts()

In [None]:
# Odorants in the source table that never appear in the response table.
# Author's note: if recalled correctly, this one didn't elicit any response.
source.loc[~source['odorant'].isin(res['Odor']), 'odorant']

### odour structure descriptors (E-Dragon)

In [None]:
# Structural descriptor table (E-Dragon), one MOL_NAME per row.
# The file is looked up under the current user's ~/Downloads rather than one
# specific user's absolute home path, so the notebook runs on other machines.
# MOL_NAME is lower-cased so it can be compared with res.Odor
# (presumably lower-case already -- confirm).
odour_des = pd.read_csv(
    Path.home() / 'Downloads' / 'Odor_Structure_Descriptors_EDragon.csv'
).assign(MOL_NAME=lambda df: df['MOL_NAME'].str.lower())
odour_des.head()

In [None]:
# Descriptor rows whose MOL_NAME has no exact match among the response odours.
odour_des.loc[~odour_des['MOL_NAME'].isin(res['Odor'])]

In [None]:
# Rewrite two descriptor names to the spelling used for the same odours in
# the response table, so the later merge on odour name can match them.
odour_des['MOL_NAME'] = odour_des['MOL_NAME'].replace({
    '2-phenylethanol': '2-phenyl ethanol',
    '(1r)-(-)-myrtenal': 'myrtenal',
})

In [None]:
# There are ORNs not recorded for some conditions: count the missing values
# per (Odor, Concentration) group and list the groups that have any.
# The counts come from a vectorised isna().groupby().sum() instead of
# groupby.apply with a lambda, which is slower and (since pandas 2.2) warns
# about operating on the grouping columns. The key columns are excluded from
# the counts; they would only ever contribute zeros.
group_keys = ['Odor', 'Concentration']
nas = (
    res.drop(columns=group_keys)
    .isna()
    .groupby([res[key] for key in group_keys])
    .sum()
)
nas_per_condition = nas.sum(axis=1)
nas_per_condition[nas_per_condition > 0]

In [None]:
# Keep only rows without any missing value, then strip the identifier
# columns so that only the columns fed to UMAP remain.
res_nona = res.dropna(how='any')
res_data = res_nona.drop(columns=['Odor', 'Exp_ID', 'Concentration'])

In [None]:
# Shape of the UMAP input: rows with no missing values, with the Odor,
# Exp_ID and Concentration columns removed.
res_data.shape

In [None]:
# Embed the response rows with UMAP (two output columns, used as x/y below).
# UMAP is stochastic: without a fixed seed the layout, and every plot and
# correlation derived from it, changes on each run. Pinning random_state
# makes Restart & Run All reproducible.
reducer = umap.UMAP(random_state=42)
embedding = reducer.fit_transform(res_data.values)
embedding.shape

In [None]:
# One row per embedded point: UMAP coordinates plus odour and concentration
# (taken positionally from res_nona, the frame the UMAP input was built
# from), then joined with the source annotations and the structure
# descriptors on odour name. Both merges use pandas' default inner join.
meta = (
    pd.DataFrame(embedding, columns=['x', 'y'])
    .assign(
        odour=res_nona['Odor'].values,
        concentration=res_nona['Concentration'].values,
    )
    .merge(source, left_on='odour', right_on='odorant')
    .merge(odour_des, left_on='odour', right_on='MOL_NAME')
)
meta.head()

In [None]:
# Embedding coloured by odour; marker size encodes concentration.
# The axes come from a fresh 15x10 inch figure.
sns.scatterplot(
    data=meta, x='x', y='y', hue='odour', size='concentration',
    ax=plt.figure(figsize=(15, 10)).gca(),
)

In [None]:
# Embedding coloured by the 'valence' column; marker size encodes
# concentration. The axes come from a fresh 15x10 inch figure.
sns.scatterplot(
    data=meta, x='x', y='y', hue='valence', size='concentration',
    ax=plt.figure(figsize=(15, 10)).gca(),
)

In [None]:
# Embedding coloured by the 'origin' column; marker size encodes
# concentration. The axes come from a fresh 15x10 inch figure.
sns.scatterplot(
    data=meta, x='x', y='y', hue='origin', size='concentration',
    ax=plt.figure(figsize=(15, 10)).gca(),
)

In [None]:
# Embedding coloured by 'valence'; marker size encodes the 'P1s' column
# (presumably one of the structure descriptors -- confirm).
sns.scatterplot(
    data=meta, x='x', y='y', hue='valence', size='P1s',
    ax=plt.figure(figsize=(15, 10)).gca(),
)

## chemical feature exploration

In [None]:
# Pairwise correlation matrix over the embedding coordinates and the columns
# left after removing every source-table column and the identifier columns.
corres = meta.drop(
    columns=[
        *source.columns,
        'odour', 'concentration', 'No.', 'MOL_NAME', "PUBCHEM_ID", 'MOL_ID',
    ],
).corr()

In [None]:
# Size of the square correlation matrix; x and y are among its rows/columns.
corres.shape

In [None]:
# Strongest positive correlations with the x axis. After the descending
# sort, row 0 is skipped (presumably x's own self-correlation), leaving the
# next nine rows.
corres.loc[:, ['x', 'y']].sort_values(by='x', ascending=False).iloc[1:10]

In [None]:
# Strongest negative (reverse) correlations with the x axis.
# In an ascending sort the most negative value is row 0 and x's own
# self-correlation sorts towards the end, so no leading row is skipped.
# Skipping row 0 here would drop the strongest anti-correlation.
corres[['x', 'y']].sort_values('x').iloc[:9, :]

In [None]:
# Distribution of every column's correlation with the x coordinate.
corres['x'].plot(kind='hist')

In [None]:
# Strongest positive correlations with the y axis. After the descending
# sort, row 0 is skipped (presumably y's own self-correlation), leaving the
# next nine rows.
corres.loc[:, ['x', 'y']].sort_values(by='y', ascending=False).iloc[1:10]

In [None]:
# Embedding coloured by 'valence'; marker size encodes the 'BEHm1' column
# (presumably one of the structure descriptors -- confirm).
sns.scatterplot(
    data=meta, x='x', y='y', hue='valence', size='BEHm1',
    ax=plt.figure(figsize=(15, 10)).gca(),
)