In [1]:
import pandas as pd
import numpy as np
import glob as glob

In [8]:
"""### Import Files

Import all files from specified folder and combine into one dataframe called 'df'
"""

path = r'/Users/irishardege/Library/CloudStorage/OneDrive-UniversityofCambridge/Zoology/DATA/TEVC/Iris_52_54_GFP_tags/DRs' # use your path
all_files = glob.glob(path + "/*_Datatable.dat") # use your extension

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0, sep='\t') # for .dat files you must specify separation for example sep='\t'
    li.append(df)

df = pd.concat(li, axis=0, ignore_index=True)

df.shape
df.head(5)

Unnamed: 0,IV Prot.,IVCurveID,Well,ID,Series,ROI,Minimum,Pos Min,Maximum,Pos Max,...,Comp. 1,conc. 1,unit 1,Comp. 2,conc. 2,unit 2,Comp. 3,conc. 3,unit 3,Extremum
0,,,B1,14,0,0,-90,28668,-47,30102,...,Dopamine,1,nM,empty,0,mM,empty,0,mM,-47
1,,,B1,15,0,0,-36,28292,1,29698,...,Dopamine,10,nM,empty,0,mM,empty,0,mM,1
2,,,B1,16,0,0,-29,29882,-1,30318,...,Dopamine,100,nM,empty,0,mM,empty,0,mM,-1
3,,,B1,17,0,0,-249,20406,12,13220,...,Dopamine,1,µM,empty,0,mM,empty,0,mM,12
4,,,B1,18,0,0,-2374,20090,16,13536,...,Dopamine,10,µM,empty,0,mM,empty,0,mM,-2374


In [11]:
"""### Remove excess coloumns and clean data
Here we are keeping only the relevant information, you can add or remove coloumns as you like.
We will also be standardising the units and generally cleaning up the data
"""

#coloumn names must be updated to those in your files, these are based on the standard coloumn titles from Robocyte2+ software export
data =  df[['Well'] + ['Buffer'] + ['Comp. 1'] + ['conc. 1'] + ['unit 1'] + ['Minimum'] + ['Baseline Average'] + ['Start Date']]

data['date'] = data['Start Date'].str.extract('(../../....)', expand=True)

"""Transform units all to uM scale"""

#Transform units all to µM scale
conditions = [
    (data['unit 1'] == 'mM'),
    (data['unit 1'] == 'µM'),
    (data['unit 1'] == 'nM')]

choices = ['1000', '1', '0.001']

data['factor'] = np.select(conditions, choices, default='0')
data['factor']=data.factor.astype(float)
data['Agonist_Concentration_µM'] = data['conc. 1'] * data['factor']

data =  data[['Well'] + ['Buffer'] + ['Comp. 1'] + ['Minimum'] + ['Agonist_Concentration_µM'] + ['date']]
data.reset_index(drop=True)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['date'] = data['Start Date'].str.extract('(../../....)', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['factor'] = np.select(conditions, choices, default='0')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['factor']=data.factor.astype(float)
A value is trying to be set 

Unnamed: 0,Well,Buffer,Comp. 1,Minimum,Agonist_Concentration_µM,date
0,B1,ND96,Dopamine,-90,0.001,
1,B1,ND96,Dopamine,-36,0.01,
2,B1,ND96,Dopamine,-29,0.1,
3,B1,ND96,Dopamine,-249,1.0,
4,B1,ND96,Dopamine,-2374,10.0,


In [12]:
#data['log_Agonist_µM'] = np.log(data['Agonist_Concentration_µM']) #if you wish to calculate log concentration
data['Agonist'] = data['Comp. 1']
data['Response'] = data['Minimum']
data = data.drop(columns=['Comp. 1'])
data.head()

"""## Transform data to I/Imax for each oocyte"""

norm = data.Minimum / data.groupby(['Well']).Minimum.transform(np.min)
data['Response'] = norm
normdata = data.drop(columns=['Minimum', 'Buffer']) #drop unwanted coloumns
normgroup = normdata.groupby(['Agonist_Concentration_µM']).describe()

grouped_data = normdata.groupby(['Agonist', 'Agonist_Concentration_µM']) #view grouped data
grouped_data['Response'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
Agonist,Agonist_Concentration_µM,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Dopamine,0.001,11.0,0.027489,0.018129,0.009183,0.01422,0.022716,0.034677,0.061823
Dopamine,0.01,11.0,0.024111,0.027948,0.008531,0.010089,0.011322,0.023574,0.102891
Dopamine,0.1,11.0,0.02106,0.016037,0.006003,0.012623,0.016943,0.020884,0.059487
Dopamine,1.0,11.0,0.117884,0.054176,0.046761,0.075144,0.111722,0.160624,0.211515
Dopamine,10.0,11.0,0.529521,0.085146,0.355204,0.508504,0.54662,0.594989,0.624026
Dopamine,100.0,11.0,0.93567,0.040517,0.872677,0.904698,0.942263,0.964281,0.997788
Dopamine,1000.0,11.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


In [14]:
#none groupedd data
normdata.head()

Unnamed: 0,Well,Agonist_Concentration_µM,date,Agonist,Response
0,B1,0.001,,Dopamine,0.022716
1,B1,0.01,,Dopamine,0.009086
2,B1,0.1,,Dopamine,0.00732
3,B1,1.0,,Dopamine,0.062847
4,B1,10.0,,Dopamine,0.599192


In [16]:
"""## Export as .csv
comment out if you do not want to export .csv
change path and file name before running

exports grouped mean, std, N into a new .csv file

Includes *injection ID*, Agonist and log agonist concentrations in uM
"""

export = normdata #select data to export
export = normdata.groupby(['Agonist', 'Agonist_Concentration_µM']) #select groups to average
export = export['Response'].describe()
#export = export.loc[export['Agonist_Concentration_µM'] == 5.000] #if you wish to export a single dose
export.head(15) #if you wish to view the exported data
export.to_csv(r'/Users/irishardege/Library/CloudStorage/OneDrive-UniversityofCambridge/Zoology/DATA/TEVC/Iris_52_54_GFP_tags/DRs/export.csv')