In [1]:
# Import packages
import os
import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import datetime

# Import dabest for plotting from a local checkout of DABEST-python.
# Point the DABEST_PATH environment variable at your own checkout; the path
# below is only the fallback used when that variable is not set.
import sys
DABEST_PATH = os.environ.get('DABEST_PATH', '/Users/CeceliaAndrews/DABEST-python')
# Keep the entry first on sys.path (so this checkout wins over any other
# dabest install) without stacking up duplicates when the cell is re-run.
if DABEST_PATH in sys.path:
    sys.path.remove(DABEST_PATH)
sys.path.insert(0, DABEST_PATH)
import dabest

# Import Cece's migration analysis and bootstrapping modules.
# If you are working in git repository data/nSMase2/MigrationArea
# you should not need to change the path.
import mig_analysis as ma
import cece_bootstrapping as cbs

In [2]:
# The data to analyze lives in a 'CSVs' folder inside the current working directory.
working_dir = os.path.abspath('')
path = working_dir + '/CSVs/'

# Build the analysis dataframe from that folder.
# get_ultimate_dataframe checks that the csv files have the expected layout
# and prints information about them while it runs.
df_data = ma.get_ultimate_dataframe(path)

# Preview the first 20 rows.
df_data.head(20)

Number of files in this folder: 50


In first file:
Number of rows: 2
Number of columns: 6
Column names: [' ' 'Label' 'Area' 'Mean' 'IntDen' 'RawIntDen']
Image name: 20180715_SMPD3MONC11m3bCat_0,8mM2ugul_Pax7GFP_Emb6_8ss_10x
Image name components:
20180715
SMPD3MONC11m3bCat
0,8mM2ugul
Pax7GFP
Emb6
8ss
10x


You're good to go! All files match expected format.


Unnamed: 0,Date,Treatment,Dose,Image,Somites,Control Area,Experiment Area,Exp/Ctl Area,Norm Control Area,Norm Experiment Area
0,20180715,SMPD3MONC11m3bCat,"0,8mM2ugul","20180715_SMPD3MONC11m3bCat_0,8mM2ugul_Pax7GFP_...",8ss,72103.705,23918.319,0.331721,1.119337,0.371308
1,20180630,SMPD3MO,"0,8mM","20180630_SMPD3MO_0,8mM_Pax7_Emb5_8ss_10x",8ss,80452.846,47922.588,0.595661,0.955197,0.568973
2,20181119,SMPD3MO,"0,6mM","20181119_SMPD3MO_0,6mM_Pax7_Emb6_7ss_10x",7ss,47346.082,27928.306,0.589876,0.562129,0.331586
3,20180630,SMPD3MO,"0,8mM","20180630_SMPD3MO_0,8mM_Pax7_Emb3_9ss_10x",9ss,146043.286,47621.453,0.326078,1.733937,0.565398
4,20180514,SMPD3MO,"0,8mM","20180514_SMPD3MO_0,8mM_Pax7Laminin_Emb6_8ss_10x",8ss,72508.322,52025.129,0.717506,0.860874,0.617682
5,20190603,SMPD3MONC11m3bCat,"0,8mM2ugul","20190603_SMPD3MONC11m3bCat_0,8mM2ugul_Cad6bRFP...",8ss,66671.106,20264.501,0.303947,1.035002,0.314586
6,20181102,SMPD3gRNA1RNP,"0,35ugul","20181102_SMPD3gRNA1RNP_0,35ugul_Pax7Laminin_Em...",7ss,56474.135,44153.345,0.781833,1.411108,1.103251
7,20180715,SMPD3MONC11m3bCat,"0,8mM2ugul","20180715_SMPD3MONC11m3bCat_0,8mM2ugul_Pax7GFP_...",7ss,50204.911,35216.774,0.701461,0.779381,0.546705
8,20181119,SMPD3MO,"1,0mM","20181119_SMPD3MO_1,0mM_Pax7_Emb5_6ss_10x",6ss,41534.435,22358.227,0.538306,0.493128,0.265454
9,20181102,SMPD3gRNA1RNP,"0,35ugul","20181102_SMPD3gRNA1RNP_0,35ugul_Pax7Laminin_Em...",7ss,34125.389,26955.85,0.789906,0.852684,0.67354


In [3]:
# Save the dataframe as a csv; the filename is the value returned by
# ma.get_analysis_date() followed by '_MigrationResults.csv'.
results_filename = ma.get_analysis_date() + '_MigrationResults.csv'
df_data.to_csv(results_filename)

In [4]:
# Print summary statistics for each experiment.
# In the recorded output this prints Mean, SEM and Correlation tables of the
# area columns, indexed by Treatment.
ma.print_stats(df_data)

Mean
                   Control Area  Experiment Area  Exp/Ctl Area
Treatment                                                     
SMPD3MO            84226.407037     49172.997333      0.603374
SMPD3MONC11m3bCat  64416.428750     28288.802625      0.462259
SMPD3gRNA1DNA      84971.282750     63640.824000      0.753154
SMPD3gRNA1RNP      40021.131571     32142.082286      0.804300


SEM
                   Control Area  Experiment Area  Exp/Ctl Area
Treatment                                                     
SMPD3MO             6747.873789      4089.042977      0.028058
SMPD3MONC11m3bCat   4855.633971      2025.209200      0.054928
SMPD3gRNA1DNA       7102.405584      6431.628777      0.061069
SMPD3gRNA1RNP       4141.117489      3569.071976      0.034698


Correlation
                                   Control Area  Experiment Area
Treatment                                                       
SMPD3MO           Control Area         1.000000         0.804412
                  Experi

In [5]:
# Create dataframe of summary statistics for each experiment.
# In the recorded output there is one row per Treatment, with the mean and SEM of
# Control Area, Experiment Area and Exp/Ctl, plus a Correlation column.
# NOTE(review): Correlation is presumably Control Area vs Experiment Area (the
# SMPD3MO value, 0.804412, matches the print_stats output) -- confirm in mig_analysis.
df_stats = ma.get_stats_df(df_data)
df_stats

Unnamed: 0,Treatment,Control Area Mean,Control Area SEM,Experiment Area Mean,Experiment Area SEM,Exp/Ctl Mean,Exp/Ctl SEM,Correlation
0,SMPD3MONC11m3bCat,64416.42875,4855.633971,28288.802625,2025.2092,0.462259,0.054928,-0.209264
1,SMPD3MO,84226.407037,6747.873789,49172.997333,4089.042977,0.603374,0.028058,0.804412
2,SMPD3gRNA1RNP,40021.131571,4141.117489,32142.082286,3569.071976,0.8043,0.034698,0.929832
3,SMPD3gRNA1DNA,84971.28275,7102.405584,63640.824,6431.628777,0.753154,0.061069,0.638671


In [6]:
# Add extra statistics to the summary statistics dataframe.
# NOTE(review): in the recorded run this call prompted for keyboard input (stats
# entered: "ci, effect_size", then "y" to confirm), so this cell cannot run
# unattended under Restart & Run All. Whether add_more_stats can take the choice
# as an argument is not visible here -- TODO check its docstring.
# NOTE(review): the displayed table presumably is the call's return value; whether
# df_stats itself is modified in place is not visible here -- confirm before
# relying on df_stats in later cells.
cbs.add_more_stats(df_stats, df_data)

Here are the possible statistics to add:
ci
pct_ci_low
pct_ci_high
pct_low_high_indices
bca_ci_low
bca_ci_high
bca_low_high_indices
pvalue_1samp_ttest
pvalue_2samp_ind_ttest
pvalue_2samp_paired_ttest
pvalue_wilcoxon
pvalue_mann_whitney
effect_size
Check docstring for stat meanings.


What stats do you want? Separate with commas: ci, effect_size
ci
effect_size
Are these the correct stats?
Type 'y' if correct or 'n' if incorrect: y
Adding stats to statistics dataframe




Unnamed: 0,Treatment,Control Area Mean,Control Area SEM,Experiment Area Mean,Experiment Area SEM,Exp/Ctl Mean,Exp/Ctl SEM,Correlation,ci,effect_size
0,SMPD3MONC11m3bCat,64416.42875,4855.633971,28288.802625,2025.2092,0.462259,0.054928,-0.209264,95.0,-0.560845
1,SMPD3MO,84226.407037,6747.873789,49172.997333,4089.042977,0.603374,0.028058,0.804412,95.0,-0.416181
2,SMPD3gRNA1RNP,40021.131571,4141.117489,32142.082286,3569.071976,0.8043,0.034698,0.929832,95.0,-0.196872
3,SMPD3gRNA1DNA,84971.28275,7102.405584,63640.824,6431.628777,0.753154,0.061069,0.638671,95.0,-0.251031
