Skip to content

Commit

Permalink
edits and add figures
Browse files Browse the repository at this point in the history
  • Loading branch information
jenna-tomkinson committed Jan 17, 2023
1 parent f758340 commit a52b9c7
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 68 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 28 additions & 23 deletions 5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.ipynb

Large diffs are not rendered by default.

20 changes: 13 additions & 7 deletions 5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# ## Import libraries

# In[1]:
# In[8]:


import matplotlib.pyplot as plt
Expand All @@ -16,7 +16,7 @@

# ## Read in NF1 data `csv`

# In[2]:
# In[9]:


norm_fs_data = pathlib.Path("../../../4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz")
Expand All @@ -29,7 +29,7 @@

# ## Helper function to split `csv` into metadata and features

# In[3]:
# In[10]:


def split_data(pycytominer_output: pd.DataFrame):
Expand Down Expand Up @@ -60,7 +60,7 @@ def split_data(pycytominer_output: pd.DataFrame):

# ## Split NF1 data `csv`

# In[4]:
# In[11]:


metadata_dataframe, feature_data = split_data(data)
Expand All @@ -69,7 +69,7 @@ def split_data(pycytominer_output: pd.DataFrame):

# ## Transpose the NF1 dataframe

# In[5]:
# In[12]:


data_trans = feature_data.transpose()
Expand All @@ -78,20 +78,26 @@ def split_data(pycytominer_output: pd.DataFrame):

# ## Create correlation heatmap

# In[6]:
# In[13]:


# Draw the correlation heatmap of the transposed feature data.
data_trans_heatmap = sb.heatmap(data_trans.corr())

# Save BEFORE showing: plt.show() may release/close the current figure
# (notably with the notebook inline backend), in which case a later
# plt.savefig() would write an empty image instead of the heatmap.
save_path = pathlib.Path("figures/correlation_heatmap_sc.png")
plt.savefig(save_path, bbox_inches="tight")

plt.show()


# ## Create clustermap with correlation heatmap

# In[7]:
# In[14]:


# Build the clustered correlation heatmap and keep the returned grid so the
# figure that gets saved is explicitly the clustermap's own figure rather than
# whatever pyplot currently considers the "current figure".
clustermap_grid = sb.clustermap(
    data_trans.corr(),
    cmap='RdBu_r',
)

save_path = pathlib.Path("figures/correlation_clustermap_sc.png")
clustermap_grid.savefig(save_path, bbox_inches="tight")

43 changes: 22 additions & 21 deletions 5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -309,7 +309,7 @@
"[5 rows x 1055 columns]"
]
},
"execution_count": 10,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -333,17 +333,17 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def nf1_ks_test_two_sample(data: pd.DataFrame):\n",
" \"\"\"seperate features by genotype and perform two sample ks-test on each feature\n",
"def nf1_ks_test_two_sample(normalized_data: pd.DataFrame):\n",
" \"\"\"separate features by genotype and perform two sample ks-test on each feature\n",
"\n",
" Parameters\n",
" ----------\n",
" data : pd.Dataframe\n",
" pycytominer output after normalization and feature selection\n",
" normalized_data : pd.Dataframe\n",
" pycytominer output after normalization\n",
"\n",
" Returns\n",
" -------\n",
Expand All @@ -353,11 +353,11 @@
" feature_results = []\n",
"\n",
" # divide the NF1 data based on genotype\n",
" null_features = data[(data[\"Metadata_genotype\"] == \"Null\")]\n",
" wt_features = data[(data[\"Metadata_genotype\"] == \"WT\")]\n",
" null_features = normalized_data[(normalized_data[\"Metadata_genotype\"] == \"Null\")]\n",
" wt_features = normalized_data[(normalized_data[\"Metadata_genotype\"] == \"WT\")]\n",
"\n",
" # iterate through the columns in the data (both of the genotype dataframes will have the same columns)\n",
" for column in data:\n",
" for column in normalized_data:\n",
" # do not include metadata columns\n",
" if \"Metadata\" not in column:\n",
" # convert each individual column (feature) into numpy array\n",
Expand All @@ -366,8 +366,9 @@
" \n",
" # run two-sample ks-test for each feature \n",
" results = ks_2samp(wt_feature, null_feature)\n",
" # have to seperate out namedtuple due to scipy hiding the last two results \n",
" results = tuple([results.statistic, results.pvalue, results.statistic_location, results.statistic_sign])\n",
" # convert all keys/ks-test results (even the hidden ones due to scipy) into a dictionary \n",
" # and put them as a list\n",
" results = tuple(list(results._asdict().values()))\n",
" feature_results.append(results)\n",
"\n",
" feature_results = pd.DataFrame(feature_results, columns=[\"statistic\", \"pvalue\", \"statistic_location\", \"statistic_sign\"])\n",
Expand Down Expand Up @@ -414,12 +415,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Peform two sample KS-test"
"## Perform two sample KS-test"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 19,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -549,7 +550,7 @@
"[1043 rows x 4 columns]"
]
},
"execution_count": 12,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -569,7 +570,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 20,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -663,7 +664,7 @@
"[1043 rows x 1 columns]"
]
},
"execution_count": 13,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -692,7 +693,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -847,7 +848,7 @@
"[1043 rows x 5 columns]"
]
},
"execution_count": 14,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
35 changes: 18 additions & 17 deletions 5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# ## Import libraries

# In[8]:
# In[15]:


import numpy as np
Expand All @@ -16,15 +16,15 @@

# ## Set seed

# In[9]:
# In[16]:


np.random.seed(0)


# ## Load in NF1 data

# In[10]:
# In[17]:


norm_fs_data = pathlib.Path("../../../4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz")
Expand All @@ -37,16 +37,16 @@

# ## Helper functions to perform KS-test and create final `csv` file with results

# In[11]:
# In[18]:


def nf1_ks_test_two_sample(data: pd.DataFrame):
"""seperate features by genotype and perform two sample ks-test on each feature
def nf1_ks_test_two_sample(normalized_data: pd.DataFrame):
"""separate features by genotype and perform two sample ks-test on each feature
Parameters
----------
data : pd.Dataframe
pycytominer output after normalization and feature selection
normalized_data : pd.Dataframe
pycytominer output after normalization
Returns
-------
Expand All @@ -56,11 +56,11 @@ def nf1_ks_test_two_sample(data: pd.DataFrame):
feature_results = []

# divide the NF1 data based on genotype
null_features = data[(data["Metadata_genotype"] == "Null")]
wt_features = data[(data["Metadata_genotype"] == "WT")]
null_features = normalized_data[(normalized_data["Metadata_genotype"] == "Null")]
wt_features = normalized_data[(normalized_data["Metadata_genotype"] == "WT")]

# iterate through the columns in the data (both of the genotype dataframes will have the same columns)
for column in data:
for column in normalized_data:
# do not include metadata columns
if "Metadata" not in column:
# convert each individual column (feature) into numpy array
Expand All @@ -69,8 +69,9 @@ def nf1_ks_test_two_sample(data: pd.DataFrame):

# run two-sample ks-test for each feature
results = ks_2samp(wt_feature, null_feature)
# have to seperate out namedtuple due to scipy hiding the last two results
results = tuple([results.statistic, results.pvalue, results.statistic_location, results.statistic_sign])
# convert all keys/ks-test results (even the hidden ones due to scipy) into a dictionary
# and put them as a list
results = tuple(list(results._asdict().values()))
feature_results.append(results)

feature_results = pd.DataFrame(feature_results, columns=["statistic", "pvalue", "statistic_location", "statistic_sign"])
Expand Down Expand Up @@ -112,9 +113,9 @@ def merge_features_kstest(
return merged_dataframe


# ## Peform two sample KS-test
# ## Perform two sample KS-test

# In[12]:
# In[19]:


feature_results = nf1_ks_test_two_sample(data)
Expand All @@ -123,7 +124,7 @@ def merge_features_kstest(

# ## Take feature columns from data and create a list

# In[13]:
# In[20]:


# find feature names in the columns from the data
Expand All @@ -141,7 +142,7 @@ def merge_features_kstest(

# ## Save the final `csv` file with merged features and results

# In[14]:
# In[21]:


save_path = pathlib.Path("data/nf1_kstest_two_sample_results.csv")
Expand Down

0 comments on commit a52b9c7

Please sign in to comment.