Skip to content

Commit

Permalink
improved code to remove extra lines
Browse files Browse the repository at this point in the history
  • Loading branch information
jenna-tomkinson committed Jan 11, 2023
1 parent 08bbb0f commit ec78e8c
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 46 deletions.
Binary file modified 5_analyze_data/data/norm_fs_embeddings.csv.gz
Binary file not shown.
27 changes: 8 additions & 19 deletions 5_analyze_data/notebooks/UMAP_analysis/UMAPutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
import pathlib
import pandas as pd


def split_data(pycytominer_output: pd.DataFrame):
"""
split pycytominer output to metadata dataframe and np array of feature values
split pycytominer output to return metadata dataframe
Parameters
----------
Expand All @@ -17,8 +18,8 @@ def split_data(pycytominer_output: pd.DataFrame):
Returns
-------
pd.Dataframe, np.ndarray
metadata dataframe, feature values
pd.Dataframe
metadata dataframe
"""
# split metadata from features
metadata_cols = [
Expand All @@ -28,18 +29,13 @@ def split_data(pycytominer_output: pd.DataFrame):
]
metadata_dataframe = pycytominer_output[metadata_cols]

feature_cols = [
col_name
for col_name in pycytominer_output.columns.tolist()
if "Metadata" not in col_name
]
feature_data = pycytominer_output[feature_cols].values

return metadata_dataframe, feature_data
return metadata_dataframe


def merge_metadata_embeddings(
metadata_dataframe: pd.DataFrame, embeddings: pd.DataFrame, save_path: pathlib.Path = None
metadata_dataframe: pd.DataFrame,
embeddings: pd.DataFrame,
save_path: pathlib.Path = None,
):
"""
merge metadata with UMAP embeddings into one dataframe
Expand All @@ -56,13 +52,6 @@ def merge_metadata_embeddings(
pd.Dataframe
merged dataframe with metadata and embeddings
"""
# reset index to remove the 'Metadata_WellRow' as the index then drop the index
metadata_dataframe = metadata_dataframe.reset_index()
metadata_dataframe = metadata_dataframe.reset_index(drop=True)

# remove index from embeddings dataframe as well to prevent IndexError
embeddings = embeddings.reset_index(drop=True)

# put dataframes into list of where the columns should go
dataframes = [metadata_dataframe, embeddings]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>Metadata_WellRow</th>\n",
" <th>Metadata_WellCol</th>\n",
" <th>Metadata_number_of_singlecells</th>\n",
Expand All @@ -233,7 +232,6 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
Expand All @@ -251,7 +249,6 @@
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
Expand All @@ -269,7 +266,6 @@
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
Expand All @@ -287,7 +283,6 @@
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
Expand All @@ -305,7 +300,6 @@
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
Expand Down Expand Up @@ -337,11 +331,9 @@
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>144</td>\n",
" <td>F</td>\n",
" <td>7</td>\n",
" <td>46</td>\n",
Expand All @@ -359,7 +351,6 @@
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>145</td>\n",
" <td>F</td>\n",
" <td>7</td>\n",
" <td>46</td>\n",
Expand All @@ -377,7 +368,6 @@
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>146</td>\n",
" <td>F</td>\n",
" <td>7</td>\n",
" <td>46</td>\n",
Expand All @@ -395,7 +385,6 @@
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>147</td>\n",
" <td>F</td>\n",
" <td>7</td>\n",
" <td>46</td>\n",
Expand All @@ -413,7 +402,6 @@
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>148</td>\n",
" <td>F</td>\n",
" <td>7</td>\n",
" <td>46</td>\n",
Expand All @@ -431,22 +419,22 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>149 rows × 15 columns</p>\n",
"<p>149 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" index Metadata_WellRow Metadata_WellCol Metadata_number_of_singlecells \\\n",
"0 0 C 6 12 \n",
"1 1 C 6 12 \n",
"2 2 C 6 12 \n",
"3 3 C 6 12 \n",
"4 4 C 6 12 \n",
".. ... ... ... ... \n",
"144 144 F 7 46 \n",
"145 145 F 7 46 \n",
"146 146 F 7 46 \n",
"147 147 F 7 46 \n",
"148 148 F 7 46 \n",
" Metadata_WellRow Metadata_WellCol Metadata_number_of_singlecells \\\n",
"0 C 6 12 \n",
"1 C 6 12 \n",
"2 C 6 12 \n",
"3 C 6 12 \n",
"4 C 6 12 \n",
".. ... ... ... \n",
"144 F 7 46 \n",
"145 F 7 46 \n",
"146 F 7 46 \n",
"147 F 7 46 \n",
"148 F 7 46 \n",
"\n",
" Metadata_gene_name Metadata_genotype Metadata_ImageNumber \\\n",
"0 NF1 WT 1 \n",
Expand Down Expand Up @@ -513,7 +501,7 @@
"147 16 14.885182 -1.832831 \n",
"148 18 15.622439 -0.179640 \n",
"\n",
"[149 rows x 15 columns]"
"[149 rows x 14 columns]"
]
},
"execution_count": 4,
Expand Down
2 changes: 1 addition & 1 deletion 5_analyze_data/notebooks/UMAP_analysis/umap_genotype.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) \n[GCC 10.3.0]"
},
"orig_nbformat": 4,
"vscode": {
Expand Down

0 comments on commit ec78e8c

Please sign in to comment.