**Find and mark default entries**


Reads the data CSV file and finds all cells that have "Default" entries. Appends new columns and saves these entries to enable recovery after replacing the defaults with known values.

In [30]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot, plot

import seaborn as sns
import matplotlib.colors as mc
import matplotlib.pyplot as plt

# Run this line if you are in a Jupyter Notebook environment.
# It initialises plotly's offline notebook mode (imported from plotly.offline above).
# NOTE(review): nothing in the visible part of this cell draws a plotly figure,
# so this call may only matter for other cells - confirm before removing.
init_notebook_mode(connected=True)

# Folder locations, kept as plain strings that are concatenated to build
# complete file paths ("base" is the project folder, "data" the subfolder
# holding the CSV files).
paths = dict(
    base="C://Users//avonl//OneDrive//Work//Research//projects//2023 - FRESH//code//FRESH//",
    data="data//",
)

# Read the raw data export (tab-separated, UTF-16 encoded) into a dataframe.
data_file = paths["base"] + paths["data"] + "FreshData.csv"
df = pd.read_csv(data_file, sep='\t', encoding='utf-16')

# Columns of the raw data that are carried over into the defaults table:
# two identifier columns, the toolbox column, and one column per
# processing step that can hold a "default" marker.
df_keep_columnnames = [
    'Study',
    'ID',
    'Toolbox used',
    'Data quality/pruning: Default',
    'Motion Artifact Correction: Default Parameters?',
    'Did you use Default Parameters for your Filters?',
    'GLM: Method',
    'GLM: HRF Regressor',
    'Did you use Default Parameters for AR model order?',
]

# Independent copy of df restricted to the columns listed above, so that
# later edits to df_default never touch df itself.
df_default = df.loc[:, df_keep_columnnames].copy()

# Turn the selected columns into 'TRUE'/'FALSE' flags.
#
# The toolbox column is reused as an "Investigated" flag: it becomes 'FALSE'
# only where the cell is exactly 'Not investigated'; every other value
# (including a missing one) counts as investigated and becomes 'TRUE'.
df_default = df_default.rename(columns={'Toolbox used': 'Investigated'})
df_default['Investigated'] = np.where(df_default['Investigated'] == 'Not investigated', 'FALSE', 'TRUE')

# GLM columns: 'TRUE' where the cell is exactly 'Default', 'FALSE' for any
# other value (missing cells included).
for glm_column in ('GLM: Method', 'GLM: HRF Regressor'):
    df_default[glm_column] = np.where(df_default[glm_column] == 'Default', 'TRUE', 'FALSE')

# Every column except the identifiers and the 'Investigated' flag holds a
# default marker. Only these columns are normalised below: applying the
# replacements to the whole frame would also rewrite 'Study'/'ID' values
# (a label containing "checked", or a missing identifier turned into 'FALSE').
flag_columns = df_default.columns.difference(['Study', 'ID', 'Investigated'])

# 'checked' -> 'TRUE' (regex replacement, i.e. the matching text inside a cell
# is substituted), then every remaining missing cell -> 'FALSE'.
df_default[flag_columns] = (
    df_default[flag_columns]
    .replace('checked', 'TRUE', regex=True)
    .fillna('FALSE')
)

# Rows that were not investigated carry no information about defaults:
# blank out their flag columns again, keeping 'Study', 'ID' and 'Investigated'.
df_default.loc[df_default['Investigated'] == 'FALSE', flag_columns] = np.nan

# Final, shorter headers for the defaults table. They are applied by
# position, so the order must match the current column order of df_default.
column_names = [
    'Study',
    'ID',
    'Investigated',
    'Quality/Pruning Parameters',
    'Motion Artifact Correction Parameters',
    'Filter Parameters',
    'GLM Method',
    'GLM HRF Regressor',
    'AR Model Order',
]
df_default = df_default.set_axis(column_names, axis=1)

# Write the table next to the input data as a tab-separated, UTF-16 encoded
# file. The row index is written as well, since `index` is left at its default.
output_file = paths["base"] + paths["data"] + "FreshData_defaults.csv"
df_default.to_csv(output_file, sep='\t', encoding='utf-16')


