In [None]:
import pandas as pd
import plotly.graph_objects as go

In [None]:
# Read the tab-separated group-level BOLD table into a DataFrame.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where that volume is mounted.
data = pd.read_csv(
    filepath_or_buffer="/Volumes/Main/Working/mriqception_test/mriqception/test_data/group2_bold.tsv",
    delimiter="\t",
)

# Column labels of the loaded table; later cells pick the variables to plot from these.
variables = data.columns

In [None]:
# Load the tab-separated group-level BOLD table.
# NOTE(review): this repeats the load already performed in the previous cell,
# using the same absolute, machine-specific path.
data = pd.read_csv(
    filepath_or_buffer="/Volumes/Main/Working/mriqception_test/mriqception/test_data/group2_bold.tsv",
    delimiter="\t",
)

# Index of column names found in the table.
variables = data.columns

# TODO: drop the columns that should not be graphed (e.g. "bids_name") from
# `variables` before plotting; for now the caller slices `variables` instead.

# Human-readable description for each quality metric, keyed by metric name.
# Keys ending in "_" (and names such as "dvars" or "gsr") name a family of
# columns rather than one exact column. Most entries hold two lines separated
# by "\n": what the metric measures, then how to read its values.
descriptions = {
    "cjv": (
        "Coefficient of joint variation between white matter and gray matter.\n"
        "Higher values indicate more head motion and/or intensity non-uniformity artifacts."
    ),
    "cnr": (
        "Contrast-to-noise ratio, reflecting separation between GM & WM.\n"
        "Higher values indicate higher quality."
    ),
    "snr": (
        "Signal-to-noise ratio within the tissue mask.\n"
        "Higher values indicate higher quality."
    ),
    "snr_dietrich": (
        "Dietrich et al. (2007)'s signal-to-noise ratio.\n"
        "Higher values indicate higher quality."
    ),
    "art_qi2": (
        "A quality index accounting for effects of both clustered and subtle artifacts in the air background.\n"
        "Higher values indicate lower quality."
    ),
    "art_qi1": (
        "The proportion of voxels outside the brain with artifacts to the total number of voxels outside the brain.\n"
        "Higher values indicate lower quality."
    ),
    "wm2max": (
        "Captures skewed distributions within the WM mask, caused by fat and vascular-related hyperintensities.\n"
        "Ideal values fall within the interval [0.6, 0.8]."
    ),
    "fwhm_": (
        "Image blurriness (full-width half-maximum).\n"
        "Higher values indicate a blurrier image."
    ),
    "volume_fraction": (
        "Summary statistics for the intra-cranial volume fractions of CSF, GM, and WM.\n"
        "Be aware of potential outliers."
    ),
    "rpve": (
        "Residual partial volume error.\n"
        "Higher values indicate lower quality."
    ),
    "overlap_": (
        "How well the image tissue probability maps overlap with those from the MNI ICBM 2009 template.\n"
        "Higher values indicate better spatial normalization."
    ),
    "efc": (
        "Shannon entropy criterion.\n"
        "Higher values indicate more ghosting and/or head motion blurring. "
    ),
    "fber": (
        "The variance of voxels inside the brain divided by the variance of voxels outside the brain.\n"
        "Higher values indicate higher quality."
    ),
    "inu_": (
        "Intensity non-uniformity (bias field) summary statistics.\n"
        "Values closer to 1 indicate higher quality; further from zero indicate greater RF field inhomogeneity."
    ),
    "summary_stats": "Summary statistics for average intensities in CSF, GM, and WM.",
    "dvars": (
        "The average change in mean intensity between each pair of fMRI volumes in a series.\n"
        "Higher values indicate more dramatic changes (e.g., due to motion or spiking)."
    ),
    "gcor": (
        "Average correlation of all pairs of voxel time series inside of the brain. Illustrates differences between data due to motion/physiological noise/imaging artifacts.\n"
        "Values closer to zero are better."
    ),
    "tsnr": (
        "Temporal signal-to-noise ratio taking into account mean signal over time.\n"
        "Higher values indicate higher quality."
    ),
    "fd_mean": (
        "A measure of subject head motion, which compares the motion between the current and previous volumes.\n"
        "Higher values indicate lower quality."
    ),
    "fd_num": (
        "Number of timepoints with framewise displacement >0.2mm.\n"
        "Higher values indicate lower quality."
    ),
    "fd_perc": (
        "Percent of timepoints with framewise displacement >0.2mm.\n"
        "Higher values indicate lower quality."
    ),
    "gsr": (
        "Ghost-to-signal ratio along the x or y encoding axes.\n"
        "Higher values indicate lower quality."
    ),
    "aor": (
        "Mean fraction of outliers per fMRI volume, from AFNI's 3dToutcount.\n"
        "Higher values indicate lower quality."
    ),
    "aqi": (
        "Mean quality index, from AFNI's 3dTqual.\n"
        "Values close to 0 indicate higher quality."
    ),
    "dummy": "Number of volumes in the beginning of the fMRI timeseries identified as non-steady state.",
}

In [None]:
def _find_description(dictionary, var_name):
    """Return the description for ``var_name``, or ``None`` if there is none.

    An exact key match wins. Otherwise the longest key that prefixes
    ``var_name`` at an underscore boundary is used, so a family key such as
    ``"fwhm_"`` or ``"dvars"`` also describes ``"fwhm_avg"`` or ``"dvars_std"``
    (but ``"snr"`` does not claim ``"snrd_total"``).
    """
    if var_name in dictionary:
        return dictionary[var_name]

    name = str(var_name)
    best_key = None
    for key in dictionary:
        if not isinstance(key, str) or not key:
            continue
        # Normalise to a trailing underscore so matches stop at a word boundary.
        family = key if key.endswith("_") else key + "_"
        if name.startswith(family) and (best_key is None or len(key) > len(best_key)):
            best_key = key
    return None if best_key is None else dictionary[best_key]


def my_function(df, variables, dictionary, user_rows=slice(20, 40)):
    """Show one split violin plot per variable and print its description.

    For every name in ``variables`` a figure is drawn with two half violins:
    the left ("user data") half shows a positional subset of that variable's
    rows and the right ("api") half shows all of its rows. After each figure
    the matching entry of ``dictionary`` is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``'bids_name'`` identifier column and one column
        per variable.
    variables : iterable of str
        Column names to plot. The identifier column is skipped if present.
    dictionary : dict
        Maps a variable name (or a family prefix such as ``"fwhm_"``) to a
        description string.
    user_rows : slice or list of int, optional
        Positional selection, within each variable's rows, used as the
        stand-in "user data" subset. Defaults to rows 20-39, which is what the
        previously hard-coded ``[20:40]`` selected.
        TODO: replace with the actual user data.

    Returns
    -------
    None
        Figures are displayed and descriptions printed as side effects.

    Raises
    ------
    KeyError
        If ``df`` has no ``'bids_name'`` column (raised by ``pd.melt``).
    """
    id_column = 'bids_name'

    # Long format: one row per (bids_name, variable) pair.
    df_long = pd.melt(df, id_vars=id_column, var_name='var', value_name='values')

    for var_name in variables:
        # The identifier column is not a melted variable; plotting it would
        # only produce an empty figure.
        if var_name == id_column:
            continue

        # Filter once per variable and reuse the result for both traces.
        api_data = df_long[df_long['var'] == var_name]
        user_data = api_data.iloc[user_rows]

        # create a split violin plot for a single variable
        fig = go.Figure()
        fig.add_trace(go.Violin(x=user_data['var'],
                                y=user_data['values'],
                                legendgroup='user data', scalegroup='user data', name='user data',
                                side='negative',
                                points='all',
                                pointpos=-0.5,  # where to position points
                                jitter=0.1,
                                line_color='lightseagreen',
                                text=user_data[id_column])
                      )
        fig.add_trace(go.Violin(x=api_data['var'],
                                y=api_data['values'],
                                legendgroup='api', scalegroup='api', name='api',
                                side='positive',
                                line_color='mediumpurple',
                                # Hover labels must come from the same rows as `y`.
                                text=api_data[id_column])
                      )
        # update characteristics shared by all traces
        fig.update_traces(meanline_visible=True,
                          box_visible=True)
        # Overlay the two halves so they form one split violin instead of
        # being drawn side by side as separate groups.
        fig.update_layout(title_text=str(var_name),
                          violingap=0,
                          violinmode='overlay')
        fig.show()

        # print description of figure
        description = _find_description(dictionary, var_name)
        if description is None:
            description = "No description available for '{}'.".format(var_name)
        print(description)

In [None]:
# Plot the columns at positions 1-4 of the table (position 0 is skipped) and
# print each one's description.
my_function(
    df=data,
    variables=variables[1:5],
    dictionary=descriptions,
)