In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json

import os
import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))  # Put the parent of sys.path[0] on the import path so the `src.*` imports below can resolve
from src.utils import tight_bbox, parse_lipid
from src.plots import plot_quant_vs_ogtt, set_square_ratio

# Export settings: keep SVG text as text (not outlined paths) and restrict PDFs to the 14 core fonts.
plt.rcParams['svg.fonttype'] = 'none'
plt.rcParams['pdf.use14corefonts'] = True
plt.rcParams['axes.unicode_minus'] = False  # https://stackoverflow.com/questions/43102564/matplotlib-negative-numbers-on-tick-labels-displayed-as-boxes

# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*' and later
# dropped the old names, so prefer the new name when this install provides it and fall
# back to the legacy 'seaborn-ticks' name on older releases.
ticks_style = next(
    (name for name in ('seaborn-v0_8-ticks', 'seaborn-ticks') if name in plt.style.available),
    'seaborn-ticks',
)
plt.style.use(ticks_style)

import importlib
from pathlib import Path

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_squared_log_error

from statsmodels.api import formula as smf

In [2]:
# Build every input path from one root with pathlib so the cell runs on any OS
# (the previous mix of '..\\data\\...' raw strings only resolved on Windows).
DATA_DIR = Path('..') / 'data'
METADATA_DIR = DATA_DIR / 'metadata'
PROCESSED_DIR = DATA_DIR / 'processed'

# read_text() opens and closes the file itself, so no handle is left dangling
# the way json.load(open(...)) did.
colors = json.loads((METADATA_DIR / 'color_schemes.json').read_text())
compound_superclasses = json.loads((METADATA_DIR / 'compound_superclasses.json').read_text())

# Main table, re-indexed by its 'i' column.
data = pd.read_csv(PROCESSED_DIR / 'combined_metabolites_data_with_model_params.csv').set_index('i')

# Metadata tables: file grouping (fg) and animal phenotypes (ap), each indexed by its first column.
fg = pd.read_csv(METADATA_DIR / 'combined_metab_lipid_file_grouping.csv', index_col=0)
ap = pd.read_excel(METADATA_DIR / 'animal_phenotypes.xlsx', index_col=0)

vldata = pd.read_csv(PROCESSED_DIR / '1700s_validation_lipids.csv', index_col=0)
# Index labels of fg rows in the '30wk' cohort where 'has_ogtt' is truthy
# ('has_ogtt' and 'qc_rep' are used directly as boolean masks).
vldata_cols = fg.loc[(fg['cohort'] == '30wk') & fg['has_ogtt']].index
# Index labels of fg rows flagged by 'qc_rep'.
qc_rep_1700s = fg.loc[fg['qc_rep']].index

In [15]:
# Show every column label of `data` as a plain Python list.
list(data.columns)

['unique_id',
 'ID',
 'm/z',
 'RT',
 'molec_class',
 'Type',
 'polarity',
 'lc_type',
 'superclass',
 '1091_8_FBG',
 '1091_10_FBG',
 '1093_8_FBG',
 '1093_9_FBG',
 '1093_10_FBG',
 '1060_8_FBG',
 '1060_9_FBG',
 '1060_10_FBG',
 '1062_8_FBG',
 '1062_9_FBG',
 '1062_10_FBG',
 '1074_8_FBG',
 '1074_9_FBG',
 '1074_10_FBG',
 '1092_8_FBG',
 '1092_9_FBG',
 '1092_10_FBG',
 '1102_8_FBG',
 '1102_9_FBG',
 '1102_10_FBG',
 '1076_8_FBG',
 '1076_9_FBG',
 '1076_10_FBG',
 '1082_8_FBG',
 '1082_9_FBG',
 '1082_10_FBG',
 '1101_8_FBG',
 '1101_9_FBG',
 '1101_10_FBG',
 '1091_8_RBG',
 '1091_9_RBG',
 '1091_10_RBG',
 '1093_8_RBG',
 '1093_9_RBG',
 '1093_10_RBG',
 '1060_8_RBG',
 '1060_9_RBG',
 '1060_10_RBG',
 '1062_8_RBG',
 '1062_9_RBG',
 '1062_10_RBG',
 '1074_8_RBG',
 '1074_9_RBG',
 '1074_10_RBG',
 '1092_8_RBG',
 '1092_9_RBG',
 '1092_10_RBG',
 '1102_8_RBG',
 '1102_9_RBG',
 '1102_10_RBG',
 '1076_8_RBG',
 '1076_9_RBG',
 '1076_10_RBG',
 '1082_8_RBG',
 '1082_9_RBG',
 '1082_10_RBG',
 '1101_8_RBG',
 '1101_9_RBG',
 '1101_10_

In [11]:
# Display the fed and fasted coefficient columns as a pair of Series.
tuple(data[column] for column in ('coef_fed', 'coef_fasted'))

(i
 m_0      0.000093
 m_1      0.000037
 m_2     -0.000010
 m_3     -0.000004
 m_4     -0.000032
            ...   
 l_912    0.000051
 l_913    0.000054
 l_914    0.000038
 l_915    0.000045
 l_916    0.000048
 Name: coef_fed, Length: 1409, dtype: float64,
 i
 m_0      0.000078
 m_1      0.000001
 m_2     -0.000002
 m_3     -0.000009
 m_4     -0.000121
            ...   
 l_912    0.000003
 l_913   -0.000025
 l_914   -0.000018
 l_915   -0.000019
 l_916   -0.000016
 Name: coef_fasted, Length: 1409, dtype: float64)

In [20]:
# Copy to the system clipboard the rows whose fed coefficient is positive while the
# fasted coefficient is negative, keeping only the identifying and model columns.
(
    data
    .loc[
        data['coef_fed'].gt(0) & data['coef_fasted'].lt(0),
        ['ID', 'molec_class', 'qval_fed', 'qval_fasted', 'coef_fed', 'coef_fasted'],
    ]
    .to_clipboard()
)