# Simple t-test

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pylab as plt
import matplotlib
from IPython.display import display, HTML, Image

import numpy as np
import pandas as pd
from sklearn import preprocessing
from scipy import stats
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [3]:
from linker.models import Analysis
from linker.views.functions import get_last_analysis_data, get_groups, get_dataframes, get_standardized_df, \
    get_group_members
from linker.views.pipelines import WebOmicsInference
from linker.constants import *


        MATCH (n:Species) RETURN n.displayName AS name order by name        
        

            MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.displayName = 'Metabolism' AND
                tp.speciesName IN {species_list} AND
                (p)-[:hasEvent]->(rle)
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
            ORDER BY species_name, pathway_name
        


### Load test data

In [4]:
# Primary key of the Analysis record to load; it is passed to
# Analysis.objects.get(pk=...) in the next cell.
# NOTE(review): this id is hard-coded, so the notebook presumably only runs
# against a database that contains this record — confirm before sharing.
analysis_id = 25

In [5]:
# Omics layer to analyse. METABOLOMICS is not defined in this notebook, so it
# presumably comes from the star import of linker.constants — confirm.
data_type = METABOLOMICS
# Look up the Analysis record by primary key.
analysis = Analysis.objects.get(pk=analysis_id)
# Retrieve the data attached to that analysis for the chosen data type.
analysis_data = get_last_analysis_data(analysis, data_type)
# NOTE(review): `groups` is not referenced again in the visible cells —
# confirm it is still needed further down.
groups = get_groups(analysis_data)

In [6]:
# Unpack the two frames returned by get_dataframes: the measurement table
# (data_df) and the design table (design_df); both feed WebOmicsInference below.
# PKS is not defined in this notebook; presumably it is provided by the star
# import of linker.constants — confirm.
data_df, design_df = get_dataframes(analysis_data, PKS)

In [7]:
# Show the loaded measurement table; the rendered output elides the middle
# rows with a "..." row.
data_df

Unnamed: 0_level_0,obs,compound_id,UN_1,UN4,INFEC_1,INFEC_2,INFEC_3,INFEC_4,HK1,HK2,HK3,HK4,padj_INFEC_vs_UN,FC_INFEC_vs_UN
compound_pk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
C00001,False,H2o,,,,,,,,,,,,
C00002,False,Atp,,,,,,,,,,,,
C00003,False,Nad+,,,,,,,,,,,,
C00004,False,Nadh,,,,,,,,,,,,
C00005,False,Nadph,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C14767,False,9(s)-hode,,,,,,,,,,,,
C15519,False,25-hydroxycholesterol,,,,,,,,,,,,
C15613,False,"(25r)-3alpha,7alpha,12alpha-trihydroxy-5beta-c...",,,,,,,,,,,,
C18125,False,1-acyl-sn-glycero-3-phosphoserine,,,,,,,,,,,,


### Run t-test

In [8]:
# Group labels handed to run_ttest(case, control) in the next cell.
# NOTE(review): data_df already carries padj_INFEC_vs_UN / FC_INFEC_vs_UN
# columns, which describe a different comparison (INFEC vs UN) from the
# HK vs UN test configured here.
case = 'HK'
control = 'UN'

In [9]:
# Build the inference helper from the loaded frames, then run the
# case-vs-control t-test.
# NOTE(review): min_value=5000 is an unexplained threshold — confirm its
# meaning against WebOmicsInference before changing it.
wi = WebOmicsInference(
    data_df,
    design_df,
    data_type,
    min_value=5000,
)
result_df = wi.run_ttest(case, control)

# Preview the first rows of the result table.
result_df.head()

Unnamed: 0,padj,log2FoldChange
C00009,0.937964,0.03141
C00020,0.552928,-0.319297
C00025,0.552928,0.162212
C00037,0.552928,-0.380872
C00041,0.552928,-0.201749


### Run pathway analysis

In [14]:
# Keep only the rows whose 'obs' flag is True.
data_df[data_df['obs'].eq(True)]

Unnamed: 0_level_0,obs,compound_id,UN_1,UN4,INFEC_1,INFEC_2,INFEC_3,INFEC_4,HK1,HK2,HK3,HK4,padj_INFEC_vs_UN,FC_INFEC_vs_UN
compound_pk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
C00009,True,Orthophosphate,381401.8,312837.7,479578.5,230005.6,232082.9,449130.8,236607.4,536029.5,321735.0,380636.2,0.871045,-0.07706
C00020,True,Amp,83745.11,135613.8,66545.36,186587.6,129466.5,128113.0,87448.2,78115.14,79525.53,97963.03,0.80001,0.168774
C00025,True,L-glutamate,20977540.0,18792650.0,21756360.0,17123610.0,37839940.0,19147460.0,26416420.0,20081540.0,22863020.0,20091070.0,0.778123,0.199115
C00037,True,Glycine,1340802.0,1048188.0,759096.0,854259.6,1467938.0,662653.8,1084838.0,766316.8,994945.6,830652.1,0.775449,-0.411692
C00041,True,L-alanine,3537903.0,2808830.0,4275319.0,3713087.0,7178702.0,3782108.0,3134556.0,2539420.0,2901518.0,2443864.0,0.775449,0.531459
C00051,True,Glutathione,656848.2,830609.2,889355.1,822675.8,962527.6,1227659.0,1198968.0,1017580.0,1532543.0,1158258.0,0.775449,0.384573
C00064,True,L-glutamine,395020500.0,289368300.0,257776800.0,161851600.0,403047300.0,182654800.0,285778800.0,315729200.0,314446900.0,278574100.0,0.775449,-0.522202
C00077,True,L-ornithine,1861422.0,2249538.0,2388442.0,1812233.0,5175261.0,1805338.0,2832114.0,1609296.0,2083633.0,1593964.0,0.778123,0.301418
C00078,True,L-tryptophan,2153219.0,1682301.0,1391535.0,1083955.0,2724574.0,918230.2,1948601.0,1545008.0,1847820.0,1635050.0,0.775449,-0.44948
C00079,True,L-phenylalanine,18873000.0,13071570.0,11804690.0,9135548.0,21632010.0,8651260.0,15441890.0,13731810.0,14907770.0,13307080.0,0.775449,-0.39811
