# Import libraries

In [1]:
import pandas as pd
from scipy.stats import wilcoxon
import matplotlib.pyplot as plt
from matplotlib import rc
import numpy as np
import utils
import config
import re
import os

In [2]:
# Global matplotlib styling: serif text in Computer Modern, with all
# figure text rendered through LaTeX.
rc("font", family="serif", serif=["Computer Modern"])
rc("text", usetex=True)

# Metadata

In [3]:
# Read the cleaned sample metadata (tab-separated; the first column becomes
# the index) and derive a parsed `date_time` column from the raw
# `Date_Collected` strings.
metadata = pd.read_csv(
    os.path.join(config.CLEAN_DIR, "metadata.tsv"),
    sep="\t",
    index_col=0,
).assign(date_time=lambda frame: pd.to_datetime(frame["Date_Collected"]))
metadata

Unnamed: 0_level_0,Patient_No,Hospital,Date_Collected,Time_Collected,Timeline_Weeks,Abx_regular,Abx_anomoly,Abx_timeline.prior..during..after.,abx_day,Consistency,...,currentfeed_bf,currentfeed_f,currentfeed_o,Sequencing_Project,Filename,Extraction,depth,bos_taurus_read_count,unreduced_gene_richness,date_time
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
magic.0001,101,no,12/10/16,1430,0,regular,,,,,...,1.0,0.0,magic.0001,Knights_Project_046,magic.0001.S22.001.fa,MagAttract,1544781,37,64331.0,2016-12-10
magic.0004,101,no,12/15/16,,1,regular,,,,,...,1.0,0.0,magic.0004,Knights_Project_046,magic.0004.S34.001.fa,MagAttract,1167038,30,69105.0,2016-12-15
magic.0006,104,yes,12/15/16,2335,0,regular,,,,,...,1.0,1.0,magic.0006,Knights_Project_076_Pool3,magic.0006.S145.001.fa,PowerSoil,1720636,13268,21372.0,2016-12-15
magic.0007,105,yes,12/15/16,2115,0,regular,,,,,...,1.0,1.0,magic.0007,Knights_Project_076_Pool3,magic.0007.S155.001.fa,PowerSoil,6825017,3304,89717.0,2016-12-15
magic.0008,105,yes,12/15/16,1525,0,regular,,,,,...,1.0,1.0,magic.0008,Knights_Project_055,X8.S125.001.fa,PowerSoil,2928044,476,61474.0,2016-12-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
magic.6595,588,,8/22/21,1702,96,regular,,,,,...,,,magic.6595,Knights_Project_077_Pool3,magic.6595.S253.001.fa,PowerSoilPro,1562968,2761,185844.0,2021-08-22
magic.6597,595,,9/6/21,,96,regular,,,,formed,...,,,magic.6597,Knights_Project_077_Pool3,magic.6597.S265.001.fa,PowerSoilPro,2113880,2339,176610.0,2021-09-06
magic.6598,585,,8/23/21,725,96,regular,,,,formed,...,,,magic.6598,Knights_Project_077_Pool3,magic.6598.S277.001.fa,PowerSoilPro,1650388,1853,142285.0,2021-08-23
magic.6599,573,,7/21/21,845,96,regular,,,,soft,...,,,magic.6599,Knights_Project_077_Pool3,magic.6599.S288.001.fa,PowerSoilPro,1519641,1694,155385.0,2021-07-21


In [4]:
# Tally how often each distinct `abx_day` entry occurs. The recorded output
# shows free-text entries ("na", "Na", "5 or 10", "2 & 5") alongside day numbers.
metadata.loc[:, "abx_day"].value_counts()

10         232
2          193
5          186
30         135
0           83
na          26
1           16
6            6
7            6
3            5
4            3
8            2
13           2
14           2
9            2
20           2
31           1
11           1
32           1
5 or 10      1
2 & 5        1
Na           1
Name: abx_day, dtype: int64

In [9]:
# Tally how many samples fall on each `Timeline_Weeks` value. The recorded
# output includes an "na" entry alongside the week numbers.
metadata.loc[:, "Timeline_Weeks"].value_counts()

4     307
1     301
24    298
12    278
48    274
96    220
3     184
2     169
0     162
72    149
36    112
60     73
na     50
84     41
Name: Timeline_Weeks, dtype: int64

In [5]:
# Subject gene richness z-score: subject age vs. abx state

In [6]:
# Collect the distinct patient numbers (kept in order of first appearance,
# not sorted) and show them.
subjects = pd.unique(metadata["Patient_No"])
subjects

array([101, 104, 105, 107, 102, 106, 109, 108, 119, 122, 117, 115, 120,
       111, 116, 125, 124, 126, 127, 136, 131, 137, 135, 142, 133, 140,
       144, 143, 141, 123, 149, 148, 151, 150, 147, 155, 153, 158, 162,
       160, 145, 161, 167, 166, 168, 169, 165, 170, 157, 164, 172, 174,
       173, 181, 185, 180, 175, 187, 183, 177, 184, 186, 188, 189, 193,
       198, 195, 132, 192, 190, 191, 196, 197, 199, 138, 202, 204, 203,
       208, 201, 205, 209, 211, 210, 217, 219, 218, 222, 224, 220, 221,
       223, 225, 213, 227, 233, 229, 231, 226, 235, 228, 247, 237, 238,
       239, 245, 243, 241, 249, 242, 253, 250, 251, 263, 256, 254, 258,
       257, 266, 260, 261, 267, 264, 248, 265, 268, 259, 270, 252, 271,
       269, 275, 276, 272, 280, 277, 282, 281, 234, 273, 283, 285, 289,
       286, 287, 290, 293, 298, 299, 297, 300, 302, 301, 295, 308, 307,
       312, 304, 305, 274, 313, 318, 296, 306, 315, 322, 319, 324, 333,
       328, 321, 329, 330, 332, 323, 341, 335, 327, 340, 337, 33

In [7]:
# Display the metadata table again (includes the derived `date_time` column).
metadata

Unnamed: 0_level_0,Patient_No,Hospital,Date_Collected,Time_Collected,Timeline_Weeks,Abx_regular,Abx_anomoly,Abx_timeline.prior..during..after.,abx_day,Consistency,...,currentfeed_bf,currentfeed_f,currentfeed_o,Sequencing_Project,Filename,Extraction,depth,bos_taurus_read_count,unreduced_gene_richness,date_time
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
magic.0001,101,no,12/10/16,1430,0,regular,,,,,...,1.0,0.0,magic.0001,Knights_Project_046,magic.0001.S22.001.fa,MagAttract,1544781,37,64331.0,2016-12-10
magic.0004,101,no,12/15/16,,1,regular,,,,,...,1.0,0.0,magic.0004,Knights_Project_046,magic.0004.S34.001.fa,MagAttract,1167038,30,69105.0,2016-12-15
magic.0006,104,yes,12/15/16,2335,0,regular,,,,,...,1.0,1.0,magic.0006,Knights_Project_076_Pool3,magic.0006.S145.001.fa,PowerSoil,1720636,13268,21372.0,2016-12-15
magic.0007,105,yes,12/15/16,2115,0,regular,,,,,...,1.0,1.0,magic.0007,Knights_Project_076_Pool3,magic.0007.S155.001.fa,PowerSoil,6825017,3304,89717.0,2016-12-15
magic.0008,105,yes,12/15/16,1525,0,regular,,,,,...,1.0,1.0,magic.0008,Knights_Project_055,X8.S125.001.fa,PowerSoil,2928044,476,61474.0,2016-12-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
magic.6595,588,,8/22/21,1702,96,regular,,,,,...,,,magic.6595,Knights_Project_077_Pool3,magic.6595.S253.001.fa,PowerSoilPro,1562968,2761,185844.0,2021-08-22
magic.6597,595,,9/6/21,,96,regular,,,,formed,...,,,magic.6597,Knights_Project_077_Pool3,magic.6597.S265.001.fa,PowerSoilPro,2113880,2339,176610.0,2021-09-06
magic.6598,585,,8/23/21,725,96,regular,,,,formed,...,,,magic.6598,Knights_Project_077_Pool3,magic.6598.S277.001.fa,PowerSoilPro,1650388,1853,142285.0,2021-08-23
magic.6599,573,,7/21/21,845,96,regular,,,,soft,...,,,magic.6599,Knights_Project_077_Pool3,magic.6599.S288.001.fa,PowerSoilPro,1519641,1694,155385.0,2021-07-21
