# Convert raw OTU abundance table to absolute abundances and format for DIVERS variance and covariance decomposition

## 1) Read in sample metadata

In [13]:
% Location of the sample metadata spreadsheet
meta_file_dir = '/Path/To/.../metadata/';
MD = readtable([meta_file_dir, 'Gut_Metadata.xlsx']);

% Only the first 88 rows are samples of interest; the remaining rows are
% control wells. Brace indexing pulls the column contents out of the table.
md_names = MD{1:88,16};   % sample names (column 16)
md_days = MD{1:88,5};     % sample time points (column 5)
md_weights = MD{1:88,10}; % sample weights in mg (column 10)



## 2) Use metadata to establish desired sample order in OTU relative abundance table

In [14]:
% Build the desired sample ordering from the metadata columns.
% Metadata rows 61-66 are moved to the end, so the final layout is:
%   positions  1-60 <- metadata rows  1-60
%   positions 61-82 <- metadata rows 67-88
%   positions 83-88 <- metadata rows 61-66
% A single index vector keeps names, weights and days in lock-step.
target_order = [1:60, 67:88, 61:66];

%Desired order of sample names
target_names = md_names(target_order);

%Desired order of sample weights (still in the type read from the metadata)
target_ws = md_weights(target_order);

% Convert the weights to a numeric 1-by-N row vector. The row orientation is
% kept because the weights are multiplied element-wise with the per-sample
% row vector of spike-in abundances when densities are computed.
if isnumeric(target_ws)
    % Column was imported as numbers already; str2double would return NaN
    % for every entry in that case, so use the values directly.
    target_weights = double(target_ws(:)).';
else
    % Text column: str2double converts a whole cell/string array in one call.
    target_weights = str2double(target_ws(:)).';
end

% A NaN weight propagates into every normalized density, so flag it early.
if any(isnan(target_weights))
    warning('DIVERS:badSampleWeight', ...
        '%d sample weight(s) could not be parsed as numbers.', ...
        nnz(isnan(target_weights)));
end

%Desired order of sample time points
target_days = md_days(target_order);

%These will be final sample indices - OTU table will be arranged like this
time_inds = 1:60;
space_inds = [40 41 61:72];
tech_inds = [41 42 73:82];


## 3) Read in OTU relative abundance table

In [15]:
%Directory containing OTU relative abundance table
% (placeholder path, matching the other directory settings in this notebook;
% point it at the folder that holds gut_table.txt)
otu_file_dir = '/Path/To/.../raw_abundances/';
T = readtable([otu_file_dir 'gut_table.txt'],'Delimiter','\t');

% Table layout: first column = OTU identifier, last column = taxonomy,
% every column in between = one sample.
% NOTE(review): readtable may rewrite column headers that are not valid MATLAB
% identifiers; confirm the names below still match the metadata sample names.
samples = T.Properties.VariableNames; samples = samples(2:end-1); %Sample names in OTU table
otu_ids = table2array(T(:,1)); %Identifiers of OTUs
tax = table2array(T(:,end)); %Full taxonomy of OTUs
data = table2array(T(:,2:end-1)); %Relative abundances of OTUs
[M,N] = size(data); %M = number of OTU rows, N = number of sample columns



## 4) Rearrange OTU table into desired order as specified in step (2)

In [16]:
% Find mapping between metadata and OTU samples: for every desired sample
% name, locate its column among the OTU table sample names.
[is_found, target_inds] = ismember(target_names, samples);

% A missing sample would shift every later column relative to the weights
% and time points, so stop here with a clear message instead of dropping it.
if ~all(is_found)
    error('DIVERS:sampleNotFound', ...
        '%d metadata sample name(s) have no matching column in the OTU table (first missing: %s).', ...
        nnz(~is_found), char(target_names(find(~is_found,1))));
end

%Rearrange OTU table in preferred order
data = data(:,target_inds);

%Indices of technical replicate samples in OTU table
% (2nd and 3rd column of each consecutive triplet within time_inds)
X_inds = time_inds(2:3:60);
Y_inds = time_inds(3:3:60);

%Indices of second spatial replicate sample in OTU table
% (1st column of each consecutive triplet within time_inds)
Z_inds = time_inds(1:3:60);


## 5) Calculate total bacterial densities in each sample from spike-in abundances

In [17]:
% Carry the reordered metadata forward under the names used from here on
days = target_days;
weights = target_weights;
samples = target_names;

% Share of each sample's total counts taken up by the spike-in strain,
% which occupies the first row of the table (OTU 1)
spike_otu_abunds = data(1,:) ./ sum(data,1);

% Total bacterial density per sample, known only up to a scaling constant:
% non-spike fraction divided by (spike fraction times sample weight)
abs_abunds = (1-spike_otu_abunds) ./ (spike_otu_abunds .* target_weights);

% Rescale so that the densities average to 1 across samples
abs_abunds_norm = abs_abunds ./ mean(abs_abunds);

% Replicate the density row once per OTU row (M-by-N), by row indexing
abund_mat = abs_abunds_norm(ones(M,1),:);

## 6) Convert relative OTU abundances to absolute OTU abundances

In [18]:
% Dimensions of the reordered table: M rows (spike-in included), N samples
[M,N] = size(data);

% Relative abundances with the spike-in row (row 1) excluded: every sample
% column is divided by the sum of its remaining rows
data_rel = bsxfun(@rdivide, data(2:end,:), sum(data(2:end,:),1));

% Scale each sample by its normalized total density. Because those densities
% average to 1, the absolute abundances are in arbitrary units (AU).
data_abs = data_rel .* abund_mat(2:end,:);

% Drop the spike-in entry from the OTU annotations so they line up with data_abs
otu_ids = otu_ids(2:end);
tax = tax(2:end);

% Split the samples into the second spatial replicate (Z) and the two
% technical replicates (X and Y)
data_Z = data_abs(:,Z_inds);
data_X = data_abs(:,X_inds);
data_Y = data_abs(:,Y_inds);

## 7) Save output for use by the DIVERS variance and covariance decomposition model

In [19]:
%Directory to save data into
save_dir = ['/Path/To/.../absolute_abundances/'];

%% Create absolute abundance tables

% Column headers shared by all three tables: 'OTU_ID', one 'T<k>' per
% time-point column, then 'Taxonomy'. Generating them from the actual column
% count avoids three hand-maintained copies of the list.
make_col_names = @(n_points) [{'OTU_ID'}, ...
    arrayfun(@(k) sprintf('T%d',k), 1:n_points, 'UniformOutput', false), ...
    {'Taxonomy'}];

%X (Samples representing one of the two technical replicates from each time point)
table_X = [table(otu_ids) array2table(data_X) table(tax)];
table_X.Properties.VariableNames = make_col_names(size(data_X,2));

%Y (Samples representing the second of two technical replicates from each time point)
table_Y = [table(otu_ids) array2table(data_Y) table(tax)];
table_Y.Properties.VariableNames = make_col_names(size(data_Y,2));

%Z (Samples representing the second spatial replicate from each time point)
table_Z = [table(otu_ids) array2table(data_Z) table(tax)];
table_Z.Properties.VariableNames = make_col_names(size(data_Z,2));

%% Write output
% Tab-delimited text files, one per replicate set
writetable(table_X,[save_dir 'data_X.txt'],'Delimiter','\t');
writetable(table_Y,[save_dir 'data_Y.txt'],'Delimiter','\t');
writetable(table_Z,[save_dir 'data_Z.txt'],'Delimiter','\t');




