-
Notifications
You must be signed in to change notification settings - Fork 2
/
covariance_analysis_tier2a.m
214 lines (195 loc) · 8.58 KB
/
covariance_analysis_tier2a.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
function covariance_analysis_tier2a(g1t1_struct,g2t1_struct,cmask,pthresh)
narginchk(2,4)
% Input
% g1t1_struct - output from the tier 1 script for group 1 loaded into a
% matlab structure.
% g2t1_struct - output from the tier 1 script for group 2 loaded into a
% matlab structure.
% optional:
% cmask - Binary matrix equal in size to data elements in the cellData
% that was used as tier1 input. It is used to isolate subsets of groups
% to be analyzed.
% pval - p-values for threholded networks examined in tier1 to be
% compared here.
%
% Author: Evgeny Chumin (2023), Indiana University
%%
% check to make sure inputs are structures
if ~isstruct(g1t1_struct) || ~isstruct(g2t1_struct)
fprintf(2,'input group tier1 outputs are not structures. Exiting...\n')
return
end
% check to make sure row col names are the same among the two structures
if length(g1t1_struct.cNames)~=length(g2t1_struct.cNames) || ...
length(g1t1_struct.rNames)~=length(g2t1_struct.rNames)
fprintf(2,'Unequal number of rows/columns between group cells. Exiting...\n')
return
end
% check number of nodes
if g1t1_struct.N ~= g2t1_struct.N
fprintf(2,'Number of nodes does not match between inputs. Exiting...\n')
return
end
% optional cmask should be the same size as number of DATA cells in the
% original input cell structure
if exist('cmask','var')
[r,c]=size(cmask);
if r ~= g1t1_struct.Nr-1 || c ~= g1t1_struct.Nc-1
fprintf(2,'size of cmask does not match size data cells. Exiting...\n')
return
end
else
cmask=ones(tier1_out_group1.Nr-1,tier1_out_group1.Nc-1);
end
%
if length(g1t1_struct.pval)~=length(g2t1_struct.pval)
fprintf(2,'Unequal number of p-value thresholds between groups.\n')
fprintf(2,'Make sure both groups were ran with the same p-value inputs. Exiting...\n')
return
elseif sum(ismember(g1t1_struct.pval,g2t1_struct.pval))~=length(g1t1_struct.pval)
fprintf(2,'P-value thresholds do not match between groups. Exiting...\n')
return
end
if exist('pthresh','var')
pidx = find(ismember(g1t1_struct.pval,pthresh));
else
pidx = 1:1:length(g1t1_struct.pval);
end
if isempty(pidx)
fprintf(2,'Input pthresh values not found in group data pvalue thresholds. Exiting...\n')
return
end
%%
% find cells from which to compute summary values
lidx = find(cmask);
% set outout directory
% CREATE NEW NAME FROM GROUP CONCATENATION
outroot = ['tier2a_' g1t1_struct.grp '-' g2t1_struct.grp '_comparison'];
ver = length(dir(fullfile(pwd,[outroot '*'])));
if ver == 0
outdir = fullfile(pwd,outroot);
else
outdir = fullfile(pwd,[outroot '_run' num2str(ver+1)]);
end
clear ver
if ~exist(outdir,'dir')
mkdir(outdir)
end
%%
for p=1:length(pidx) % for every p-value
pcmp = g1t1_struct.pval(pidx(p));
d1 = g1t1_struct.density(pidx(p),:);
d2 = g2t1_struct.density(pidx(p),:);
%% pairwise comparison of edge distribution between g1 and g2
%% compare nodal degree distributions
nbins = 5;
mxidx = max([max(max(g1t1_struct.degree(:,pidx(p),:))) max(max(g2t1_struct.degree(:,pidx(p),:)))]);
for ll=1:length(lidx)
hmax(ll,1) = max(histcounts(g1t1_struct.degree(:,pidx(p),lidx(ll)),nbins));
hmax(ll,2) = max(histcounts(g2t1_struct.degree(:,pidx(p),lidx(ll)),nbins));
end
hmax = max(max(hmax));
for ll=1:length(lidx)
dd1 = g1t1_struct.degree(:,pidx(p),lidx(ll));
lb1 = [g1t1_struct.grp ' ' g1t1_struct.labels{lidx(ll)}];
dd2 = g2t1_struct.degree(:,pidx(p),lidx(ll));
lb2 = [g2t1_struct.grp ' ' g2t1_struct.labels{lidx(ll)}];
figure;
histogram(dd1,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
hold on
histogram(dd2,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
xlim([0 mxidx+1]); xticks(0:2:mxidx+1)
ylim([0 hmax+1]); yticks(0:2:hmax+1)
[~,ksp]=kstest2(dd1,dd2);
xlabel('Degree'); ylabel('Frequency')
title([num2str(pcmp) ' thr: Degree Distributions: KS test p = ' num2str(ksp)])
legend({lb1,lb2},'Location','northeastoutside')
filename = fullfile(outdir, ['Degree_dist_' lb1 '_v_' lb2 '_netw_pthr' num2str(pcmp) '.pdf']);
print(gcf,'-dpdf',filename)
clear filename dd1 dd2 lb1 lb2
end
clear mxidx hmax
%% compare positive strength distributions
mxidx = max([max(max(g1t1_struct.posStrength(:,pidx(p),:))) max(max(g2t1_struct.posStrength(:,pidx(p),:)))]);
for ll=1:length(lidx)
hmax(ll,1) = max(histcounts(g1t1_struct.posStrength(:,pidx(p),lidx(ll)),nbins));
hmax(ll,2) = max(histcounts(g2t1_struct.posStrength(:,pidx(p),lidx(ll)),nbins));
end
hmax = max(max(hmax));
for ll=1:length(lidx)
dd1 = g1t1_struct.posStrength(:,pidx(p),lidx(ll));
lb1 = [g1t1_struct.grp '-' g1t1_struct.labels{lidx(ll)}];
dd2 = g2t1_struct.posStrength(:,pidx(p),lidx(ll));
lb2 = [g2t1_struct.grp '-' g2t1_struct.labels{lidx(ll)}];
figure;
histogram(dd1,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
hold on
histogram(dd2,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
xlim([0 mxidx+1]); xticks(0:2:mxidx+1)
ylim([0 hmax+1]); yticks(0:2:hmax+1)
[~,ksp]=kstest2(dd1,dd2);
xlabel('Positive Strength'); ylabel('Frequency')
title([num2str(pcmp) ' thr: Positive Strength Distributions: KS test p = ' num2str(ksp)])
legend({lb1,lb2},'Location','northeastoutside')
filename = fullfile(outdir, ['pos_strength_dist_' lb1 '_v_' lb2 '_netw_pthr' num2str(pcmp) '.pdf']);
print(gcf,'-dpdf',filename)
clear filename dd1 dd2 lb1 lb2
end
clear mxidx hmax
%% compare negative strength distributions
mxidx = max([max(max(g1t1_struct.negStrength(:,pidx(p),:))) max(max(g2t1_struct.negStrength(:,pidx(p),:)))]);
for ll=1:length(lidx)
hmax(ll,1) = max(histcounts(g1t1_struct.negStrength(:,pidx(p),lidx(ll)),nbins));
hmax(ll,2) = max(histcounts(g2t1_struct.negStrength(:,pidx(p),lidx(ll)),nbins));
end
hmax = max(max(hmax));
for ll=1:length(lidx)
dd1 = g1t1_struct.negStrength(:,pidx(p),lidx(ll));
lb1 = [g1t1_struct.grp '-' g1t1_struct.labels{lidx(ll)}];
dd2 = g2t1_struct.negStrength(:,pidx(p),lidx(ll));
lb2 = [g2t1_struct.grp '-' g2t1_struct.labels{lidx(ll)}];
figure;
histogram(dd1,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
hold on
histogram(dd2,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
xlim([0 mxidx+1]); xticks(0:2:mxidx+1)
ylim([0 hmax+1]); yticks(0:2:hmax+1)
[~,ksp]=kstest2(dd1,dd2);
xlabel('Negative Strength'); ylabel('Frequency')
title([num2str(pcmp) ' thr: Negative Strength Distributions: KS test p = ' num2str(ksp)])
legend({lb1,lb2},'Location','northeastoutside')
filename = fullfile(outdir, ['neg_strength_dist_' lb1 '_v_' lb2 '_netw_pthr' num2str(pcmp) '.pdf']);
print(gcf,'-dpdf',filename)
clear filename dd1 dd2 lb1 lb2
end
clear mxidx hmax
%% compare clustering coefficient distributions
mxidx = max([max(max(g1t1_struct.clust_coef(:,pidx(p),:))) max(max(g2t1_struct.clust_coef(:,pidx(p),:)))]);
for ll=1:length(lidx)
hmax(ll,1) = max(histcounts(g1t1_struct.clust_coef(:,pidx(p),lidx(ll)),nbins));
hmax(ll,2) = max(histcounts(g2t1_struct.clust_coef(:,pidx(p),lidx(ll)),nbins));
end
hmax = max(max(hmax));
for ll=1:length(lidx)
dd1 = g1t1_struct.clust_coef(:,pidx(p),lidx(ll));
lb1 = [g1t1_struct.grp '-' g1t1_struct.labels{lidx(ll)}];
dd2 = g2t1_struct.clust_coef(:,pidx(p),lidx(ll));
lb2 = [g2t1_struct.grp '-' g2t1_struct.labels{lidx(ll)}];
figure;
histogram(dd1,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
hold on
histogram(dd2,'DisplayStyle','stairs','EdgeAlpha',.5,'LineWidth',2,'NumBins',nbins)
xlim([0 mxidx+1]); xticks(0:2:mxidx+1)
ylim([0 hmax+1]); yticks(0:2:hmax+1)
[~,ksp]=kstest2(dd1,dd2);
xlabel('Clustering Coefficient'); ylabel('Frequency')
title([num2str(pcmp) ' thr: Clustering Coefficient Distributions: KS test p = ' num2str(ksp)])
legend({lb1,lb2},'Location','northeastoutside')
filename = fullfile(outdir, ['clust_coef_dist_' lb1 '_v_' lb2 '_netw_pthr' num2str(pcmp) '.pdf']);
print(gcf,'-dpdf',filename)
clear filename dd1 dd2 lb1 lb2
end
clear pcmp mxidx hmax
end
close all
end