/
stat2dat.sas
111 lines (94 loc) · 3.34 KB
/
stat2dat.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*=
name: STAT2DAT
title: Transform a summary data set to pseudo-observations
Doc: http://www.datavis.ca/sasmac/stat2dat.html
Version: 1.1
Revised: 2 Apr 1999
=Description:
Take a dataset containing summary statistics (N, mean, std dev) for
a between groups design and produce a dataset from which PROC GLM
can be run to produce equivalent results.
=Usage:
%stat2dat(data=inputdataset, out=outputdataset, ...,
depvar=Y, freq=freq)
The input dataset contains one observation for each group.
Supply the names of variables containing the N, MEAN, and standard
deviation (STD) for each group (see argument list below). The
mean square error (MSE) for a reported ANOVA can be supplied instead
of individual STD values. The sample size per cell can be supplied
as a constant rather than a dataset variable if all groups are of the
same size.
The output dataset can then be used with PROC GLM or PROC ANOVA
(balanced designs). It contains all variables from the input dataset
plus a constructed dependent variable ('Y' by default) and
a constructed frequency variable ('freq' by default).
proc glm data=outputdataset;
class classvars;
freq freq;
model Y = modelterms;
run;
Based on: David Larson, Analysis of Variance With Just Summary Statistics
as Input, The American Statistician, May 1992, Vol. 46(2), 151-152.
(David Larson: dalef@uno.edu)
Michael Friendly <friendly@yorku.ca>
Psychology Department, York University
Toronto, ONT M3J 1P3 CANADA
=*/
%macro stat2dat(
   data=_last_,   /* name of input data set                                  */
   class=,        /* names of one or more class variables                    */
   n=,            /* data set variable (or constant) containing group N      */
   mean=,         /* data set variable containing group mean                 */
   std=,          /* data set variable containing group standard deviation   */
   mse=,          /* or, supply a constant or variable containing MSE        */
   verify=0,      /* non-zero to print computed means, to verify the result  */
   out=_data_,    /* name of output data set                                 */
   freq=freq,     /* name of constructed frequency variable                  */
   depvar=Y,      /* name of constructed dependent variable                  */
   label=,        /* label for dependent variable                            */
   expand=N       /* Y: write one observation per pseudo-case (freq is 1)    */
   );

   /*-----------------------------------------------------------------
    Writes, for every input observation (one per group), pseudo-
    observations on &depvar that reproduce the group N, mean and
    variance:
       N-1 cases at  x1 = mean + sqrt(var/N)
       1   case  at  x2 = N*mean - (N-1)*x1
    where var is std**2 when std= is given, otherwise the mse= value.

    expand=N (default): two rows per group, weighted by &freq (N-1, 1).
    expand=Y          : the x1 row is written N-1 times and the x2 row
                        once, and &freq is 1 on every row, so that a
                        FREQ statement applied to the expanded data does
                        not weight the replicated rows a second time.

    Work variables use an _s2d prefix so that input variables named
    x1sur, x2sur, var or i are neither overwritten nor dropped.
   -----------------------------------------------------------------*/

   %local doexpand;

   /* n= and mean= are required */
   %if %length(&n) = 0 or %length(&mean) = 0 %then %do;
      %put ERROR: n= and mean= variables must be supplied;
      %goto fini;
   %end;

   /* a variance source is required: std= takes precedence over mse= */
   %if %length(&std) = 0 and %length(&mse) = 0 %then %do;
      %put ERROR: Either a std= variables or mse= value must be supplied;
      %goto fini;
   %end;

   /* expand= is on when its first letter is Y or y.  The length guard
      keeps %substr from being called on an empty value. */
   %let doexpand = 0;
   %if %length(&expand) > 0 %then %do;
      %if %upcase(%substr(&expand,1,1)) = Y %then %let doexpand = 1;
   %end;

   /* Calculate values for depvar and freq for the pseudo-observations
      of each group which yield the same mean, std and total N */
   data &out;
      set &data;
      drop _s2dx1 _s2dx2 _s2dvar;

      %if %length(&std) > 0 %then %do;
         _s2dvar = &std**2;
      %end;
      %else %do;
         _s2dvar = &mse;
      %end;

      %if %length(&label) > 0 %then %do;
         label &depvar = "&label";
      %end;

      if &n > 0 then do;
         _s2dx1 = &mean + sqrt(_s2dvar/&n);
         _s2dx2 = &n*&mean - (&n - 1)*_s2dx1;

         %if &doexpand %then %do;
            /* The expansion is done in this same step rather than in a
               second DATA step that re-reads &out, so it also works
               with the default out=_data_.  The repeat count is saved
               before &freq is assigned in case freq= and n= name the
               same variable. */
            drop _s2dk _s2di;
            _s2dk = &n - 1;
            &freq = 1;
            &depvar = _s2dx1;
            do _s2di = 1 to _s2dk;
               output;
            end;
            &depvar = _s2dx2;
            output;
         %end;
         %else %do;
            &depvar = _s2dx1;  &freq = &n - 1;  output;
            &depvar = _s2dx2;  &freq = 1;       output;
         %end;
      end;
      else do;
         /* a missing N also takes this branch, since missing > 0 is false */
         put 'ERROR: STAT2DAT- Cannot generate data for non-positive N' ;
         _error_=1;
      end;
   run;   /* explicit step boundary: the data set exists when the macro returns */

   %if &verify ^= 0 %then %do;
      /* reads the most recently created data set, i.e. the one written above */
      proc means mean std;
         %if %length(&class) > 0 %then %do;
            /* only emitted when class= is non-empty, to avoid an empty CLASS statement */
            class &class;
         %end;
         var &depvar;
         freq &freq;
      run;
   %end;

%fini:;
%mend;