-
Notifications
You must be signed in to change notification settings - Fork 38
/
make_big.pl
executable file
·119 lines (106 loc) · 3.14 KB
/
make_big.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/perl
# Copyright (C) WikiRing 2007
# Author: Crawford Currie
# Generate large test data. This script is primarily designed for running
# stand-alone to generate large test data. It requires the word list
# /usr/share/dict/words to be installed.
use strict;
# Write the command-line help text to STDERR and terminate the script
# with exit status 1. This sub never returns to its caller.
sub usage {
    # $0 is interpolated so the help shows the name the script was run as.
    my $help = <<"USAGE";
Usage: $0 <options>
Options:
-webs <w> - generate <w> webs (default 1)
-topics <t> - generate <t> additional topics in
each web (default 0)
-size <s> - target number of words to put in
each additional topic (default 501)
-base <base> - base for new web and topic names
(default IncredibleHulk)
This script must be run while cd'ed to the root directory
of a TWiki install. It checks for data and pub dirs and
refuses to run without them.
Using <base> as the base for new web names, generates
<w> new webs using _default as the basis. The standard
topics from _default are always included, and will
generate <t> additional topics in each generated web.
Additional topics are named using <base>, are plain text,
and are generated using words picked from
/usr/share/dict. Each new topic will contain <s> words
taken sequentially from the dictionary. Generated topics
have no histories and no meta-data, just text.
Web names are generated from <base> by appending decimal
numbers to generate unique web names. Topic names are
generated the same way. You are recommended to use unique
web names to make a later rm -r as safe as possible.
USAGE
    print STDERR $help;
    exit(1);
}
# Pre-flight check: both "data" and "pub" must be writable from the
# current directory; otherwise show the help text and exit.
usage() if !( -w 'data' && -w 'pub' );

# Path of the word list that getWords reads from, and the file handle
# getWords keeps open between calls (undefined until the first read).
my ( $dict, $dict_fh );
$dict = '/usr/share/dict/words';
# Return the next $n entries of the dictionary file $dict as one string,
# each entry terminated by a newline.
#
# The read position lives in the file-level handle $dict_fh, so successive
# calls carry on where the previous call stopped. When the end of the
# dictionary is reached it is reopened and reading wraps to the first word.
#
#   $n - number of words wanted. An undefined, zero or negative count
#        yields the empty string; a fractional count is rounded up.
#
# Dies if the dictionary cannot be opened or reopened, or if it contains
# no lines at all (which would otherwise make the wrap-around spin forever).
sub getWords {
    my ($n) = @_;
    local $/ = "\n";    # always read line-wise, whatever the caller set
    my $words = '';

    # Compare with "> 0" rather than testing truth: a negative or fractional
    # count never hits exactly 0 and would loop without end.
    return $words unless defined $n && $n > 0;

    if ( !$dict_fh ) {
        open( $dict_fh, '<', $dict ) or die "Cannot open $dict: $!";
    }

    my $rewound = 0;    # true once the dictionary was reopened in this call
    while ( $n > 0 ) {
        my $got = 0;    # words read during this pass over the file
        # defined() keeps a final line of "0" from looking like end-of-file.
        while ( $n > 0 && defined( my $word = <$dict_fh> ) ) {
            # A last line without a newline must not fuse with the word
            # that follows it after wrap-around.
            $word .= "\n" unless $word =~ /\n\z/;
            $words .= $word;
            $n--;
            $got++;
        }
        last unless $n > 0;

        # A complete pass from the top that produced nothing means the
        # dictionary is empty; retrying can never succeed.
        die "Dictionary $dict contains no words\n" if $rewound && !$got;

        # End of file: start again from the first word.
        close($dict_fh);
        open( $dict_fh, '<', $dict ) or die "Cannot reopen $dict: $!";
        $rewound = 1;
    }
    return $words;
}
# Main script: parse the command line, then create the requested webs
# under data/ and fill each one with generated topics.

# Option defaults. The keys double as the set of recognised option names.
my %opts = (
    webs   => 1,
    topics => 0,
    size   => 501,
    base   => 'IncredibleHulk',
);

# Options are "-name value" pairs. Loop on @ARGV itself rather than on the
# shifted value, so an argument of "0" or "" cannot end parsing early.
while (@ARGV) {
    my $arg = shift @ARGV;
    if ( $arg =~ /^-(\w+)$/ && exists $opts{$1} ) {
        my $name = $1;
        unless (@ARGV) {
            print STDERR "Missing value for option $arg\n";
            usage();
        }
        $opts{$name} = shift @ARGV;
    }
    else {
        print STDERR "Unrecognised option $arg\n";
        usage();
    }
}

# The counters below are compared numerically; reject anything that is not
# a plain non-negative integer instead of silently treating it as 0.
for my $name (qw(webs topics size)) {
    unless ( $opts{$name} =~ /^\d+$/ ) {
        print STDERR
          "Option -$name needs a non-negative whole number, got '$opts{$name}'\n";
        usage();
    }
}

# The standard topics copied into every new web. Expanded here instead of
# by a shell so that no user-supplied text is ever passed to /bin/sh.
my @defaultTopics = glob('data/_default/*.txt');

my $newWebs = 0;
my $nextWeb = 0;
while ( $newWebs < $opts{webs} ) {

    # Skip numeric suffixes that are already taken.
    $nextWeb++ while -e "data/$opts{base}$nextWeb";
    my $web = "$opts{base}$nextWeb";

    # Create the web. Without the directory nothing below can work, and an
    # unchecked failure would be reported as a generated web.
    mkdir("data/$web") or die "Cannot create data/$web: $!";
    if (@defaultTopics) {
        # List-form system runs cp directly, bypassing the shell.
        system( 'cp', @defaultTopics, "data/$web" ) == 0
          or warn "Copying default topics into data/$web failed\n";
    }
    else {
        warn "No default topics found in data/_default\n";
    }

    my $newTopics = 0;
    my $nextTopic = 0;
    while ( $newTopics < $opts{topics} ) {

        # Skip topic names that already exist in this web.
        $nextTopic++ while -e "data/$web/$opts{base}$nextTopic.txt";
        my $topic = "$opts{base}$nextTopic";
        my $file  = "data/$web/$topic.txt";

        open( my $topic_fh, '>', $file ) or die "Cannot write $file: $!";
        my $t = time();
        print {$topic_fh} <<"FLUFF";
%META:TOPICINFO{author="ProjectContributor" date="$t" format="1.1" version="1"}%
FLUFF
        print {$topic_fh} getWords( $opts{size} );

        # Buffered write errors (e.g. disk full) only surface at close.
        close($topic_fh) or die "Cannot finish writing $file: $!";
        $newTopics++;

        # "\r" keeps the progress report on a single terminal line.
        print "Generated topic $topic \r";
    }
    print "Generated web $web \n";
    $newWebs++;
}