-
Notifications
You must be signed in to change notification settings - Fork 0
/
MarkovModel.java
145 lines (125 loc) · 3.94 KB
/
MarkovModel.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.io.IOException;
import java.util.Random;
import java.util.Scanner;
/**
 * MarkovModel.java
 * Creates an order K Markov model of the supplied source text.
 * The value of K determines the size of the "kgrams" used
 * to generate the model. A kgram is a sequence of k consecutive
 * characters in the source text.
 *
 * <p>For every kgram the model remembers each character that follows an
 * occurrence of that kgram in the source text, once per occurrence, so that
 * picking uniformly among the remembered characters reproduces the
 * distribution observed in the source text.
 *
 * @author Conner Lane (you@auburn.edu)
 * @author Dean Hendrix (dh@auburn.edu)
 * @version 2015-11-16
 *
 */
public class MarkovModel {

    /** Value returned by {@link #nextChar(String)} when no follower is known. */
    private static final char NO_CHAR = '\u0000';

    /** Maps each kgram to the characters that follow it, one entry per occurrence. */
    private final HashMap<String, StringBuilder> model =
            new HashMap<String, StringBuilder>();

    /** Every kgram of the source text in order of appearance, duplicates included. */
    private final String[] kgrams;

    /** Single random source shared by kgram and character selection. */
    private final Random random = new Random();

    /**
     * Construct the order K model of the file sourceFile.
     *
     * <p>The file is read line by line. The first line is kept as is; each
     * later non-empty line is appended after a {@code '\n'} separator, and
     * empty lines are dropped.
     *
     * @param K the order of the model (length of each kgram)
     * @param sourceFile the file holding the source text
     * @throws IllegalArgumentException if the file is null or cannot be read,
     *         or if K is negative or larger than the text that was read
     */
    public MarkovModel(int K, File sourceFile) {
        this(K, readSource(sourceFile));
    }

    /**
     * Construct the order K model of the string sourceText.
     *
     * @param K the order of the model (length of each kgram)
     * @param sourceText the text to model
     * @throws IllegalArgumentException if sourceText is null, or if K is
     *         negative or larger than the length of sourceText
     */
    public MarkovModel(int K, String sourceText) {
        if (sourceText == null) {
            throw new IllegalArgumentException("sourceText must not be null");
        }
        int size = sourceText.length();
        if (K < 0 || K > size) {
            throw new IllegalArgumentException(
                    "K must be between 0 and the source length " + size + ", got " + K);
        }

        // A text of length n contains exactly n - K + 1 kgrams, including the
        // last one, which has no following character.
        int kgramCount = size - K + 1;
        kgrams = new String[kgramCount];

        for (int start = 0; start < kgramCount; start++) {
            String kgram = sourceText.substring(start, start + K);
            kgrams[start] = kgram;

            StringBuilder followers = model.get(kgram);
            if (followers == null) {
                followers = new StringBuilder();
                model.put(kgram, followers);
            }

            // The final kgram is registered without a follower; followers
            // recorded by earlier occurrences of the same kgram are kept.
            int followerIndex = start + K;
            if (followerIndex < size) {
                followers.append(sourceText.charAt(followerIndex));
            }
        }
    }

    /** Return the first kgram found in the source text. */
    public String firstKgram() {
        return kgrams[0];
    }

    /**
     * Return a random kgram from the source text. Every position in the
     * source text is equally likely, so kgrams that occur more often are
     * returned more often.
     */
    public String randomKgram() {
        return kgrams[random.nextInt(kgrams.length)];
    }

    /**
     * Return a single character that follows the given
     * kgram in the source text. Select this character
     * according to the probability distribution of all
     * characters that follow the given kgram in the
     * source text.
     *
     * @param kgram the kgram to look up
     * @return a follower of kgram, or {@code '\u0000'} when the kgram does not
     *         occur in the source text or is never followed by a character
     */
    public char nextChar(String kgram) {
        StringBuilder followers = model.get(kgram);
        if (followers == null || followers.length() == 0) {
            return NO_CHAR;
        }
        return followers.charAt(random.nextInt(followers.length()));
    }

    /**
     * Read the source text from a file: the first line, then every later
     * non-empty line preceded by a newline character.
     */
    private static String readSource(File sourceFile) {
        if (sourceFile == null) {
            throw new IllegalArgumentException("sourceFile must not be null");
        }

        Scanner scan;
        try {
            scan = new Scanner(sourceFile);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Cannot read source file: " + sourceFile, e);
        }

        try {
            StringBuilder text = new StringBuilder();
            // Guard the first read so an empty file yields an empty text
            // instead of a NoSuchElementException.
            if (scan.hasNextLine()) {
                text.append(scan.nextLine());
            }
            while (scan.hasNext()) {
                String line = scan.nextLine();
                if (!line.isEmpty()) {
                    text.append('\n').append(line);
                }
            }

            // Scanner hides read errors; surface them instead of silently
            // building a model from truncated text.
            IOException failure = scan.ioException();
            if (failure != null) {
                throw new IllegalArgumentException(
                        "Error while reading source file: " + sourceFile, failure);
            }
            return text.toString();
        } finally {
            scan.close();
        }
    }
}