-
Notifications
You must be signed in to change notification settings - Fork 0
/
Operazione.java
384 lines (322 loc) · 13.5 KB
/
Operazione.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
package org.extraction.services;
import com.google.i18n.phonenumbers.NumberParseException;
import com.google.i18n.phonenumbers.PhoneNumberMatch;
import com.google.i18n.phonenumbers.PhoneNumberUtil;
import com.google.i18n.phonenumbers.Phonenumber;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts objects of one kind from a piece of text and returns them wrapped in an
 * {@link Oggetto}.
 *
 * <p>The kind of object to look for is chosen when the instance is created, through the
 * {@code operazione} argument. The recognised values are {@code "E-mails"}, {@code "numbers"}
 * (telephone numbers), {@code "URLs"} and {@code "Identification_Numbers"}.
 *
 * <p>{@link #extract(String)} never returns {@code null}: when the operation is unknown, the
 * text is {@code null}, or nothing is found, it returns an empty {@link Oggetto}.
 *
 * @author Giuliano Tortoreto
 */
public class Operazione {
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(Operazione.class.getName());

    /** Dial-code value meaning "accept numbers from every country". */
    private static final String DEFAULT = "--";

    /** Dial code that enables the Italian area-code / mobile-prefix checks. */
    private static final String ITALIAN_DIAL_CODE = "+39";

    private static final Pattern SITE_PATTERN = Pattern.compile("(((^|(\\G)|[\\n\\s,])[w]{3}[\\.])|(http:(//)?)|(https:(//)?))([\\w]+[\\.][\\w]+)+(/(\\w|[\\W&&[^,\\s]])*)*([\\n\\s,]|$)");

    /** Commas and whitespace that the URL pattern may capture around a URL. */
    private static final Pattern SITE_NOISE_PATTERN = Pattern.compile("[,\\s]");

    /** Everything that is neither an ASCII digit nor a plus sign. */
    private static final Pattern NON_DIGIT_EXCEPT_PLUS_PATTERN = Pattern.compile("[\\D&&[^+]]");

    private static final Pattern NON_DIGIT_PATTERN = Pattern.compile("(\\D)");

    private static final Pattern EMAIL_PATTERN = Pattern.compile("[_a-zA-Z0-9-+_]+(\\.[_a-zA-Z0-9-+_]+)*(@|'chiocciola')[a-zA-Z0-9-%]+(\\.[a-zA-Z0-9-%]+)*(\\.[a-zA-Z]{2,4})");

    private static final Pattern IDENTIFICATION_NUMBER_PATTERN = Pattern.compile("(([\\D][\\W])|^|\\A|\\G)(([\\s\\-\\.]{0,2}[0-9]){8,24}($|[\\W&&[^\\s\\-\\.]]|([\\s\\-\\.]([\\D]|$))))");

    /** Leading digits of Italian premium-rate numbers. */
    private static final String[] ITALIAN_PREMIUM_PREFIXES = {"199", "144", "166", "709", "892", "899"};

    /** Digits allowed right after the leading "3" of an Italian mobile number (as of 27/02). */
    private static final String[] ITALIAN_MOBILE_PREFIX_TAILS = {"3", "4", "13", "73", "77", "70", "2", "8", "9", "6"};

    /** Two-letter country code used for telephone number extraction. */
    private String country = "";

    /** Name of the extraction to perform; see the class documentation for the known values. */
    private String operazione;

    /** Dial code (for example "+39") of the country whose numbers are wanted. */
    private String prefisso = DEFAULT;

    /** Minimum number of digits of a telephone number matched by the Italian pattern. */
    private int minimaLunghezzaNum = 9;

    /**
     * Creates an extractor with a custom minimum telephone number length. Documented by the
     * original author as never used.
     *
     * <p>The value is used as the lower bound of a {@code {min,13}} regex quantifier, so it is
     * expected to be between 0 and 13.
     *
     * @param operazione         the kind of object to extract
     * @param minlunghezzaNumero the minimum number of digits of a telephone number
     */
    public Operazione(String operazione, int minlunghezzaNumero) {
        this.operazione = operazione;
        this.minimaLunghezzaNum = minlunghezzaNumero;
    }

    /**
     * Creates an extractor for the given operation, dial code and country. According to the
     * original documentation this is the only constructor the plugin uses, passing empty
     * {@code prefisso} and {@code country} when extracting e-mails or URLs.
     *
     * @param operazione the kind of object to extract
     * @param prefisso   the country dial code (for example "+39"); only relevant for telephone
     *                   numbers
     * @param country    the two-letter country code; only relevant for telephone numbers
     */
    public Operazione(String operazione, String prefisso, String country) {
        this.operazione = operazione;
        this.prefisso = prefisso;
        this.country = country;
    }

    /**
     * Creates an extractor for the given operation, with no country and the "every country"
     * dial code.
     *
     * @param operazione the kind of object to extract
     */
    public Operazione(String operazione) {
        this.operazione = operazione;
    }

    /**
     * Runs the extraction selected at construction time on the given text.
     *
     * @param text the text to scan; {@code null} yields an empty result
     * @return the found objects; empty (never {@code null}) when nothing is found or the
     *         operation is unknown
     */
    public Oggetto extract(String text) {
        if (text == null) {
            return new Oggetto();
        }
        // Constant-first equals keeps an unset operation from throwing.
        if ("E-mails".equals(operazione)) {
            return getEmails(text);
        }
        if ("numbers".equals(operazione)) {
            return getNumbers(text);
        }
        if ("URLs".equals(operazione)) {
            return getSites(text);
        }
        if ("Identification_Numbers".equals(operazione)) {
            return getIdentificationNumbers(text);
        }
        return new Oggetto();
    }

    /**
     * Finds all URLs in the given text.
     *
     * @param text the text to scan, not {@code null}
     * @return the found URLs, stripped of commas and whitespace
     */
    private Oggetto getSites(String text) {
        Oggetto risultati = new Oggetto();
        Matcher siteMatcher = SITE_PATTERN.matcher(text);
        while (siteMatcher.find()) {
            risultati.addOggettoTrovato(cleanSite(siteMatcher.group()));
        }
        return risultati;
    }

    /**
     * Removes every comma and whitespace character from a matched URL.
     *
     * @param site the raw match
     * @return the cleaned URL
     */
    private String cleanSite(String site) {
        return SITE_NOISE_PATTERN.matcher(site).replaceAll("");
    }

    /**
     * Finds telephone numbers in the given text. Italian numbers (country "IT") are located with
     * a regular expression and then validated and formatted through libphonenumber; for every
     * other country the search is delegated entirely to libphonenumber.
     *
     * @param text the text to scan, not {@code null}
     * @return the found numbers in international format
     */
    private Oggetto getNumbers(String text) {
        Oggetto risultati = new Oggetto();
        PhoneNumberUtil gUtil = PhoneNumberUtil.getInstance();

        if (!"IT".equals(country)) {
            for (PhoneNumberMatch match : gUtil.findNumbers(text, country)) {
                risultati.addOggettoTrovato(
                        gUtil.format(match.number(), PhoneNumberUtil.PhoneNumberFormat.INTERNATIONAL));
            }
            return risultati;
        }

        // Matches numbers of minimaLunghezzaNum to 13 digits; 13-digit numbers are the old
        // 12-digit numbers plus a one-digit extension. Numbers directly preceded or followed by
        // an alphanumeric character are not accepted.
        Pattern ptr = Pattern.compile("(([\\D][\\W])|^|\\A|\\G)((\\+|00)[1-9]{1,4})?([\\s()\\-./]{0,2}[0-9]){" + minimaLunghezzaNum + ",13}($|[\\W&&[^\\s()\\-./]]|([\\s()\\-./]([\\D]|$)))");
        Matcher candidates = ptr.matcher(text);
        while (candidates.find()) {
            String numero = cleanNumber(candidates.group());
            if (!isRightCountry(numero)) {
                continue;
            }
            try {
                // Parsing is slow, but it is the easiest way to format a telephone number.
                Phonenumber.PhoneNumber parsed = gUtil.parse(numero, "IT");
                risultati.addOggettoTrovato(
                        gUtil.format(parsed, PhoneNumberUtil.PhoneNumberFormat.INTERNATIONAL));
            } catch (NumberParseException e) {
                // The candidate is skipped; the remaining matches are still processed.
                LOGGER.log(java.util.logging.Level.WARNING, "Unparsable telephone number candidate skipped", e);
            }
        }
        return risultati;
    }

    /**
     * Checks that the international dial code of a number, if present, is the one configured in
     * {@code prefisso}, then delegates to {@link #checkPrefixForCountries(String)} for the
     * national part. Numbers of Campione d'Italia (+41 91) are accepted regardless of the
     * configured dial code. The national checks only exist for Italian numbers.
     *
     * @param number the number to verify
     * @return {@code true} if the number is acceptable for the configured country
     */
    private boolean isRightCountry(String number) {
        // No usable dial code means no country filter.
        if (prefisso == null || prefisso.length() == 0 || DEFAULT.equals(prefisso)) {
            return true;
        }
        // Drop separators and symbols to make the checks simpler.
        String numero = NON_DIGIT_EXCEPT_PLUS_PATTERN.matcher(number).replaceAll("");

        // International prefix written as "+<dial code>".
        if (numero.startsWith("+")) {
            if (!numero.startsWith(prefisso)) {
                return numero.startsWith("+4191"); // Campione d'Italia
            }
            numero = numero.substring(prefisso.length());
        }
        // International prefix written as "00<dial code>".
        if (numero.startsWith("00")) {
            String dialDigits = prefisso.substring(1); // prefisso without its leading '+'
            if (!numero.startsWith("00" + dialDigits)) {
                return numero.startsWith("004191"); // Campione d'Italia
            }
            numero = numero.substring(2 + dialDigits.length());
        }
        // The number has no international prefix left: validate the national part.
        return checkPrefixForCountries(numero);
    }

    /**
     * Checks whether the leading digits of a national number are plausible. Only Italian numbers
     * (dial code "+39") are actually checked; every other country is accepted as is.
     *
     * @param numero the national number, without international prefix
     * @return {@code true} if the number is accepted
     */
    private boolean checkPrefixForCountries(String numero) {
        if (!ITALIAN_DIAL_CODE.equals(prefisso)) {
            return true; // no checks implemented for other nations
        }
        if (numero.startsWith("800")) {
            return true; // toll-free number
        }
        if (startsWithAny(numero, ITALIAN_PREMIUM_PREFIXES)) {
            return true; // premium-rate number
        }
        if (numero.startsWith("0")) {
            return true; // landline number
        }
        if (numero.startsWith("3")) {
            // Mobile number: the digits after the leading "3" must be a known prefix.
            return startsWithAny(numero.substring(1), ITALIAN_MOBILE_PREFIX_TAILS);
        }
        return false;
    }

    /** Tells whether {@code value} starts with at least one of the given prefixes. */
    private static boolean startsWithAny(String value, String[] prefixes) {
        for (String prefix : prefixes) {
            if (value.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Trims the characters that the telephone pattern captures around the number itself: the
     * result starts at the first ASCII digit, or at a '+' immediately followed by a digit, and
     * ends at the last ASCII digit. Separators inside the number are kept.
     *
     * @param numero the raw match
     * @return the trimmed number; empty if the match contains no digit
     */
    private String cleanNumber(String numero) {
        int start = 0;
        int end = numero.length();
        while (start < end && !isNumberStart(numero, start)) {
            start++;
        }
        while (end > start && !isAsciiDigit(numero.charAt(end - 1))) {
            end--;
        }
        return numero.substring(start, end);
    }

    /** Tells whether a number can begin at {@code index}: a digit, or '+' followed by a digit. */
    private static boolean isNumberStart(String value, int index) {
        char current = value.charAt(index);
        if (isAsciiDigit(current)) {
            return true;
        }
        return current == '+'
                && index + 1 < value.length()
                && isAsciiDigit(value.charAt(index + 1));
    }

    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Finds e-mail addresses in the given text. The literal {@code 'chiocciola'} is accepted in
     * place of the at sign and is replaced by "@" in the result.
     *
     * @param text the text to scan, not {@code null}
     * @return the found e-mail addresses
     */
    private Oggetto getEmails(String text) {
        Oggetto risultati = new Oggetto();
        // The pattern cannot match a space or a line break, so scanning the whole text finds
        // the same addresses as scanning it word by word.
        Matcher emailMatcher = EMAIL_PATTERN.matcher(text);
        while (emailMatcher.find()) {
            risultati.addOggettoTrovato(emailMatcher.group().replaceAll("('chiocciola')", "@"));
        }
        return risultati;
    }

    /**
     * Finds identification numbers in the given text: sequences of 8 to 24 digits, optionally
     * separated by whitespace, dashes or dots, whose digits pass
     * {@link #checkIdentificationNumbers(int[])}.
     *
     * @param text the text to scan, not {@code null}
     * @return the found numbers, as digit-only strings
     */
    private Oggetto getIdentificationNumbers(String text) {
        Oggetto risultati = new Oggetto();
        Matcher matcher = IDENTIFICATION_NUMBER_PATTERN.matcher(text);
        while (matcher.find()) {
            String digitsOnly = NON_DIGIT_PATTERN.matcher(matcher.group()).replaceAll("");
            try {
                if (checkIdentificationNumbers(stringToIntArray(digitsOnly))) {
                    risultati.addOggettoTrovato(digitsOnly);
                }
            } catch (Exception e) {
                // Not expected: every non-digit was removed above. The candidate is skipped.
                LOGGER.log(java.util.logging.Level.WARNING, "Identification number candidate skipped", e);
            }
        }
        return risultati;
    }

    /**
     * Validates a sequence of digits with a mod-10 checksum: going from the last digit to the
     * first, every second digit is doubled (subtracting 9 when the result exceeds 9) and the
     * sum of all digits must be a multiple of 10.
     *
     * @param digits the digits to check, most significant first
     * @return {@code true} if the checksum is a multiple of 10 (also for an empty array)
     */
    public static boolean checkIdentificationNumbers(int[] digits) {
        int sum = 0;
        boolean doubleIt = false; // the last digit is not doubled, the one before it is, ...
        for (int i = digits.length - 1; i >= 0; i--) {
            int digit = doubleIt ? digits[i] * 2 : digits[i];
            sum += digit > 9 ? digit - 9 : digit;
            doubleIt = !doubleIt;
        }
        return sum % 10 == 0;
    }

    /**
     * Converts a string of digits into an array holding the numeric value of each character.
     *
     * @param s the string to convert
     * @return one element per character of {@code s}, in the same order
     * @throws Exception if {@code s} contains a character that is not a digit
     */
    public static int[] stringToIntArray(String s) throws Exception {
        int[] intArray = new int[s.length()];
        for (int i = 0; i < intArray.length; i++) {
            char c = s.charAt(i);
            if (!Character.isDigit(c)) {
                throw new Exception("Contains an invalid digit");
            }
            intArray[i] = Character.digit(c, 10);
        }
        return intArray;
    }
}