-
Notifications
You must be signed in to change notification settings - Fork 233
/
check.py
412 lines (323 loc) · 8.11 KB
/
check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
"""
Helpers to check if an object matches a data type
"""
import datetime
import os
import re
import math
from pyspark.sql import DataFrame
from pyspark.sql import functions as F
from optimus.helpers.converter import one_list_to_val, val_to_list
from optimus.helpers.parser import parse_spark_dtypes
from optimus.helpers.raiseit import RaiseIt
def is_nan(value):
    """
    Check if a value is nan: either the float NaN or the string "nan" in any letter case
    :param value: object to check for
    :return: True if the value is nan, False otherwise
    """
    if isinstance(value, str):
        return value.lower() == "nan"
    # Only floats can hold NaN. Integers are skipped on purpose: math.isnan() converts
    # its argument to float and raises OverflowError for very large ints.
    if isinstance(value, float):
        return math.isnan(value)
    return False
def is_none(value):
    """
    Tell whether a value stands for "no value"
    :param value: object to check for
    :return: True for None itself or for the string "none" in any letter case, False otherwise
    """
    if not is_str(value):
        # Non-string values only qualify when they are the None singleton
        return value is None
    return value.lower() == "none"
def is_same_class(class1, class2):
    """
    Compare two classes for equality
    :param class1: first class
    :param class2: second class
    :return: result of comparing class1 and class2 with ==
    """
    same = class1 == class2
    return same
def is_(value, type_):
    """
    Tell whether a value is an instance of a class (or of any class in a tuple of classes)
    :param value: object to check for
    :param type_: class, or tuple of classes, handed to isinstance
    :return: True if value is an instance of type_, False otherwise
    """
    matches = isinstance(value, type_)
    return matches
def is_type(type1, type2):
    """
    Compare two types for equality
    :param type1: first type
    :param type2: second type
    :return: result of comparing type1 and type2 with ==
    """
    equal = type1 == type2
    return equal
def is_function(value):
    """
    Check if a param is a function (any callable object)
    :param value: object to check for
    :return: True if the object can be called, False otherwise
    """
    # callable() inspects the object's type, so a plain instance attribute that merely
    # happens to be named __call__ does not make the object look callable.
    return callable(value)
def is_list(value):
    """
    Tell whether an object is a list
    :param value: object to check for
    :return: True if value is a list instance, False otherwise
    """
    list_like = isinstance(value, list)
    return list_like
def is_list_empty(value):
    """
    Tell whether a list (or any sized object) has no elements
    :param value: object supporting len()
    :return: True if len(value) is zero, False otherwise
    """
    size = len(value)
    return not size
def is_dict(value):
    """
    Tell whether an object is a dictionary
    :param value: object to check for
    :return: True if value is a dict instance, False otherwise
    """
    dict_like = isinstance(value, dict)
    return dict_like
def is_tuple(value):
    """
    Tell whether an object is a tuple
    :param value: object to check for
    :return: True if value is a tuple instance, False otherwise
    """
    tuple_like = isinstance(value, tuple)
    return tuple_like
def is_column(value):
    """
    Tell whether an object is a Spark column
    :param value: object to check for
    :return: True if value is an instance of F.Column, False otherwise
    """
    column_like = isinstance(value, F.Column)
    return column_like
def is_column_a(df, column, dtypes):
    """
    Tell whether a single dataframe column has one of the given data types
    :param df: dataframe whose schema is inspected
    :param column: column to be checked; only one column is accepted
    :param dtypes: data type(s) to check against, passed through parse_spark_dtypes
    :return: True if the column data type is an instance of any of the parsed types
    """
    columns = val_to_list(column)
    # More than one column is reported through RaiseIt.length_error
    if len(columns) > 1:
        RaiseIt.length_error(columns, 1)

    expected_types = tuple(val_to_list(parse_spark_dtypes(dtypes)))
    column_name = one_list_to_val(columns)

    # Compare the schema entry of the column against the expected data types
    field = df.schema[column_name]
    return isinstance(field.dataType, expected_types)
def is_list_of_str(value):
    """
    Tell whether an object is a non-empty list containing only strings
    :param value: object to check for
    :return: True for a non-empty list whose elements are all str, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    for elem in value:
        if not isinstance(elem, str):
            return False
    return True
def is_list_of_int(value):
    """
    Tell whether an object is a non-empty list containing only integers
    :param value: object to check for
    :return: True for a non-empty list whose elements are all int, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    for elem in value:
        if not isinstance(elem, int):
            return False
    return True
def is_list_of_float(value):
    """
    Tell whether an object is a non-empty list containing only floats
    :param value: object to check for
    :return: True for a non-empty list whose elements are all float, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    for elem in value:
        if not isinstance(elem, float):
            return False
    return True
def is_list_of_str_or_int(value):
    """
    Tell whether an object is a non-empty list containing only strings and/or integers
    :param value: object to check for
    :return: True for a non-empty list whose elements are all int or str, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    accepted = (int, str)
    for elem in value:
        if not isinstance(elem, accepted):
            return False
    return True
def is_list_of_str_or_num(value):
    """
    Tell whether an object is a non-empty list containing only strings, integers and/or floats
    :param value: object to check for
    :return: True for a non-empty list whose elements are all str, int or float, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    accepted = (str, int, float)
    for elem in value:
        if not isinstance(elem, accepted):
            return False
    return True
def is_list_of_dataframes(value):
    """
    Tell whether an object is a non-empty list containing only Spark DataFrames
    :param value: object to check for
    :return: True for a non-empty list whose elements are all DataFrame, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    for elem in value:
        if not isinstance(elem, DataFrame):
            return False
    return True
def is_filepath(file_path):
    """
    Check if a value is a usable file path: it either exists already or could be created
    :param file_path: path to check
    :return: True if the path exists or its parent directory is writable, False otherwise
    """
    # Nothing to check for None or an empty path
    if not file_path:
        return False

    # the file is there
    if os.path.exists(file_path):
        return True

    # the file does not exist but write privileges may be given on its directory.
    # A bare file name has an empty dirname, which stands for the current directory;
    # os.access("") always fails, so fall back to os.curdir in that case.
    parent_dir = os.path.dirname(file_path) or os.curdir
    return os.access(parent_dir, os.W_OK)
def is_ip(value):
    """
    Check if a value is a valid ip (IPv4 address in dotted-decimal notation)
    :param value: string to check
    :return: True if value holds four dot-separated integers in the 0-255 range, False otherwise
    """
    # Anything that is not a string can not be split into octets
    if not isinstance(value, str):
        return False

    parts = value.split(".")
    if len(parts) != 4:
        return False

    for item in parts:
        try:
            octet = int(item)
        except ValueError:
            # Empty or non-numeric parts ("1..2.3", "a.b.c.d") are not a valid ip
            return False
        if not 0 <= octet <= 255:
            return False
    return True
def is_list_of_strings(value):
    """
    Tell whether an object is a non-empty list in which every element is a string
    :param value: object to check for
    :return: True for a non-empty list whose elements are all str, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    for elem in value:
        if not isinstance(elem, str):
            return False
    return True
def is_list_of_numeric(value):
    """
    Tell whether an object is a non-empty list in which every element is an int or a float
    :param value: object to check for
    :return: True for a non-empty list whose elements are all int or float, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    accepted = (int, float)
    for elem in value:
        if not isinstance(elem, accepted):
            return False
    return True
def is_list_of_tuples(value):
    """
    Tell whether an object is a non-empty list in which every element is a tuple
    :param value: object to check for
    :return: True for a non-empty list whose elements are all tuple, False otherwise
    """
    # Falsy values (None, empty list, ...) never qualify
    if not value:
        return False
    if not isinstance(value, list):
        return False
    for elem in value:
        if not isinstance(elem, tuple):
            return False
    return True
def is_list_of_one_element(value):
    """
    Check if a var is a list holding exactly one element
    :param value: object to check for
    :return: True for a one-element list, False otherwise (including non-list values)
    """
    # Non-list values yield an explicit False rather than an implicit None
    return isinstance(value, list) and len(value) == 1
def is_dict_of_one_element(value):
    """
    Check if a var is a dictionary holding exactly one key
    :param value: object to check for
    :return: True for a one-key dict, False otherwise (including non-dict values)
    """
    # Non-dict values yield an explicit False rather than an implicit None
    return isinstance(value, dict) and len(value) == 1
def is_one_element(value):
    """
    Tell whether a var is a single (scalar) element
    :param value: object to check for
    :return: True if value is a str, int, float or bool, False otherwise
    """
    single_types = (str, int, float, bool)
    return isinstance(value, single_types)
def is_num_or_str(value):
    """
    Tell whether a var is a number (int, float) or a string
    :param value: object to check for
    :return: True if value is an int, float or str, False otherwise
    """
    accepted = (int, float, str)
    return isinstance(value, accepted)
def is_str_or_int(value):
    """
    Tell whether a var is a string or an integer
    :param value: object to check for
    :return: True if value is a str or int, False otherwise
    """
    accepted = (str, int)
    return isinstance(value, accepted)
def is_numeric(value):
    """
    Tell whether a var is numeric (int or float)
    :param value: object to check for
    :return: True if value is an int or float (bool included, as a subclass of int), False otherwise
    """
    numeric_types = (int, float)
    return isinstance(value, numeric_types)
def is_str(value):
    """
    Tell whether an object is a string
    :param value: object to check for
    :return: True if value is a str instance, False otherwise
    """
    string_like = isinstance(value, str)
    return string_like
def is_int(value):
    """
    Tell whether an object is an integer
    :param value: object to check for
    :return: True if value is an int instance (bool included, as a subclass of int), False otherwise
    """
    int_like = isinstance(value, int)
    return int_like
def is_url(value):
    """
    Check if a string looks like an http, https, ftp or ftps url
    :param value: string to check
    :return: the regex match object when value matches the url pattern, None otherwise
    """
    scheme = r'^(?:http|ftp)s?://'  # http://, https://, ftp:// or ftps://
    host = (
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    )
    port = r'(?::\d+)?'  # optional port
    tail = r'(?:/?|[/?]\S+)$'  # optional path / query string up to the end
    url_pattern = re.compile(scheme + host + port + tail, re.IGNORECASE)
    return url_pattern.match(value)
def is_float(value):
    """
    Tell whether an object is a float
    :param value: object to check for
    :return: True if value is a float instance, False otherwise
    """
    float_like = isinstance(value, float)
    return float_like
# TODO: can be confused with is_type
def is_dataframe(value):
    """
    Tell whether an object is a Spark DataFrame
    :param value: object to check for
    :return: True if value is a DataFrame instance, False otherwise
    """
    dataframe_like = isinstance(value, DataFrame)
    return dataframe_like
def is_bool(value):
    """
    Tell whether an object is a boolean
    :param value: object to check for
    :return: True if value is a bool instance, False otherwise
    """
    bool_like = isinstance(value, bool)
    return bool_like
def is_datetime(value):
    """
    Tell whether an object is a datetime
    :param value: object to check for
    :return: True if value is a datetime.datetime instance, False otherwise
    """
    datetime_like = isinstance(value, datetime.datetime)
    return datetime_like
def is_binary(value):
    """
    Tell whether an object is a bytearray
    :param value: object to check for
    :return: True if value is a bytearray instance, False otherwise (plain bytes included)
    """
    bytearray_like = isinstance(value, bytearray)
    return bytearray_like
def is_date(value):
    """
    Tell whether an object is a date
    :param value: object to check for
    :return: True if value is a datetime.date instance (datetime.datetime is a subclass,
             so datetimes qualify too), False otherwise
    """
    date_like = isinstance(value, datetime.date)
    return date_like
def has_(value, _type):
    """
    Tell whether a list holds at least one element of a specific data type
    :param value: list (or any iterable) to scan
    :param _type: data type, or tuple of data types, to look for
    :return: True as soon as one element is an instance of _type, False if none is
    """
    for elem in value:
        if isinstance(elem, _type):
            return True
    return False