-
Notifications
You must be signed in to change notification settings - Fork 233
/
functions.py
96 lines (78 loc) · 2.38 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import json
import logging
import math

from optimus.helpers.constants import ProfilerDataTypes, CONFIDENCE_LEVEL_CONSTANT, PROFILER_COLUMN_TYPES
from optimus.helpers.json import json_converter
def fill_missing_var_types(var_types, data_types):
    """
    Pad the per-column data type entries of string columns with 0
    :param var_types: Mapping of column name to a mapping keyed by data type value; updated in place
    :param data_types: Mapping of column name to that column's data type value
    :return: The same var_types object
    """
    for col_name, type_entries in var_types.items():
        # Columns that are not strings are left exactly as they are.
        if data_types[col_name] != ProfilerDataTypes.STRING.value:
            continue

        # Give every profiler data type an entry, keeping the ones already present.
        for data_type in ProfilerDataTypes:
            type_entries.setdefault(data_type.value, 0)

    return var_types
def fill_missing_col_types(col_types):
    """
    Make sure every profiler column type has an entry, using 0 for absent ones
    :param col_types: Mapping keyed by column type; updated in place
    :return: The same col_types object
    """
    absent = [col_type for col_type in PROFILER_COLUMN_TYPES if col_type not in col_types]
    for col_type in absent:
        col_types[col_type] = 0
    return col_types
def write_json(data, path):
    """
    Write the profiler result to a JSON file (UTF-8, 4-space indent, non-ASCII kept as is).

    This is best effort: an I/O failure is logged as a warning and not raised.
    :param data: Object to serialize; values json cannot encode natively are handed to json_converter
    :param path: Destination file path
    :return: None
    """
    try:
        with open(path, 'w', encoding='utf-8') as outfile:
            json.dump(data, outfile, indent=4, ensure_ascii=False, default=json_converter)
    except OSError as e:
        # Keep the "never raise on I/O errors" contract, but leave a trace so
        # a missing output file can be diagnosed instead of failing silently.
        logging.getLogger(__name__).warning("Could not write JSON file '%s': %s", path, e)
def write_html(data, path):
    """
    Write an HTML string to a file using UTF-8 encoding.

    This is best effort: an I/O failure is logged as a warning and not raised.
    :param data: Text to write
    :param path: Destination file path
    :return: None
    """
    try:
        with open(path, 'w', encoding='utf-8') as outfile:
            outfile.write(data)
    except OSError as e:
        # Keep the "never raise on I/O errors" contract, but leave a trace so
        # a missing output file can be diagnosed instead of failing silently.
        logging.getLogger(__name__).warning("Could not write HTML file '%s': %s", path, e)
def sample_size(population_size, confidence_level, confidence_interval):
    """
    Get the sample size needed to represent the whole population
    :param population_size: The size of the population
    :param confidence_level: Confidence level you want to get from 0 to 100. Must match
    one of the levels listed in CONFIDENCE_LEVEL_CONSTANT
    :param confidence_interval: Confidence interval (margin of error) you want to get, as a
    percentage. Must not be 0, otherwise the division by its square fails
    :return: The sample size rounded up to an integer, -1 if the confidence level is not
    supported, or 0 if the population is empty
    """
    z = 0.0
    p = 0.5
    e = confidence_interval / 100.0

    # Loop through the supported confidence levels and find the number of
    # standard deviations (z-score) for the requested confidence level
    for entry in CONFIDENCE_LEVEL_CONSTANT:
        if entry[0] == confidence_level:
            z = entry[1]

    # z is still the initial 0.0 when no supported level matched
    if z == 0.0:
        return -1

    # An empty population has nothing to sample; this also avoids dividing
    # by zero in the finite population adjustment below
    if population_size <= 0:
        return 0

    # Calculate the sample size for an infinite population
    n_0 = ((z ** 2) * p * (1 - p)) / (e ** 2)

    # Adjust the sample size for a finite population
    n = n_0 / (1 + ((n_0 - 1) / float(population_size)))

    return int(math.ceil(n))  # sample size