-
Notifications
You must be signed in to change notification settings - Fork 0
/
yandex_test_task.py
63 lines (47 loc) · 2.37 KB
/
yandex_test_task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from __future__ import division
import numpy as np
import pandas as pd
from math import sqrt
from scipy.stats import norm
# Load the request log. The file has no header row, so column names are
# supplied here: "N", "type" (the request path) and "from_mobile".
# NOTE(review): the path is hard-coded to a local Windows drive.
data = pd.read_csv("D:/log.txt", delimiter=',', names=["N", "type", "from_mobile"])

# Group by request type and aggregate the 'from_mobile' column into its
# mean, sum and row count. String aggregation names are used instead of
# np.mean / np.sum / np.size: they yield the same 'mean' / 'sum' / 'size'
# column labels and avoid pandas' deprecation warning for NumPy callables.
# NOTE(review): treating the mean as a share assumes 'from_mobile' is a
# 0/1 flag -- confirm against the log format.
stats = data.groupby(['type']).agg({'from_mobile': ['mean', 'sum', 'size']})

# Share of requests that came from mobile devices, per request type.
p_index = stats['from_mobile']['mean']['/index']
p_home = stats['from_mobile']['mean']['/home']
p_test = stats['from_mobile']['mean']['/test']

# print(...) with a single argument behaves identically on Python 2 and 3.
print('part of /index requests from mobile gadgets is {:.3f}'.format(p_index))
print('part of /home requests from mobile gadgets is {:.3f}'.format(p_home))
print('part of /test requests from mobile gadgets is {:.3f}'.format(p_test))

# Total number of requests, per request type.
n_index = stats['from_mobile']['size']['/index']
n_home = stats['from_mobile']['size']['/home']
n_test = stats['from_mobile']['size']['/test']
# func for calculating confidence interval
def conf_interval(value, p, n):
    """Return the normal-approximation confidence interval for a proportion.

    The interval is centred on ``p`` with standard deviation
    ``sqrt(p * (1 - p) / n)``.

    Args:
        value: Confidence level as a fraction, e.g. ``0.95``.
        p: Observed sample proportion, in ``[0, 1]``.
        n: Sample size; must be positive.

    Returns:
        A ``(lower, upper)`` pair. The bounds are not clipped to ``[0, 1]``.
        When ``p`` is exactly 0 or 1 the interval collapses to ``(p, p)``.

    Raises:
        ValueError: If ``n`` is not positive or ``p`` is outside ``[0, 1]``.
    """
    if n <= 0:
        raise ValueError("n must be positive, got {!r}".format(n))
    if not 0 <= p <= 1:
        raise ValueError("p must be within [0, 1], got {!r}".format(p))

    # float() keeps this a true division even without the file-level
    # `from __future__ import division` on Python 2.
    var = p * (1 - p) / float(n)
    sigma = sqrt(var)
    if sigma == 0:
        # norm.interval() returns (nan, nan) for scale=0; with zero
        # variance the interval is just the point estimate itself.
        return (p, p)
    return norm.interval(value, loc=p, scale=sigma)
# 95% confidence interval of the mobile share for each request type.
conf_int_index = conf_interval(0.95, p_index, n_index)
conf_int_home = conf_interval(0.95, p_home, n_home)
conf_int_test = conf_interval(0.95, p_test, n_test)

# print(...) with a single argument behaves identically on Python 2 and 3.
print('confidence interval of 95% for /index request is {:.3f}-{:.3f}'.format(conf_int_index[0], conf_int_index[1]))
print('confidence interval of 95% for /home request is {:.3f}-{:.3f}'.format(conf_int_home[0], conf_int_home[1]))
print('confidence interval of 95% for /test request is {:.3f}-{:.3f}'.format(conf_int_test[0], conf_int_test[1]))

# Number of requests from mobile gadgets for the two request types that
# are compared in the Z test.
m_index = stats['from_mobile']['sum']['/index']
m_test = stats['from_mobile']['sum']['/test']
# func for calculating Z_score
def Z_score(m1, n1, p1, m2, n2, p2):
    """Return the absolute Z statistic of a pooled two-proportion Z test.

    Args:
        m1: Number of positive observations in the first sample.
        n1: Size of the first sample; must be positive.
        p1: Proportion observed in the first sample.
        m2: Number of positive observations in the second sample.
        n2: Size of the second sample; must be positive.
        p2: Proportion observed in the second sample.

    Returns:
        ``abs(p1 - p2) / sqrt(D)`` where
        ``D = p * (1 - p) * (n1 + n2) / (n1 * n2)`` and
        ``p = (m1 + m2) / (n1 + n2)`` is the pooled proportion.
        If ``D`` is zero (pooled proportion exactly 0 or 1) the result is
        ``0.0`` when ``p1 == p2`` and ``inf`` otherwise.

    Raises:
        ValueError: If ``n1`` or ``n2`` is not positive.
    """
    if n1 <= 0 or n2 <= 0:
        raise ValueError(
            "sample sizes must be positive, got n1={!r}, n2={!r}".format(n1, n2))

    # float() keeps this a true division even without the file-level
    # `from __future__ import division` on Python 2.
    pooled = (m1 + m2) / float(n1 + n2)
    diff = p1 - p2
    pooled_var = pooled * (1 - pooled) * (n1 + n2) / (n1 * n2)
    if pooled_var == 0:
        # Zero pooled variance would divide by zero below. Identical
        # proportions carry no evidence of a difference; anything else is
        # an unbounded statistic.
        return 0.0 if diff == 0 else float('inf')
    return abs(diff / sqrt(pooled_var))
# Critical value for the test of H0: P_index == P_test at the 5%
# significance level. Z_score() returns |Z|, which makes this a two-sided
# test, so the 1 - 0.05/2 quantile of the standard normal (about 1.96) is
# required. The previous constant 1.6449 is the one-sided 5% quantile and,
# compared against |Z|, actually tested at the 10% level.
Z_a = norm.ppf(1 - 0.05 / 2)
Z = Z_score(m_index, n_index, p_index, m_test, n_test, p_test)

# A small |Z| means H0 cannot be rejected (which is not proof that it is
# true); |Z| at or above the critical value rejects H0.
if Z < Z_a:
    print("{:.4f} is less than {:.4f} so the hypothesis of P_index and P_test equality is not rejected at significance level of 5%".format(Z, Z_a))
else:
    print("{:.4f} is not less than {:.4f} so the hypothesis of P_index and P_test equality is rejected at significance level of 5%".format(Z, Z_a))