# condition_aux.py (forked from INET-Complexity/isle)
"""Auxiliary functions and data for calibration test functions"""
import numpy as np
"""Data"""
"""Bloomberg size data for US firms"""
# Empirical firm sizes of insurance firms for 2017 (Bloomberg size data for US
# firms, per the section header above). 69 values, listed in ascending order.
# NOTE(review): the unit/currency of the sizes is not stated in this file --
# confirm against the data source before comparing with absolute quantities.
insurance_firm_sizes_empirical_2017 = [
42.4701,
108.0418,
110.2641,
114.437,
130.2988,
133.674,
146.438,
152.3354,
239.032,
337.689,
375.914,
376.988,
395.859,
436.191,
482.503,
585.824,
667.849,
842.264,
894.848,
896.227,
904.873,
1231.126,
1357.016,
1454.999,
1518.236,
1665.859,
1681.94,
1737.9198,
1771.21,
1807.279,
1989.742,
2059.921,
2385.485,
2756.695,
2947.244,
3014.3,
3659.2,
3840.1,
4183.431,
4929.197,
5101.323,
5224.622,
5900.881,
7686.431,
8376.2,
8439.743,
8764.0,
9095.0,
11198.34,
14433.0,
15469.6,
19403.5,
21843.0,
23192.374,
24299.917,
25218.63,
31843.0,
32051.658,
32805.016,
38701.2,
56567.0,
60658.0,
79586.0,
103483.0,
112422.0,
167022.0,
225260.0,
498301.0,
702095.0,
]
# Empirical firm sizes of reinsurance firms for 2017. 15 values; unlike the
# insurance list above, these are NOT sorted.
# NOTE(review): presumably from the same Bloomberg data set and in the same
# unit as the insurance sizes -- this file does not say so explicitly; confirm.
reinsurance_firm_sizes_empirical_2017 = [
396.898,
627.808,
6644.189,
15226.131,
25384.317,
23591.792,
3357.393,
13606.422,
4671.794,
614.121,
60514.818,
24760.177,
2001.669,
182.2,
12906.4,
]
"""Functions"""
def condition_stationary_state(series):
    """Stationarity test function for time series.

    Tests if the mean of the last 25% of the time series (test window) is
    within 1-2 standard deviations of the mean of the middle section between
    25% and 75% of the time series (reference window). The first 25% are not
    considered, to discard the transient.

    Arguments:
        series: Type list of numeric or numpy array. The time series.
    Returns:
        Calibration score between 0 and 1 (float). Is 1 if the mean of the
        last 25% is within one reference standard deviation of the reference
        mean, falls linearly from 1 to 0 between one and two standard
        deviations, and is 0 otherwise.
        If the reference window is constant (zero standard deviation), the
        score is 1 when the two means agree up to floating-point tolerance
        and 0 otherwise.
        NaN is returned when the score cannot be evaluated, e.g. when the
        series has fewer than two samples or contains NaN.
    """
    values = np.asarray(series)
    n_samples = len(values)
    reference = values[int(n_samples * 0.25) : int(n_samples * 0.75)]
    test = values[int(n_samples * 0.75) :]

    # Compute means and standard deviation
    mean_reference = np.mean(reference)
    std_reference = np.std(reference)
    mean_test = np.mean(test)

    # A constant reference window has no spread to measure the deviation in,
    # so the ratio below would be 0/0 or x/0. Decide directly instead: equal
    # means count as stationary, anything else does not. The min/max check
    # catches constant windows whose computed std is a rounding residue
    # rather than exactly zero.
    is_flat = reference.size > 0 and reference.min() == reference.max()
    if is_flat or std_reference == 0:
        return 1.0 if np.isclose(mean_test, mean_reference) else 0.0

    # Compute score: 2 - |deviation| / std, i.e. 1 at one standard deviation
    # and 0 at two standard deviations, clipped to [0, 1].
    deviation = np.abs(mean_test - mean_reference)
    score = 1 - (deviation - std_reference) / std_reference
    return float(np.clip(score, 0, 1))
def scaler(series):
    """Function to do a standard score scaling of the log of a heavy-tailed distribution.

    This is used to calibrate distributions where the unit is not important
    (distributions of sizes of firms e.g.). This would be perfectly
    appropriate for lognormal distributions, but should work reasonably well
    for calibration of other heavy-tailed distributions. An alternative would
    be a scaling robust towards outliers (as included in the sklearn package).

    Arguments:
        series: Type list of numeric or numpy array. The values to scale;
            every value must be greater than one.
    Returns:
        Calibrated series as a numpy array: exp of the standard score of
        log(series). If all values are identical, every standard score is
        taken to be 0 and an array of ones is returned.
    Raises:
        ValueError: if any value is not greater than one.
    """
    # TODO: find a better way to scale heavy-tailed distributions than to use
    #       standard score scaling on logs
    values = np.asarray(series)
    if not (values > 1).all():
        raise ValueError("Series is not uniformly greater than one")

    log_values = np.log(values)
    centered = log_values - np.mean(log_values)
    spread = np.std(log_values)

    # A constant series has zero spread; dividing by it would turn the whole
    # result into NaN. Its values all sit exactly at the mean, so use a
    # standard score of 0. The min/max check catches constant input whose
    # computed std is a rounding residue rather than exactly zero.
    is_constant = log_values.size > 0 and log_values.min() == log_values.max()
    if is_constant or spread == 0:
        z_scores = np.zeros_like(log_values)
    else:
        z_scores = centered / spread
    return np.exp(z_scores)