-
Notifications
You must be signed in to change notification settings - Fork 2
/
txt2textab.py
269 lines (226 loc) · 11.4 KB
/
txt2textab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
"""
Convert FMD (``*_fmd.md``) text table files into LaTeX table files.

Each line of an FMD file is translated according to its leading marker
(``# ``, ``## ``, ``### ``, ``##> ``, ``###> `` or ``> ``); every other
non-empty line is treated as a comma-separated table row whose cells are
rounded and decorated with significance stars or standard-error brackets.

Includes method :func:`tab_txt2tex_f2f` and :func:`tab_txt2tex`.
"""
import os
import numpy as np
from pathlib import Path
import pyfan.util.path.getfiles as getfiles
import pyfan.amto.numeric.round as pyfan_amto_round
def tab_txt2tex_f2f(spt_root="", st_rglob='tab_*_fmd.md', **kwargs):
    """Convert every matching FMD text file into a ``.tex`` file beside it.

    Parameters
    ----------
    spt_root : str
        Root folder handed to ``getfiles.fp_search_rglob``.
    st_rglob : str
        File-name pattern handed to ``getfiles.fp_search_rglob``
        (presumably a recursive glob pattern -- confirm in
        ``pyfan.util.path.getfiles``).
    **kwargs
        Forwarded unchanged to :func:`tab_txt2tex`.

    Returns
    -------
    list of str
        Paths of the ``.tex`` files written, one per input file found, in the
        order the input files were returned by the search.
    """
    # 1. Find the FMD text files to convert.
    ls_spn_found_fmd_txt = getfiles.fp_search_rglob(spt_root=spt_root, st_rglob=st_rglob)

    # 2. Convert each file in turn.
    ls_spn_generated_tex = []
    for spn_found_fmd_txt in ls_spn_found_fmd_txt:
        # 3. Read all lines; the context manager closes the file even if the
        #    read fails part-way.
        with open(spn_found_fmd_txt) as fl_txt_reg_contents:
            ls_st_txt_regs = fl_txt_reg_contents.readlines()

        # 4. Convert FMD lines to TEX table lines.
        ls_st_tex_returns = tab_txt2tex(ls_st_txt_regs, **kwargs)

        # 5. Same folder, same stem, ``.tex`` suffix instead of the original one.
        snm_file_name_stem_tex = Path(spn_found_fmd_txt).stem + '.tex'
        path_no_file, __ = os.path.split(spn_found_fmd_txt)
        spn_tex_file = os.path.join(path_no_file, snm_file_name_stem_tex)
        ls_spn_generated_tex.append(spn_tex_file)

        # 6. Write the TEX lines; each already carries its own line ending
        #    where one is wanted, so nothing is added here.
        with open(spn_tex_file, "w") as fl_tex:
            fl_tex.writelines(ls_st_tex_returns)

    return ls_spn_generated_tex
def _tab_txt2tex_cell(st_esti, ls_st_estimates, it_or_dc_round_decimal):
    r"""Format one comma-separated cell of a table row for LaTeX.

    Significance stars and round brackets are stripped, the remainder is
    rounded when it parses as a number, and the decoration is then re-applied:
    ``\sym{...}`` stars for un-bracketed cells, ``( )`` for bracketed cells.
    Stars on a bracketed cell are dropped.

    ``ls_st_estimates`` is only used to build the error message.

    Raises
    ------
    TypeError
        If the cell contains ``(`` but no ``)``.
    """
    # Longest marker first, so '***' is not mistaken for '**' or '*'.
    st_sym = ""
    for st_stars in ("***", "**", "*"):
        if st_stars in st_esti:
            st_esti = st_esti.replace(st_stars, "")
            st_sym = "\\sym{" + st_stars + "}"
            break

    # Record which brackets were present before removing them.
    bl_has_open = "(" in st_esti
    bl_has_close = ")" in st_esti
    st_esti = st_esti.replace("(", "").replace(")", "")

    # Decimal rounding; anything that cannot be parsed or rounded is kept as text.
    try:
        fl_esti = float(st_esti)
        st_esti_rounded = pyfan_amto_round.ff_decimal_rounder(
            ls_fl_num2format=[fl_esti],
            it_or_dc_round_decimal=it_or_dc_round_decimal)[0]
    except Exception:
        # NOTE(review): deliberately broad, as in the original code -- the
        # rounder is opaque from here and falling back to the raw text is the
        # established best-effort behaviour. Consider narrowing once the
        # rounder's failure modes are known.
        st_esti_rounded = st_esti

    if not bl_has_open and not bl_has_close:
        # A. No brackets: point estimate (or label), stars re-attached.
        return st_esti_rounded + st_sym
    if bl_has_close:
        # B. Brackets: standard error. A closing bracket alone is accepted.
        return "(" + st_esti_rounded + ")"
    raise TypeError(f'{ls_st_estimates=} and {st_esti=}, missing bracket')


def tab_txt2tex(ls_st_txt_regs,
                it_col_count=6, fl_adj_box_maxwidth=1,
                it_or_dc_round_decimal=2,
                fl_col_label_width_cm=5, fl_col_coef_width_cm=2,
                fl_indent_pound1_mm=0, fl_indent_pound2_mm=0, fl_indent_pound3_mm=6):
    r"""Convert the lines of an FMD text table into LaTeX table lines.

    Each input line is handled according to its leading marker (only the
    leading marker is removed; the rest of the line is kept verbatim):

    * ``"# "``    -- table title: opens ``table``, ``adjustbox`` and
      ``tabular`` and writes the caption.
    * ``"## "``   -- group heading spanning all columns, level-2 indent.
    * ``"### "``  -- italic group heading spanning all columns, level-3 indent.
    * ``"##> "``  -- row label with level-2 indent; emitted without a line
      ending so the following data row continues on the same line.
    * ``"###> "`` -- row label with level-3 indent, likewise without a line
      ending.
    * ``"> "``    -- raw LaTeX, copied through followed by a newline.
    * otherwise   -- comma-separated data row; cells are joined with ``&``,
      prefixed with ``&`` and followed by ``\\`` and ``\addlinespace``.
      Empty lines produce no output.

    Parameters
    ----------
    ls_st_txt_regs : iterable of str
        Input lines; newline characters are removed before processing.
    it_col_count : int
        Total number of table columns. Used as the ``\multicolumn`` span of
        headings; the ``tabular`` gets one label column plus
        ``it_col_count - 1`` centered columns.
    fl_adj_box_maxwidth : float
        ``adjustbox`` max width, as a multiple of ``\textwidth``.
    it_or_dc_round_decimal : int or dict
        Passed unchanged to ``pyfan_amto_round.ff_decimal_rounder``; the
        rounding rule is defined there.
    fl_col_label_width_cm : float
        Width in cm of the first (label) column.
    fl_col_coef_width_cm : float
        Width in cm of each of the remaining columns.
    fl_indent_pound1_mm : float
        ``\hspace*`` in mm placed before the caption text.
    fl_indent_pound2_mm : float
        ``\hspace*`` in mm for ``"## "`` and ``"##> "`` lines.
    fl_indent_pound3_mm : float
        ``\hspace*`` in mm for ``"### "`` and ``"###> "`` lines.

    Returns
    -------
    list of str
        LaTeX fragments in input order. The closing ``\end{tabular}``,
        ``\end{adjustbox}`` and ``\end{table}`` lines are always appended,
        whether or not a ``"# "`` title line was seen.

    Raises
    ------
    TypeError
        If a data cell contains ``(`` without a matching ``)``.
    """
    ls_st_tex_returns = []

    for st_txt_regq_line in ls_st_txt_regs:
        # Delete linebreak at end of line.
        st_line = st_txt_regq_line.replace("\n", "")
        st_text_out = ''

        # Longer markers are tested first because shorter ones are their
        # prefixes' siblings ('###> ' before '### ', '## ' before '# ', ...).
        if st_line.startswith('###> '):
            # Row label, 3rd-level indent, joined with the coefficients that follow.
            st_text_out = "\\hspace*{" + str(fl_indent_pound3_mm) + "mm}" + st_line[5:]
        elif st_line.startswith('##> '):
            # Row label, 2nd-level indent, joined with the coefficients that follow.
            st_text_out = "\\hspace*{" + str(fl_indent_pound2_mm) + "mm}" + st_line[4:]
        elif st_line.startswith('### '):
            # Group heading, 3rd-level indent, italic.
            st_text_out = (
                "\\addlinespace\n"
                + "\\multicolumn{" + str(it_col_count) + "}{l}{\\hspace*{"
                + str(fl_indent_pound3_mm) + "mm}\\textit{" + st_line[4:] + "}}\\\\\n"
                + "\\addlinespace\n")
        elif st_line.startswith('## '):
            # Group heading, 2nd-level indent.
            st_text_out = (
                "\\addlinespace\n"
                + "\\multicolumn{" + str(it_col_count) + "}{l}{\\hspace*{"
                + str(fl_indent_pound2_mm) + "mm}" + st_line[3:] + "}\\\\\n"
                + "\\addlinespace\n")
        elif st_line.startswith('# '):
            # Title line: open the table environments.
            st_text_out = (
                "\\begin{table}[htbp]\n"
                + "\\centering\n"
                + "\\caption{\\hspace*{" + str(fl_indent_pound1_mm) + "mm}" + st_line[2:] + "}\n"
                + "\\begin{adjustbox}{max width=" + str(fl_adj_box_maxwidth) + "\\textwidth}\n"
                + "\\begin{tabular}{m{" + str(fl_col_label_width_cm) + "cm}"
                + "*{" + str(it_col_count - 1) + "}{>{\\centering\\arraybackslash}"
                + "m{" + str(fl_col_coef_width_cm) + "cm}}}\n")
        elif st_line.startswith('> '):
            # Raw LaTeX line, included as it appears.
            st_text_out = st_line[2:] + "\n"
        else:
            # Anything else is a comma-separated data row.
            ls_st_estimates = st_line.split(",")
            for it_esti_ctr, st_esti in enumerate(ls_st_estimates):
                ls_st_estimates[it_esti_ctr] = _tab_txt2tex_cell(
                    st_esti, ls_st_estimates, it_or_dc_round_decimal)
            st_text_out = ' & '.join(ls_st_estimates)
            if st_text_out:
                # Leading ampersand: the row label was emitted by a preceding
                # '##> ' / '###> ' line (or the label cell is left empty).
                st_text_out = ' & ' + st_text_out + '\\\\\n\\addlinespace\n'

        if st_text_out:
            ls_st_tex_returns.append(st_text_out)

    # Close the environments opened by the title line.
    ls_st_tex_returns.append("\\end{tabular}\n")
    ls_st_tex_returns.append("\\end{adjustbox}\n")
    ls_st_tex_returns.append("\\end{table}\n")

    return ls_st_tex_returns
if __name__ == '__main__':
    # Folder that holds the paper's FMD table files.
    spt_root = 'C:/Users/fan/Box/Pollution and inequality/drafts/paper_final_tab/'

    # Rounding rule for the first main table; its interpretation is defined by
    # the rounder that tab_txt2tex forwards it to (presumably
    # "threshold -> decimals" -- confirm in pyfan.amto.numeric.round).
    it_or_dc_round_decimal_tab1 = {100: 2, float("inf"): 0}

    # Each job pairs a group of file patterns with the layout options shared
    # by every table in that group. Jobs run in the order listed.
    ls_tp_jobs = [
        (['table_main1_fmd.md', 'table_main1_fmd_long.md'],
         {'it_col_count': 8,
          'fl_col_coef_width_cm': 1.5,
          'fl_col_label_width_cm': 7.75,
          'it_or_dc_round_decimal': it_or_dc_round_decimal_tab1,
          'fl_adj_box_maxwidth': 1.1}),
        (['table_main2_fmd.md'],
         {'it_col_count': 7,
          'fl_col_label_width_cm': 2.5,
          'it_or_dc_round_decimal': 2}),
        (['table_main4_fmd.md', 'table_main5_fmd.md',
          'table_app2_fmd.md', 'table_app3_fmd.md'],
         {'it_col_count': 6,
          'fl_col_label_width_cm': 5.2}),
        (['table_main3_fmd.md', 'table_app1_fmd.md'],
         {'it_col_count': 5,
          'fl_col_label_width_cm': 5.2}),
    ]

    for ls_st_rglob, dc_kwargs in ls_tp_jobs:
        for st_rglob in ls_st_rglob:
            tab_txt2tex_f2f(spt_root=spt_root, st_rglob=st_rglob, **dc_kwargs)