forked from everknight/LargeODcost
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Test_extract_data_by_od_pairs.py
154 lines (135 loc) · 5.93 KB
/
Test_extract_data_by_od_pairs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import sys, os, math
#sys.path.append(os.path.split(os.path.realpath(__file__))[0])
sys.path.append(r"C:\Users\lirui\OneDrive\Desktop\OD_git\LargeODcost")
import timer_class as tc
input_csv = r"C:\Users\lirui\OneDrive\Desktop\test\Alabama_Alaska_District_of_Columbia_Pennsylvania_2010_zctas_pairs.csv" #r"D:\US_Estimated_ODMatrix\tx_la_2010_zctas_pairs.csv"
output_folder = r"C:\Users\lirui\OneDrive\Desktop\test"
output_csv = output_folder+"\\zcta_OD_extract_for_" + os.path.split(input_csv)[1] #r"D:\US_Estimated_ODMatrix\tx_la_2010_zctas_OD_extracts.csv"
db1 = r"D:\US_Estimated_ODMatrix\US_OD_data_plain\hour03" #r"D:\US_Estimated_ODMatrix\US_OD_data_plain\hour03"
db2 = r"D:\US_Estimated_ODMatrix\US_OD_data_plain\hour36" #r"D:\US_Estimated_ODMatrix\US_OD_data_plain\hour36"
lvl3_at = 33.91
lvl3_bt = 0.88
lvl3_ad = 20.86
lvl3_bd = 1.18
lvl3_speed = 50
def cal_geodesic_dist (x1, y1, x2, y2):
r = 3959 #miles
p1 = math.radians(y1)
p2 = math.radians(y2)
dp = math.radians(y1-y2)
dl = math.radians(x1-x2)
a = math.sin(dp/2) ** 2 + math.cos(p1)*math.cos(p2)*(math.sin(dl/2)**2)
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
d = r * c
return d
class data_station:
def __init__ (self, nlimit = 2000000):
self.n = 0
self.data = {}
self.nlimit = nlimit
def push_data(self, key, key1, d):
self.n += 1
if key in self.data:
if key1 in self.data[key]:
#arcpy.AddWarning("Repetitive key pair found: {0} - {1}".format(key, key1))
print("Repetitive key pair found: {0} - {1}".format(key, key1))
self.data[key][key1] = d
else:
self.data[key] = {}
self.data[key][key1] = d
def reset(self):
self.n = 0
self.data = {}
def get_data_from_lv1(ozcta, odict, db1, output_csv):
target_path = db1 + "\\" + ozcta[0:3] + "\\" + ozcta[3:] + ".data"
if os.path.exists(target_path):
f = open(target_path, "r")
outio = open(output_csv,"a")
row = f.readline()
while row:
row_content = row.replace("\n","").split(",")
if row_content[0] in odict:
outio.write("{0},{1},{2},{3},1\n".format(ozcta, row_content[0], row_content[1], row_content[2]))
del odict[row_content[0]]
row = f.readline()
f.close()
outio.close()
return odict
def get_data_from_lv2(ozcta, odict, db2, output_csv):
base_path = db2 + "\\" + ozcta[0:3] + "\\" + ozcta[3:]
dzcta2_index = {}
for each_dzcta in odict:
dzcta2 = each_dzcta[0:2]
if dzcta2 in dzcta2_index:
dzcta2_index[dzcta2][each_dzcta[2:]] = True
else:
dzcta2_index[dzcta2] = {}
dzcta2_index[dzcta2][each_dzcta[2:]] = True
for each_dzcta2 in dzcta2_index:
target_path = base_path + "\\" + each_dzcta2 + ".data"
if os.path.exists(target_path):
f = open(target_path, "r")
outio = open(output_csv,"a")
row = f.readline()
while row:
row_content = row.replace("\n","").split(",")
temp_dzcta = each_dzcta2+row_content[0]
if temp_dzcta in odict:
outio.write("{0},{1},{2},{3},2\n".format(ozcta, temp_dzcta, row_content[1], row_content[2]))
del odict[temp_dzcta]
row = f.readline()
f.close()
outio.close()
return odict
def cal_data_lv3(ozcta, odict, lvl3_at, lvl3_bt, lvl3_ad, lvl3_bd, output_csv, speed):
for each_dzcta in odict:
dist = cal_geodesic_dist (float(odict[each_dzcta][1]), float(odict[each_dzcta][0]), float(odict[each_dzcta][3]), float(odict[each_dzcta][2]))
adj_dist = lvl3_bd*dist + lvl3_ad
adj_time = lvl3_bt*dist/speed + lvl3_at
outio = open(output_csv,"a")
outio.write("{0},{1},{2},{3},3\n".format(ozcta, each_dzcta, adj_time, adj_dist))
outio.close()
def fetch_info_and_write(rs, db1, db2, output_csv, lvl3_at, lvl3_bt, lvl3_ad, lvl3_bd, lvl3_speed):
for each_ozcta in rs.data:
temp_t = tc.timer()
n0 = len(rs.data[each_ozcta])
lv11_leftover = get_data_from_lv1(each_ozcta, rs.data[each_ozcta], db1, output_csv)
n1 = n0 - len(lv11_leftover)
n10 = len(lv11_leftover)
temp_t.lap()
lvl2_leftover = get_data_from_lv2(each_ozcta, lv11_leftover, db1, output_csv)
n3 = len(lvl2_leftover)
n2 = n10 - n3
temp_t.lap()
cal_data_lv3(each_ozcta, lvl2_leftover, lvl3_at, lvl3_bt, lvl3_ad, lvl3_bd, output_csv, lvl3_speed)
temp_t.lap()
print("lvl1:{0}s - {3}\nlvl2:{1}s - {4}\nlvl3:{2}s - {5}\n\n".format(temp_t.time_int[0], temp_t.time_int[1], temp_t.time_int[2], n1, n2, n3))
rs.reset()
t1 = tc.timer()
indata = open(input_csv, "r")
outdata = open(output_csv, "w")
outdata.write("ozcta, dzcta, minutes, miles, lvl\n")
outdata.close()
rs = data_station()
header = indata.readline().replace("\n", "").split(",")
iozcta = header.index("OZCTA")
idzcta = header.index("DZCTA")
i = 0
row = indata.readline()
while row:
if i % 10000 == 0:
#arcpy.AddMessage("Retrieving {0} records...".format(i))
print("Retrieving {0} records...".format(i))
row_content = row.replace("\n", "").split(",")
temp_ozcta = row_content[iozcta]
temp_dzcta = row_content[idzcta]
rs.push_data(temp_ozcta, temp_dzcta, row_content[2:])
if rs.n >= rs.nlimit:
fetch_info_and_write(rs, db1, db2, output_csv, lvl3_at, lvl3_bt, lvl3_ad, lvl3_bd, lvl3_speed)
row = indata.readline()
i += 1
fetch_info_and_write(rs, db1, db2, output_csv, lvl3_at, lvl3_bt, lvl3_ad, lvl3_bd, lvl3_speed)
indata.close()
outdata.close()
t1.lap()
print(t1.format_time(t1.total_sec))