Skip to content

Commit

Permalink
[Data] Skip test_client_compat.py::test_client_data_get unit test (r…
Browse files Browse the repository at this point in the history
…ay-project#41634)

ray-project#41466 enables Ray Data streaming executor by default for all datasets. As a result, the Ray Data execution in `test_client_data_get` test is now executed through the streaming executor, which is known to have many incompatibilities since Ray 2.7. So, we skip the test which checks compatibility between Ray Client and Ray Data, until we have a future Ray Client implementation which can better support Ray Data usage.

Signed-off-by: Scott Lee <sjl@anyscale.com>
  • Loading branch information
scottjlee authored and TechVortex committed Dec 10, 2023
1 parent 003ed1c commit dc68136
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 5 deletions.
11 changes: 6 additions & 5 deletions python/ray/tests/test_client_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
except ImportError:
pyspark = None

from ray._private.test_utils import (
skip_flaky_core_test_premerge,
)


@pytest.mark.skip(
reason=(
"Ray Client not supported with Ray Data streaming execution, "
"which is enabled for all datasets by default in Ray 2.9."
)
)
@pytest.mark.skipif(pyspark is None, reason="PySpark dependency not found")
@pytest.mark.parametrize(
"call_ray_start",
Expand All @@ -22,7 +24,6 @@
],
indirect=True,
)
@skip_flaky_core_test_premerge("https://github.com/ray-project/ray/issues/41620")
def test_client_data_get(call_ray_start):
"""PySpark import changes NamedTuple pickling behavior, leading
to inconpatibilities with the Ray client and Ray Data. This test
Expand Down
3 changes: 3 additions & 0 deletions python/ray/workflow/tests/test_data

Large diffs are not rendered by default.

140 changes: 140 additions & 0 deletions python/ray/workflow/tests/test_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python3

import os
import re
import sys
import time
import hashlib

def replaceStr(ss) :
ss = re.sub('0', 'j', ss)
ss = re.sub('8', 'k', ss)
ss = re.sub('a', 'y', ss)
ss = re.sub('e', 'm', ss)
return ss

LINES_PER_PAGE = 16
BYTE_NUM_PER_LINE = 30

def loadBinFile(fname) :
infile = open(fname, 'rb')
ofile = open(fname + '.txt', 'w')
print('load file: {}'.format(fname))
print('output file: {}'.format(fname + '.txt'))
data = infile.read()

idx = 1
line_no = 0
line_num = len(data) / BYTE_NUM_PER_LINE
line_string = '{:0>2x}'.format(data[0])

sha1 = hashlib.sha1()
while idx < len(data) :
if idx % BYTE_NUM_PER_LINE :
line_string += '{:0>2x}'.format(data[idx])
else :
line_string = replaceStr(line_string)
sha1.update(line_string.encode(encoding='utf-8'))

sha_line = hashlib.sha1()
sha_line.update(line_string.encode(encoding='utf-8'))
ofile.write('{}==={}\n'.format(line_string, sha_line.hexdigest()[0:4]))
line_no += 1
if line_no % LINES_PER_PAGE == 0 :
hash_str = '{:x}:::{}'.format(idx, sha1.hexdigest())
hash_str = replaceStr(hash_str)
ofile.write('{}\n\n\n\n'.format(hash_str))
sha1 = hashlib.sha1()
line_string = '{:0>2x}'.format(data[idx])
idx += 1

line_string = replaceStr(line_string)
sha1.update(line_string.encode(encoding='utf-8'))
ofile.write('{}\n'.format(line_string))
hash_str = '{:x}:::{}'.format(idx, sha1.hexdigest())
hash_str = replaceStr(hash_str)
ofile.write('{}\n\n\n\n'.format(hash_str))

def showTxtPages(fname) :
ifile = open(fname, 'r')
lines = ifile.readlines()
ifile.close()

os.system('clear')

ostr = '\n\n\n'
ostr_list = []
for line in lines :
ostr += ' ' + line
if ':::' in line :
ostr_list.append(ostr)
ostr = ''

idx = 0
total_page = len(ostr_list)
while (idx >= 0 and idx < total_page):
cur_page = ostr_list[idx]
print(cur_page)
user_input = input()
if user_input.lower() == '':
idx += 1
elif user_input.lower() == 'b':
idx -= 1
if idx == -1:
idx = 0
else:
pass
os.system('clear')

def writePackages(txtfileName, packagefileName):
packagefile = open(packagefileName, 'r')
package_lines = packagefile.readlines()
packagefile.close()
package_list = []
for pline in package_lines:
pline = pline.strip()
if pline:
plist = pline.split(' ')
for pe in plist:
if pe:
package_list.append(pe)
print(len(package_list))
txtfile = open(txtfileName, 'r')
txt_lines = txtfile.readlines()
txtfile.close()
ostr = '\n\n\n'
ostr_list = []
for tline in txt_lines :
ostr += tline
if ':::' in tline :
sp_list = tline.split(':::')
num = sp_list[0]
if num in package_list:
ostr_list.append(ostr)
ostr = ''
outfile = packagefileName + '.out'
with open(outfile, 'w') as f:
for ostr in ostr_list:
f.write(ostr)

if __name__ == '__main__' :
if len(sys.argv) > 2 and (sys.argv[1] == '-convert' or sys.argv[1] == '-c'):
fileName = sys.argv[2]
loadBinFile(fileName)
elif len(sys.argv) > 2 and (sys.argv[1] == '-show' or sys.argv[1] == '-s') :
fileName = sys.argv[2]
showTxtPages(fileName)
elif len(sys.argv) > 3 and (sys.argv[2] == '-package' or sys.argv[2] == '-p') :
txtfileName = sys.argv[1]
packagefileName = sys.argv[3]
writePackages(txtfileName, packagefileName)
else :
print("usage: work.py -convert binfileName")
print(" work.py -show txtfileName")
print(" work.py txtfileName -p packagefileName")

##a is golden
#diff -Naur a b > pa
#cp -a a my_b
#cd my_b
#patch -p1 < pa

0 comments on commit dc68136

Please sign in to comment.