In [1]:
import ray 
ray.init()

2024-03-08 04:37:31,583	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.13
Ray version:,2.2.0
Dashboard:,http://127.0.0.1:8265


## ray.remote를 사용해서 작업 병렬화

#### ray 를 사용하지 않는 경우의 소요 시간

In [2]:
import time

database = [
    "Learning", "Ray", "Flexible", "Distributed", "Python", "for", "Machine", "Learning"
]

def retrieve(item: int):
    time.sleep(item/10.)
    return item, database[item]

def print_runtime(input_data, start_time):
    print(f'Runtime: {time.time() - start_time:.2f} seconds data: ')
    print(*input_data, sep='\n')

start = time.time()
data = [retrieve(item) for item in range(8)]
print_runtime(data, start)


Runtime: 2.80 seconds data: 
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Machine')
(7, 'Learning')


#### ray를 사용했을 때의 가속화 

In [3]:
@ray.remote
def retrieve_task(item):
    return retrieve(item)

start = time.time()
obj_refs = [
    retrieve_task.remote(item) for item in range(8)
]
data = ray.get(obj_refs)
print_runtime(data,start)

Runtime: 0.76 seconds data: 
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Machine')
(7, 'Learning')


## ray.put 이용하기

In [4]:
db_object_ref = ray.put(database)

@ray.remote
def retrieve_task(item, db_ref):
    time.sleep(item/10.0)
    return item, db_ref[item]

start = time.time()
obj_refs = [
    retrieve_task.remote(item, db_object_ref) for item in range(8)
]
data = ray.get(obj_refs)
print_runtime(data,start)

Runtime: 0.71 seconds data: 
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Machine')
(7, 'Learning')


## 논블로킹 호출에 대한 ray.wait 함수 사용하기

In [5]:
start = time.time()
obj_refs = [
    retrieve_task.remote(item, db_object_ref) for item in range(8)
]
all_data = []
while len(obj_refs) > 0:
    finished, obj_refs = ray.wait(
        obj_refs, num_returns=2, timeout=7.0
    )
    data = ray.get(finished)
    print_runtime(data, start)
    all_data.extend(data)

Runtime: 0.10 seconds data: 
(0, 'Learning')
(1, 'Ray')
Runtime: 0.31 seconds data: 
(2, 'Flexible')
(3, 'Distributed')
Runtime: 0.51 seconds data: 
(4, 'Python')
(5, 'for')
Runtime: 0.71 seconds data: 
(6, 'Machine')
(7, 'Learning')


## 테스크 의존성 다루기 

- 다른 ray.task에 의존된 후속 task 처리하기

In [6]:
from typing import List, Tuple

@ray.remote
def follow_up_task(retrieve_result: Tuple[int, str]) -> Tuple:
    original_item, _ = retrieve_result
    follow_up_result = retrieve(original_item + 1)
    return retrieve_result, follow_up_result

retrieve_refs = [retrieve_task.remote(item, db_object_ref) for item in [0, 2, 4, 6]]
follow_up_refs = [follow_up_task.remote(ref) for ref in retrieve_refs]
result = [print(data) for data in ray.get(follow_up_refs)]

((0, 'Learning'), (1, 'Ray'))
((2, 'Flexible'), (3, 'Distributed'))
((4, 'Python'), (5, 'for'))
((6, 'Machine'), (7, 'Learning'))


## 액터 사용

In [7]:
@ray.remote
class DataTracker:
    def __init__(self):
        self._counts = 0
    def increment(self):
        self._counts += 1
    def counts(self):
        return self._counts

@ray.remote
def retrieve_tracker_task(item, tracker, db_ref):
    time.sleep(item/10.0)
    tracker.increment.remote()
    return item, db_ref[item]

tracker = DataTracker.remote()

start = time.time()
obj_refs = [
    retrieve_tracker_task.remote(item, tracker, db_object_ref) for item in range(8)
]
data = ray.get(obj_refs)
print_runtime(data,start)
print(f'tracker counts: {ray.get(tracker.counts.remote())}')
        

Runtime: 0.71 seconds data: 
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Machine')
(7, 'Learning')
tracker counts: 8


## ray를 사용한 맵리듀스

In [8]:
import subprocess
zen_of_python = subprocess.check_output(["python", "-c", "import this"])
corpus = zen_of_python.split()  # list of word

num_partitions = 3
chunk = len(corpus) // num_partitions
partitions = [
    corpus[i*chunk: (i+1)*chunk] for i in range(num_partitions)
] # partitioned list of word

In [9]:
def map_function(document):
    for word in document.lower().split():
        yield word, 1

from typing import List, Tuple

@ray.remote
def apply_map(corpus, num_partitions=3) -> List[List[Tuple[str,int]]]:
    map_results = [list() for _ in range(num_partitions)]
    for document in corpus:
        for result in map_function(document):
            word, cnt = result
            first_letter = word.decode('utf-8')[0]
            word_index = ord(first_letter) % num_partitions  # shuffle by first letter of word
            map_results[word_index].append(result)
    return map_results

map_results = [
    apply_map.options(num_returns=num_partitions)
    .remote(data, num_partitions)
    for data in partitions
]

for i in range(num_partitions):
    mapper_results = ray.get(map_results[i])
    for j, result in enumerate(mapper_results):
        print(f"Mapper {i}, return value {j}: {result[:2]}")
    

Mapper 0, return value 0: [(b'of', 1), (b'is', 1)]
Mapper 0, return value 1: [(b'python,', 1), (b'peters', 1)]
Mapper 0, return value 2: [(b'the', 1), (b'zen', 1)]
Mapper 1, return value 0: [(b'unless', 1), (b'in', 1)]
Mapper 1, return value 1: [(b'although', 1), (b'practicality', 1)]
Mapper 1, return value 2: [(b'beats', 1), (b'errors', 1)]
Mapper 2, return value 0: [(b'is', 1), (b'is', 1)]
Mapper 2, return value 1: [(b'although', 1), (b'a', 1)]
Mapper 2, return value 2: [(b'better', 1), (b'than', 1)]


In [16]:
@ray.remote
def apply_reduce(*results):
    reduce_results = dict()
    for res in results:
        for key, value in res:
            if key not in reduce_results:
                reduce_results[key] = 0
            reduce_results[key] += value
        return reduce_results

outputs = []
for i in range(num_partitions):
    outputs.append(
        apply_reduce.remote(*[partition[i] for partition in map_results])
    )
counts = {k: v for output in ray.get(outputs) for k, v in output.items()}
sorted_counts = sorted(counts.items(), key=lambda item: item[1], reverse=True)
for count in sorted_counts:
    print(f"{count[0].decode('utf-8')}: {count[1]}")

is: 6
better: 6
than: 6
special: 2
the: 2
of: 1
ugly.: 1
implicit.: 1
complex.: 1
complex: 1
complicated.: 1
flat: 1
readability: 1
counts.: 1
cases: 1
rules.: 1
python,: 1
peters: 1
simple: 1
sparse: 1
dense.: 1
aren't: 1
zen: 1
by: 1
tim: 1
beautiful: 1
explicit: 1
nested.: 1
enough: 1
to: 1
break: 1
