<a href="https://colab.research.google.com/github/clearspandex/distributed-ml-ray/blob/main/notebooks/introduction_to_ray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ray requests faker

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Introduction to Ray Core

### Ray Tasks

In [2]:
import requests

# Download the Moz "top 500 domains" table as CSV text.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails fast on an HTTP error instead of letting an error
# page be parsed as CSV by later cells.
response = requests.get(
    'https://moz.com/top-500/download?table=top500Domains',
    timeout=30,
)
response.raise_for_status()
internet = response.text
internet

'"Rank","Root Domain","Linking Root Domains","Domain Authority"\n"1","youtube.com","22,224,825","100"\n"2","www.google.com","14,111,479","100"\n"3","apple.com","6,384,326","100"\n"4","microsoft.com","5,119,715","99"\n"5","www.blogger.com","29,474,909","99"\n"6","support.google.com","5,271,264","99"\n"7","play.google.com","3,772,424","99"\n"8","linkedin.com","11,641,656","99"\n"9","docs.google.com","3,220,890","98"\n"10","youtu.be","5,050,850","98"\n"11","maps.google.com","5,793,320","98"\n"12","wordpress.org","11,733,715","98"\n"13","en.wikipedia.org","6,902,193","98"\n"14","cloudflare.com","6,282,712","98"\n"15","mozilla.org","2,350,527","98"\n"16","adobe.com","2,852,435","97"\n"17","drive.google.com","2,367,379","97"\n"18","europa.eu","2,163,424","97"\n"19","plus.google.com","11,283,101","97"\n"20","googleusercontent.com","3,227,896","97"\n"21","bp.blogspot.com","17,673,836","97"\n"22","accounts.google.com","2,559,900","97"\n"23","sites.google.com","2,167,331","97"\n"24","whatsapp.co

In [3]:
from io import StringIO
import pandas as pd

# The CSV quotes its counts with thousands separators (e.g. "22,224,825");
# thousands="," lets pandas parse those columns as integers instead of strings.
df = pd.read_csv(StringIO(internet), thousands=",")

In [4]:
df

Unnamed: 0,Rank,Root Domain,Linking Root Domains,Domain Authority
0,1,youtube.com,22224825,100
1,2,www.google.com,14111479,100
2,3,apple.com,6384326,100
3,4,microsoft.com,5119715,99
4,5,www.blogger.com,29474909,99
...,...,...,...,...
495,496,pewresearch.org,309044,92
496,497,bp3.blogger.com,555463,92
497,498,blackberry.com,147123,92
498,499,redhat.com,235399,92


In [2]:
import ray, logging

# ignore_reinit_error=True makes this cell safe to re-run: without it, calling
# ray.init() a second time in the same kernel raises a RuntimeError.
ray.init(ignore_reinit_error=True)

2022-07-01 05:33:49,126	INFO services.py:1476 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


RayContext(dashboard_url='127.0.0.1:8265', python_version='3.7.13', ray_version='1.13.0', ray_commit='e4ce38d001dbbe09cd21c497fedd03d692b2be3e', address_info={'node_ip_address': '172.28.0.2', 'raylet_ip_address': '172.28.0.2', 'redis_address': None, 'object_store_address': '/tmp/ray/session_2022-07-01_05-33-44_292474_60/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2022-07-01_05-33-44_292474_60/sockets/raylet', 'webui_url': '127.0.0.1:8265', 'session_dir': '/tmp/ray/session_2022-07-01_05-33-44_292474_60', 'metrics_export_port': 58502, 'gcs_address': '172.28.0.2:56753', 'address': '172.28.0.2:56753', 'node_id': '43a2d5ae441bc1af39427f3ac163f0299f1ff24db01442df95aadb11'})

In [3]:
import os

# Number of logical CPUs the OS reports for this machine.
os.cpu_count()

2

In [6]:
@ray.remote
def f(x):
    """Return ``x`` multiplied by itself; runs as a Ray remote task."""
    squared = x * x
    return squared

In [7]:
# f.remote(10) submits the task and returns an ObjectRef; ray.get() fetches its result.
ray.get(f.remote(10))

100

In [8]:
# Submit four tasks up front, then collect all of their results in one call.
futures = list(map(f.remote, range(4)))
print(ray.get(futures))

[0, 1, 4, 9]


In [9]:
# ray.put() hands a plain Python value to Ray and returns an ObjectRef for it.
ref = ray.put("Jonathan")
ref

ObjectRef(00ffffffffffffffffffffffffffffffffffffff0100000001000000)

In [19]:
import time

@ray.remote
def index(url):
    """Return the text after the last '.' in ``url`` (e.g. 'com') after a 2-second pause."""
    time.sleep(2)  # fixed delay; presumably stands in for slow per-site work
    *_, suffix = url.split('.')
    return suffix

# Launch one task per domain for the first ten rows; only ObjectRefs come back here.
first_ten_sites = df['Root Domain'].to_list()[:10]
futures = [index.remote(site) for site in first_ten_sites]

In [20]:
futures[:5]

[ObjectRef(cae5e964086715a4ffffffffffffffffffffffff0100000001000000),
 ObjectRef(bcb4fef46b376cafffffffffffffffffffffffff0100000001000000),
 ObjectRef(88543757a8df6d2fffffffffffffffffffffffff0100000001000000),
 ObjectRef(347cc60e0bb3da74ffffffffffffffffffffffff0100000001000000),
 ObjectRef(a02c24b8b7fc0a31ffffffffffffffffffffffff0100000001000000)]

In [21]:
from collections import Counter

In [22]:
%%time

# Wait for the task results, then tally how many times each returned suffix occurs.
Counter(ray.get(futures))

CPU times: user 72.1 ms, sys: 7.99 ms, total: 80.1 ms
Wall time: 5.3 s


Counter({'be': 1, 'com': 9})

In [4]:
# Ask Ray which resources it currently reports as available.
ray.available_resources()

{'CPU': 2.0,
 'memory': 7877495195.0,
 'node:172.28.0.2': 1.0,
 'object_store_memory': 3938747596.0}

### Ray Actors

In [34]:
import random
from faker import Faker

@ray.remote
class Child:
    """Ray actor holding a randomly generated name and an age that starts at 1."""

    def __init__(self):
        self.age = 1
        self.name = Faker().name()

    def grow(self):
        """Increase the age by one year and return the new age."""
        self.age = self.age + 1
        return self.age

    def greet(self):
        """Return a one-line introduction with the current name and age."""
        intro = f'My name is {self.name} '
        return intro + f'and I am {self.age} years old'

In [35]:
# Start ten independent Child actors; each call returns an actor handle.
children = [Child.remote() for _ in range(10)]

In [36]:
# Age each child by a random 1-10 years. The ObjectRefs returned by
# grow.remote() are discarded; nothing here waits on them.
for c in children:
    years = random.randint(1, 10)
    for _ in range(years):
        c.grow.remote()

In [37]:
futures = [child.greet.remote() for child in children]

# ray.get() turns the ObjectRefs into the greeting strings themselves.
for greeting in ray.get(futures):
    print(greeting)

My name is Hannah Hull and I am 4 years old
My name is Natalie Reed and I am 7 years old
My name is Amanda Shaffer and I am 6 years old
My name is Travis Johnson and I am 9 years old
My name is Loretta Hanson and I am 11 years old
My name is Jody Harrison and I am 5 years old
My name is John Dunn and I am 3 years old
My name is Max Page and I am 2 years old
My name is Jonathan Lopez and I am 7 years old
My name is Ricky Kelly and I am 7 years old


In [39]:
c = children[0]

In [41]:
ray.get(c.greet.remote())

'My name is Hannah Hull and I am 4 years old'

In [42]:
# Queue five grow() calls on the same actor and print the ages they return.
growth_refs = [c.grow.remote() for _ in range(5)]
print(ray.get(growth_refs))

[5, 6, 7, 8, 9]


In [43]:
# Actors stay alive for as long as a handle to them is in scope. Since
# nothing really goes out of scope in a notebook, terminate them manually.
# A plain loop is used because ray.kill() is called only for its side effect;
# a list comprehension would build and display a throwaway list of None.
for person in children:
    ray.kill(person)

[None, None, None, None, None, None, None, None, None, None]

In [44]:
# Alternatively, shut Ray down entirely.
ray.shutdown()