In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and local code edits take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [49]:
import ray
from ray.util.queue import Queue, Empty

from typing import Optional
import requests
import time
import itertools
import socket

## Ray cluster management (change this for your cluster initialization)

In [3]:
# Environment-specific cluster helper; replace this cell with your own Ray
# cluster initialization when running elsewhere.
# NOTE(review): constructing the server presumably starts or attaches to a
# Ray cluster - confirm against toolkit_run.
from toolkit_run.ray.server import LabRayToolkitServer
server = LabRayToolkitServer()

In [4]:
# Display the dashboard URL reported by the server object.
# NOTE(review): the rendered URL identifies a specific job; consider clearing
# this output before sharing the notebook.
server.dashboard_url

'https://925b41b8-2383-4da4-9af6-3a2afa809568-8000.job.console.elementai.com'

In [51]:
# Scale the cluster before running the parallel download test.
# NOTE(review): the argument is presumably the number of worker nodes - confirm.
server.scale_cluster(20)

'OK'

In [None]:
# Scale the cluster down to 0 once the experiments are finished.
# NOTE(review): presumably releases all workers; run this cell last - confirm.
server.scale_cluster(0)

## Test

In [50]:
def download(url: str, user_agent: str = 'big_code_bot_v01', num_retry: int = 3,
             proxies: Optional[dict[str, str]] = None, timeout: Optional[float] = 30.0):
    """Fetch ``url`` with an HTTP GET and return ``(body, status)``.

    Args:
        url: Address to fetch.
        user_agent: Value sent in the ``User-Agent`` request header.
        num_retry: How many times a 5xx response is retried before giving up.
            Zero or a negative value disables 5xx retries.
        proxies: Optional mapping of URL scheme to proxy URL, passed straight
            to ``requests.get`` (e.g. ``{'https': 'http://10.0.0.1:3128'}``).
        timeout: Seconds to wait for the server before the request is aborted;
            ``None`` waits forever. A timeout is reported like any other
            request failure.

    Returns:
        A ``(body, status)`` tuple:

        * ``(text, status_code)`` when the final response is below 400;
        * ``(None, status_code)`` for any 4xx response other than 429, and
          for a 5xx response once the retry budget is used up;
        * ``(None, 'RequestException')`` when ``requests`` raises (connection
          error, timeout, invalid URL, ...).

    A 429 (rate limit) response is retried forever with a one-second pause,
    so this call can block for as long as the server keeps rate limiting.
    """
    headers = {'User-Agent': user_agent}
    try:
        while True:
            # Without a timeout a stalled connection would hang the caller forever.
            resp = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
            status = resp.status_code
            if status == 429:
                # Retry indefinitely on a rate-limit response.
                time.sleep(1)
                continue
            # "> 0" rather than truthiness: a negative budget must not retry forever.
            if 500 <= status < 600 and num_retry > 0:
                num_retry -= 1
                continue
            # ">=" so that a plain 400 Bad Request is treated as a failure too.
            if status >= 400:
                return None, status
            # Decode the body only when it is actually returned.
            return resp.text, status
    except requests.exceptions.RequestException:
        return None, 'RequestException'

@ray.remote(scheduling_strategy="SPREAD")
def test_task(q):
    """Drain ``q`` and fetch the GitHub page of every repository name taken from it.

    Runs as a Ray remote task. Items are taken from the queue without
    blocking until the queue reports itself empty or a ``get`` raises
    ``Empty``.

    Returns:
        A 3-tuple ``(results, runtime_context, ip)`` where ``results`` is a
        list of ``(repo_name, status)`` pairs, ``runtime_context`` is the
        value of ``ray.get_runtime_context().get()`` and ``ip`` is the
        address this host's name resolves to.
    """
    fetched = []
    while q.size() > 0:
        try:
            repo_name = q.get(block=False)
        except Empty:
            # Another task emptied the queue between the size check and the get.
            break
        # Only the status matters here; the page body is discarded.
        status = download(f'https://github.com/{repo_name}')[1]
        fetched.append((repo_name, status))
    runtime_info = ray.get_runtime_context().get()
    host_ip = socket.gethostbyname(socket.gethostname())
    return fetched, runtime_info, host_ip

def test(repo_names_q, num_instances):
    """Drain ``repo_names_q`` with ``num_instances`` remote tasks and print throughput.

    Launches ``num_instances`` ``test_task`` calls on the same queue, waits
    for all of them, then prints the number of processed items, the items
    per second rate and the number of distinct IP addresses the tasks
    reported.

    Returns:
        The list of per-task results, in launch order.
    """
    print('queue size:', repo_names_q.size())
    started = time.time()
    pending = [test_task.remote(repo_names_q) for _ in range(num_instances)]
    outputs = ray.get(pending)
    elapsed = time.time() - started

    # First element of each task result is its list of (repo, status) pairs.
    processed = 0
    for task_output in outputs:
        processed += len(task_output[0])
    print(f'num_instances {num_instances}, cnt: {processed}, speed: {processed / elapsed} items/sec')

    # Third element of each task result is the IP address of the host it ran on.
    distinct_ips = {task_output[2] for task_output in outputs}
    print('different ip addresses cnt: ', len(distinct_ips))
    return outputs

In [7]:
# Sample of 300 GitHub repositories in 'owner/name' form. test_task appends
# each entry to 'https://github.com/' to build the page URL it fetches.
repos = [
    'scribblemaniac/RenderChan-Blender-Plugin',
    'varrasivareddy/test',
    'JacobGH111/mightymightymongo',
    'ivanseidel/IAMDinosaur',
    'ilangleben19/CTFGame',
    'Lombiq/Orchard-Background-Task-Viewer',
    'kepler155c/Agar.io-bot',
    'mk0x9/go',
    'teeerevor/hottest100_react',
    'gottesmm/swift',
    'planetoftheweb/rouxfinished',
    'themike10452/HKM-Remastered',
    'honkimi/gourmedia',
    'singwhatiwanna/dynamic-load-apk',
    'stavro/arc_ecto',
    'aknik/chain_ne',
    'Lombiq/Orchard-Download-As',
    'VFR-maniac/L-SMASH-Works',
    'yahoo/fluxible',
    'videolan/vlc',
    'CorwynRavenwing/sca.pennsicland',
    'cose-wg/cose-spec',
    'ReyesJunior/mythos',
    'thinkgem/jeesite',
    'adonisjs/adonis-framework',
    'woshichuanqilz/LiZheBlog',
    'dineshprjpt/EventDebugger',
    'TheOrangeTester/hindifree',
    'DavidZhang309/CoreFramework',
    'Lombiq/Orchard-Module-Profiles',
    'shakacode/react_on_rails',
    'vissense/vissense',
    'shafayetkhan/ohai-emacs',
    'zuohl/jeesite',
    'Lyle-Wong/bms',
    'triacontane/RPGMakerMV',
    'tianmajs/tianshu',
    'b-cube/Response-Identification-Info',
    'bflad/chef-stash',
    'GaloisInc/HaNS',
    'charleneferguson/love',
    'ijzm/AstroTrip',
    'edfungus/Crouton',
    'jleung51/labyrinth',
    'elliotchance/dandy',
    'segrived/NSISInfoWriter',
    'w3ctrain/Article',
    'steven-ha/week10',
    'wireshark/wireshark',
    'dolphilia/dolphilia.github.io',
    'physiii/media-server',
    'mammothb/project-euler',
    'devd/Academic-Writing-Check',
    'imwangwang/stm32_grow_box',
    'liuchenwei2000/Persistence',
    'f/kamber',
    'kevansimpson/advent-of-code-2015',
    'Netflix/PigPen',
    'rastikerdar/shabnam-font',
    'jonathanluo/jsf',
    'chriskohlhoff/asio',
    'zhouxincheng/tryagain',
    'yiisoft/yii2-app-basic',
    'ucsf-ckm/www.library.ucsf.edu',
    'mrpoulin/watbus-api',
    'yanni4night/yanni4night.com',
    'meteor-fan/meteor-docs-ja',
    'Spellchaser/Cenemus',
    'Hanul/UPPERCASE-Sample-Chat',
    'zhangdongli/Cocos2d-lua-BehaviorTree',
    'HubPress/hubpress.io',
    'KartikAysola/Test',
    'jb-alvarado/media-autobuild_suite',
    'coding-horror/coding-horror.github.io',
    'arishuynhvan/FreeCodeCamp_mybasejumps',
    'TEAMERICA/Hello-World',
    'Lombiq/Helpful-Libraries',
    'abroederdorf/abroederdorf.github.io',
    'izumin5210/dotfiles',
    'faizy0303/MAZE',
    'Shyam-Chen/angular-seed-project',
    'rd-switchboard/Neo4j-Browser',
    'scrapy/scrapyd',
    'muskanpoudel/FYP',
    'hannahmbanana/CS193P',
    'titopao/pyramid_favicon',
    'picrinite/Courses',
    'hingsir/thrush',
    'philsquared/Catch',
    'capira12/snapdrop',
    'build2be/drupal-rest-test',
    'mks-greenfield/trendr',
    'CSHS/myProjects',
    'joseffaghihi/GamificationAlgebraConcepts',
    'asterisk/asterisk',
    'iscochxx/HelloGit',
    'socrata-platform/soql-reference',
    'dsk098/lua-checker',
    'Runner55/PracticeUHC',
    'ltegman/FreeCodeCamp',
    'pullapprove/support',
    'd-soni/Reddit-Karma-Prediction',
    'Lombiq/Orchard-Watcher',
    'glennjones/hapi-swagger',
    'foreverjyoung/foreverjyoung.github.io',
    'gzou107/LeetCode',
    'dennis714/RE-for-beginners',
    'whastings/objectdotcreate.net-v2',
    'rcarmo/pythonium',
    'acini/autowikibot-py',
    'Sixish/Odyssey',
    'duomark/epocxy',
    'milkbikis/powerline-shell',
    'cosmosfarm/KBoard-wordpress-plugin',
    'MorganCAw/progressive-boilerplate',
    'alecmori/bad_tic_tac_toe',
    'ChenYilong/CYLTabBarController',
    'sestegra/scoop',
    'Lombiq/Helpful-Extensions',
    'elBukkit/MagicPlugin',
    'OdinsHat/hexo-deezer-playlist-helper',
    'OwlFonk/OwlCarousel',
    'mmihira2/proxyData',
    'nodejs/node',
    'metakirby5/whizkers',
    'winter-fall/Bye2015Hi2016',
    'arasatasaygin/is.js',
    'sestegra/scoop-extras',
    'lutris/buildbot',
    'wiiaboo/media-autobuild_suite',
    'vinceyuan/gopl-solutions',
    'StoptheTraffik/wpsite',
    'madewithlinux/xkcdapp',
    'Atmosphere201410/tiefengquan',
    'falsovsky/mytracker',
    'Force-Fitness/FFPT',
    'godotengine/godot',
    'promovideo/palinsesto-promovideo',
    'tony/freebsd',
    'blockloop/shell-go',
    'simondlevy/ARDroneAutoPylot',
    'wowdd1/dotWave',
    'framgia/brs_18',
    'kmagai/slack-bot-pomodoro',
    'sethjackson/neovim',
    'onenewera/iamdonny',
    'camp510/camp510.github.io',
    'perlackline/perlack',
    'indiecrew/testing',
    'msteveb/autosetup',
    'TinyNova/ad',
    'roxas75/rxTools',
    'MertensMing/mertens-ife-study-record',
    'katsumata-ryo/tucho',
    'asannou/double-click-tls',
    '5donuts/Lotto-Number-Generator',
    'juliuswong/juliuswong.github.io',
    'daraobeirne/coder',
    'brianhigh/research-computing',
    'meh/httprot',
    'takeshibaconsuzuki/supr_scout',
    'ExactTarget/fuelux',
    'doubleplusc/Line-sticker-downloader',
    'elving/swag',
    'alisonmonteiro/getcomponent.js',
    'mrzmmr/remark-behead',
    'jayman9696/TheWreckingCrewUK',
    'KristofferHebert/staystream',
    'odoo/odoo',
    'gromitsun/imseg',
    'vijaytholpadi/APITrials',
    'rytone/loveworks',
    'pylasrenu/AsProject_service',
    'evykassirer/personal_website',
    'xeroplz/aura',
    'NextAcademy/quora-clone',
    'frodsan/rack-secure_headers',
    'franklioxygen/odfd',
    'luismendozamx/luismendozamx.github.io',
    'Supermichael777/-tg-station',
    'oklai/koala',
    'f/kamber-theme-default',
    'daid/Cura',
    'wanggongming/learngit',
    'NHNNEXT/2015-03-REAL-TUTU',
    'austenke/dflife',
    'monospaced/angular-elastic',
    'siukwan/unix',
    'opennota/screengen',
    'GoogleChrome/big-rig',
    'Lombiq/Orchard-Simple-Analytics',
    'tony/freebsd-ports',
    'rusith/URD',
    'nottvlike/behavior3lua',
    'GeorgeJahad/debug-repl',
    'jhansireddy/AndroidScannerDemo',
    'GoogleChrome/application-shell',
    'assaf/zombie',
    'framgia/brs_19',
    'Lombiq/Associativy-Web-Services',
    'daicham/divelog',
    'Anaminus/roblox-bug-tracker',
    'anak10thn/juriko-balancer',
    'Lombiq/Pretty-Good-Base-Theme',
    'simonbw/saladbowl-js',
    'mkoppanen/php-zmq',
    'jrminter/tips',
    'cenyuhai/spark',
    'twizmwazin/Glowkit',
    'Valdus/ProjectHex',
    'musabjilani/weatherStation',
    'krakenjs/kraken-js',
    'Kickflip/kickflip-android-sdk',
    'textualize/textualize',
    'amrish001/corejava',
    'quran/quran_android',
    'indexofire/bac-ngs-book',
    'reachtarunhere/reachtarunhere.github.io',
    'simplaio/rucksack',
    'shanygy/sample-app-ruby',
    'Lombiq/Orchard-Ace-Editor',
    'Lombiq/Orchard-Azure-Indexing',
    'nylas/N1',
    'corerd/PyDomo',
    'ystyle/autoss-go',
    'noodle-learns-programming/laravel',
    'bbaiggey/spark',
    'grishmashrestha/lf-exe',
    'SickRage/sickrage.github.io',
    'cdholjes/finalHTMLGit',
    'cheton/cnc.js',
    'Poniverse/Pony.fm',
    'zkyf/Eigenface',
    'campuscodefest/ccf',
    'linuxmint/Cinnamon',
    'zk-phi/electric-case',
    'Lombiq/Orchard-Confirm-Leave',
    'sweety-wo/halo-gui',
    'gruntjs/grunt',
    'libgit2/objective-git',
    'newming/newming.github.io',
    'Big-Data-Manning/big-data-code',
    'nguyenkimkm/nguyenkim-xlnntn',
    'dileepkumar0234/taxapplication',
    'pariwalton4/Money-Making',
    'WDonegan/ttt-3-display_board-example-q-000',
    'tumashu/chinese-pyim',
    'jlevy/the-art-of-command-line',
    'react-guide/react-router-cn',
    'ghostbody/SE-Robot-Course',
    'Tasbiha/xyz',
    'HeitorG/nipe',
    'jw2013/elixir-china',
    'hrzhu/awesome-v3.5-configs',
    'sinlov/LearnPython',
    'tschoonj/fgsl',
    'JustinTulloss/zeromq.node',
    'Stylo101/hello-world',
    'hecate-xw/hecate-xw.github.io',
    'swiftmailer/swiftmailer',
    'Lombiq/Orchard-Scripting-Extensions-DotNet',
    'betaY/Mahjong',
    'amcnamara/founder-feedback',
    'domainexpert/klee',
    'dhruv-russmediatech/dhruv-russmediatech.github.io',
    'thesabbir/simple-line-icons',
    'jhsu26/my_website',
    'tony/docker',
    'odoo-dev/odoo',
    'inwinstack/owncloud-shared_session',
    'Eric-Guo/wechat',
    'monochromegane/thinking-megane',
    'crquan/coremem',
    'arnodick/tiny_parkour',
    'SSalekin/brs_19',
    'WhisperSystems/libaxolotl-c',
    'yunsite/taketolove',
    'NativeScript/NativeScript',
    'AsProject/AsProject_service',
    'Mooophy/Cpp-Primer',
    'cuzz1369-r/frameworks_base',
    'Lombiq/Orchard-Facebook-Suite',
    'hanc00l/weblogic_unserialize_exploit',
    'AlisamTechnology/ATSCAN',
    'jainansh10591/PageManagerJadeBootstrap',
    'xaxxon/v8-class-wrapper',
    'daveaglick/FluentBootstrap',
    'dnschneid/crouton',
    'a5nI/Ix',
    'PowerShellMafia/PowerSploit',
    'JhalakM/zara-wms',
    'Laurendus/bigband-website',
    'jmaslak/dotfiles',
    'themugh/ugmeet',
    'kominu/pcap_program',
    'codelucas/newspaper',
    'paranoidq/yelp-data-processing',
    'emoacht/DesktopToast',
    'liuchenwei2000/Webservice',
    'pakyow/pakyow'
]

In [45]:
# Shared Ray queue of repository names; the remote tasks pull their work
# items from it, so it has to be refilled before every test run.
repo_names_q = Queue()

In [58]:
# Baseline run: refill the queue with every repository name, then drain it
# with a single remote task.
for el in repos:
    repo_names_q.put(el)
res = test(repo_names_q, 1)

queue size: 300
num_instances 1, cnt: 300, speed: 1.795540408088278 items/sec
different ip addresses cnt:  1


In [59]:
# Parallel run: refill the queue (the previous run emptied it), then drain
# it with 20 remote tasks working concurrently.
for el in repos:
    repo_names_q.put(el)
res = test(repo_names_q, 20)

queue size: 300
num_instances 20, cnt: 300, speed: 5.928953191112569 items/sec
different ip addresses cnt:  20
