In [1]:
# Load IPython's autoreload extension and set it to mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.insert(0, "..")

import asyncio
from time import time

import altair as alt
import numpy as np
import pandas as pd
from connectorx import Connector
from contexttimer import Timer

In [36]:
# Hex colours used by the chart cells below: as the colour-scale range in the
# Exp1 chart, and PALETTE[1] / PALETTE[3] as the two colours in the Exp2 chart.
PALETTE = ["#bdb8ad", "#ece7e0", "#c6d4e1", "#44749d"]

# Exp1: Elapsed query time per API at different concurrency levels

In [23]:
# Empty results table for Exp1; each benchmark cell below adds one
# (API, Concurrency, Elapsed) row per timed run.
exp1data = pd.DataFrame(
    columns=["API", "Concurrency", "Elapsed"],
)

In [None]:
# Exp1 parameters: number of timed runs per setting, and the `_concurrency`
# values passed to each Connector.
repeat = 5
concurrencies = [1, 3, 5]

### Yelp

In [27]:
# Access token passed as `_auth["access_token"]` to the Yelp connector.
# Placeholder value: replace it before running the Yelp cells.
# NOTE(review): avoid committing a real token; consider reading it from the environment.
yelp_token = "<your-token>"

In [30]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:
            dc = Connector("yelp", _concurrency=c, _auth={"access_token": yelp_token})
            await dc.query(
                "businesses",
                term="restaurant",
                location="vancouver",
                _count=500,
            )

        exp1data = exp1data.append(
            pd.Series(["yelp", c, t.elapsed], index=exp1data.columns),
            ignore_index=True,
        )
        await asyncio.sleep(2)

### DBLP

In [32]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:
            dc = Connector("dblp", _concurrency=c)
            await dc.query("publication", q="AI", _count=10000)
            exp1data = exp1data.append(
                pd.Series(["dblp", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )

            await asyncio.sleep(2)

### Spotify

In [34]:
# Client credentials passed as `_auth["client_id"]` / `_auth["client_secret"]`
# to the Spotify connector. Placeholder values: replace both before running.
# NOTE(review): avoid committing real credentials; consider reading them from the environment.
spotify_client_id = "<your-token>"
spotify_client_secret = "<your-token>"

In [35]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:
            dc = Connector(
                "spotify",
                _concurrency=c,
                _auth={
                    "client_id": spotify_client_id,
                    "client_secret": spotify_client_secret,
                },
            )
            await dc.query("album", q="love", _count=500)
            exp1data = exp1data.append(
                pd.Series(["spotify", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)

### Twitch

In [10]:
# Despite the name, this value is passed as `_auth["access_token"]` to the
# Twitch connector. Placeholder value: replace it before running.
twitch_id = "<your-token>"

In [12]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:
            dc = Connector(
                "twitch",
                _concurrency=c,
                _auth={"access_token": twitch_id},
            )

            await dc.query("channels", query="star", _count=1000)
            exp1data = exp1data.append(
                pd.Series(["twitch", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)

['twitch', 5, 2.347855629399419]


### Bing

In [4]:
# Access token passed as `_auth["access_token"]` to the "azure" connector used
# for the Bing query. Placeholder value: replace it before running.
azure_token = "<your-token>"

In [8]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:
            dc = Connector(
                "azure", _concurrency=c, _auth={"access_token": azure_token}
            )

            await dc.query("bing", q="people", _count=500)
            exp1data = exp1data.append(
                pd.Series(["azure", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)


Request failed, decreasing the concurrency level to 2


['azure', 5, 5.064618270844221]


### Etsy

In [10]:
# Access token passed as `_auth["access_token"]` to the Etsy connector.
# Placeholder value: replace it before running.
etsy_token = "<your-token>"

In [12]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:

            dc = Connector(
                "Etsy", _concurrency=c, _auth={"access_token": etsy_token}
            )
            await dc.query("search", keywords="ipad", _count=250)
            exp1data = exp1data.append(
                pd.Series(["etsy", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)


['etsy', 5, 3.0532836578786373]


### Wikia

In [13]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:

            dc = Connector("wikia", _concurrency=c)

            await dc.query("search", query="game", _count=1000)
            exp1data = exp1data.append(
                pd.Series(["wikia", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)

['wikia', 5, 2.4066569972783327]


### MapQuest

In [14]:
# API key passed as `_auth["access_token"]` to the MapQuest connector.
# Placeholder value: replace it before running.
mapquest_api_key = "<your-token>"

In [15]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:

            dc = Connector(
                "mapquest",
                _concurrency=c,
                _auth={"access_token": mapquest_api_key},
            )

            await dc.query("place", q="new york", sort="relevance", _count=500)
            exp1data = exp1data.append(
                pd.Series(["mapquest", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)

['mapquest', 5, 6.683734085410833]


### Guardian

In [None]:
# API key passed as `_auth["access_token"]` to the Guardian connector.
# Placeholder value: replace it before running.
guardian_api_key = "<your-token>"

In [19]:
for c in concurrencies:
    for x in range(repeat):
        with Timer() as t:

            dc = Connector(
                "guardian",
                _concurrency=c,
                _auth={"access_token": guardian_api_key},
            )

            await dc.query("article", q="covid-19", _count=100)
            exp1data = exp1data.append(
                pd.Series(["Guardian", c, t.elapsed], index=exp1data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)

['Guardian', 5, 3.420564793050289]


In [None]:
# Exp1 chart: for each API (one facet per API, three per row), a horizontal bar
# of mean elapsed time per concurrency level, with a CI error bar and the mean
# printed as a text label.
scale = alt.Scale(range=list(PALETTE) + ["white", "black"])
base = alt.Chart(exp1data, width=200)

# Layer 1: mean elapsed time as bars, coloured by concurrency level.
exp1_mean_bars = base.mark_bar().encode(
    x=alt.X("mean(Elapsed)", title="Elapsed (s)"),
    y=alt.Y("Concurrency:N", title=None),
    color=alt.Color(
        "Concurrency:N",
        legend=alt.Legend(labelFontSize=18, titleFontSize=12),
        scale=scale,
    ),
)

# Layer 2: confidence-interval error bars over the raw Elapsed values.
exp1_error_bars = base.mark_errorbar(color="black", extent="ci").encode(
    x=alt.X("Elapsed", title="Elapsed (s)"),
    y=alt.Y("Concurrency:N", title=None),
)

# Layer 3: the mean value as text, formatted to one decimal place.
exp1_mean_labels = base.mark_text(
    color="black", align="right", dx=37, dy=1, size=16
).encode(
    x=alt.X("mean(Elapsed):Q", title="Elapsed (s)"),
    y=alt.Y("Concurrency:N", title=""),
    text=alt.Text("mean(Elapsed):Q", format=".1f"),
)

exp1_layers = exp1_mean_bars + exp1_error_bars + exp1_mean_labels
exp1_facet = alt.Facet("API:N", header=alt.Header(labelFontSize=18, title=None))

# The last expression is the cell's displayed output.
(
    exp1_layers.facet(exp1_facet, columns=3)
    .resolve_scale(x="independent", y="independent")
    .configure_axis(labelFontSize=16, titleFontSize=12)
)

# Exp2: Yelp quota consumption at high concurrency levels

In [38]:
# Empty results table for Exp2; Quota0 / Quota1 hold the connector's
# start_quota / current_quota for each run.
exp2data = pd.DataFrame(
    columns=[
        "API",
        "Concurrency",
        "Elapsed",
        "Error Handling",
        "Quota0",
        "Quota1",
    ],
)

In [None]:
# Search terms and cities for Exp2; every (term, location) pair becomes one
# Yelp query, i.e. 10 x 10 = 100 queries per run.
terms = [
    "Sushi", "Curry", "Taco", "Burgers", "Pizza",
    "Salad", "Pho", "Ramen", "Seafood", "Macarons",
]
locations = [
    "Vancouver", "Toronto", "New York", "Boston", "Chicago",
    "Los Angeles", "Seattle", "San Francisco", "Austin", "Dallas",
]


for c in [10, 15, 20, 30, 40, 50]:
    for x in range(1):
        with Timer() as t:
            dc = Connector(
                "yelp",
                _concurrency=c,
                _auth={"access_token": yelp_token},
            )
            qs = []
            for term in terms:
                for location in locations:
                    q = dc.query(
                        "businesses",
                        term=term,
                        location=location,
                        _count=100,
                    )
                    qs.append(q)
            await asyncio.gather(*qs)
            exp2data = exp1data.append(
                pd.Series(["yelp", c, t.elapsed, "No", dc.start_quota, dc.current_quota], index=exp2data.columns),
                ignore_index=True,
            )
            await asyncio.sleep(2)

In [None]:
# Quota consumed per run: quota at the start (Quota0) minus quota afterwards (Quota1).
quota_before = exp2data["Quota0"].astype(int)
quota_after = exp2data["Quota1"].astype(int)
exp2data["consumption"] = quota_before - quota_after

In [None]:
# Exp2 chart: quota consumption as one bar per "Error Handling" value, with one
# column per concurrency level (only rows with Concurrency > 5 are plotted).
exp2_rows = exp2data[exp2data["Concurrency"] > 5]
base = alt.Chart(exp2_rows, height=120)

exp2_colors = alt.Color(
    "Error Handling:N",
    legend=alt.Legend(labelFontSize=18, titleFontSize=12),
    scale=alt.Scale(range=[PALETTE[1], PALETTE[3]]),
)
exp2_bars = base.mark_bar().encode(
    x=alt.X("Error Handling", title="", axis=alt.Axis(labels=False)),
    y=alt.Y(
        "consumption:Q",
        title="Quota Consumption",
        scale=alt.Scale(domain=[0, 1400]),
    ),
    color=exp2_colors,
    tooltip=["consumption"],
)

# The last expression is the cell's displayed output.
(
    exp2_bars.facet(column=alt.Column("Concurrency", title="Concurrency (req/s)"))
    .configure_axis(labelFontSize=16, titleFontSize=12)
)