In [None]:
import json
import os
import random
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import requests

# Making an HTTP request

As discussed in the slides

In [None]:
# Issue a GET request for the pokemon-species listing; ending the cell with `r`
# displays the Response object itself.
r = requests.get("https://pokeapi.co/api/v2/pokemon-species/")
r

In [None]:
# The textual reason phrase that accompanies the response's status code.
r.reason

In [None]:
# Only decode the body when the request succeeded; otherwise report why it failed.
if not r.ok:
    print(r.reason)
else:
    pprint(r.json())

In [None]:
# The response headers, exposed by requests as a dict-like object.
r.headers

# JSON

In [None]:
# A small JSON document held as a plain string: a boolean, a nested object, and
# a list of objects inside that nested object.
raw_json = """
{
    "a": true,
    "b": {
        "c": 1,
        "d": [
            {"x": 1},
            {"y": 2}
        ]
    }
}
"""

In [None]:
# Decode the JSON text into Python objects and display the result.
parsed_json = json.loads(raw_json)
parsed_json

In [None]:
# Parsed JSON is navigated with ordinary dict keys and list indices.
print(parsed_json["a"])
print(parsed_json["b"]["d"][0]["x"])

In [None]:
# The reverse direction: serialize a Python dict to a JSON string.
json.dumps({"a": 1})

# Visualizing Results

In [None]:
# Request historical closing prices for the date range given in the query string
# (presumably CoinDesk's Bitcoin Price Index, going by the "bpi" path segment).
# NOTE(review): this endpoint may have been retired - confirm it still responds.
r = requests.get("https://api.coindesk.com/v1/bpi/historical/close.json?start=2012-01-01&end=2019-09-05")

In [None]:
# Display the decoded JSON body of the price response.
r.json()

In [None]:
%matplotlib inline

x = list(r.json()['bpi'].keys())
y = list(r.json()['bpi'].values())

In [None]:
# Display the list of values extracted from the 'bpi' mapping.
y

In [None]:
%matplotlib inline

plt.plot(x, y)

# Access Token

Go get one of your own from https://github.com/settings/tokens! Don't just steal mine!

In [None]:
# List the organization's repositories without sending any credentials, keeping
# only each repository's full name.
r = requests.get("https://api.github.com/orgs/rubikloud/repos")
[repo['full_name'] for repo in r.json()]

In [None]:
from requests.auth import HTTPBasicAuth

# Read the token from the environment so it is never saved inside the notebook:
# export GITHUB_TOKEN before starting Jupyter. Falls back to "" when it is unset.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Same request as before, now sending the token as the basic-auth username.
r = requests.get("https://api.github.com/orgs/rubikloud/repos", auth=HTTPBasicAuth(GITHUB_TOKEN, ""))
if r.ok:
    print([repo['full_name'] for repo in r.json()])
else:
    # Show why the request was rejected instead of failing while reading the body.
    print(r.status_code, r.reason)

# Pagination

In the result above, we can see that GitHub returned some private repositories, as we expected (try opening them in your browser to check!), but it still only returned a handful of repositories. Where did the rest go?

In [None]:
# Print every header of the most recent response.
print(r.headers)

Notice the `Link` header. It is GitHub's way of telling us where the later and earlier pages live. What happens if we follow those links one after another?

Well, first we need to parse them...

In [None]:
def parse_github_pagination(link_header):
    """Parse a GitHub ``Link`` response header into a ``{rel: url}`` dict.

    The header looks like
    ``<https://...?page=2>; rel="next", <https://...?page=5>; rel="last"``.

    Args:
        link_header: The raw header value. ``None`` or an empty string is
            accepted, which covers responses that carry no ``Link`` header.

    Returns:
        A dict mapping each relation name (for example ``"next"``, ``"last"``)
        to its URL. Empty when there is nothing to parse.
    """
    links = {}
    if not link_header:
        return links
    for link_header_part in link_header.split(", "):
        part = link_header_part.strip()
        if not part.startswith("<") or ">" not in part:
            # Not a `<url>; params` entry; ignore it rather than crash.
            continue
        # Everything up to the first ">" is the URL; the rest are parameters.
        url, _, params = part[1:].partition(">")
        for param in params.split(";"):
            name, _, value = param.partition("=")
            if name.strip() == "rel":
                links[value.strip().strip('"')] = url
    return links

# Try the parser on the Link header of the most recent response.
parse_github_pagination(r.headers['Link'])

Great. So as long as our response _has_ a `rel="next"` in its `Link` header, we can just keep visiting that page, appending our results together as we go.

In [None]:
# Start from the first page explicitly so this cell gives the same answer every
# time it runs, regardless of which response `r` happens to hold beforehand.
repos = []
next_url = "https://api.github.com/orgs/rubikloud/repos"
while next_url:
    r = requests.get(next_url, auth=HTTPBasicAuth(GITHUB_TOKEN, ""))
    if not r.ok:
        # Stop paging on an error response rather than trying to read repos from it.
        print(r.status_code, r.reason)
        break
    repos += [repo["full_name"] for repo in r.json()]
    # A response may carry no Link header at all (e.g. everything fit on one page).
    link_header = r.headers.get("Link")
    links = parse_github_pagination(link_header) if link_header else {}
    next_url = links.get("next")

repos

# Mixing them together

In [None]:
# Look each repository name up on PokeAPI; a non-error response means a Pokemon
# with that (lower-cased) name exists.
for full_name in repos:
    potential_pokemon_name = full_name.split('/')[1]
    lookup_name = potential_pokemon_name.lower()
    r = requests.get("https://pokeapi.co/api/v2/pokemon/%s" % lookup_name)
    print("%s: %s" % (potential_pokemon_name, r.ok))

# Using APIs to Train Models

In [None]:
# Read the Yelp API key from the environment so it is never saved inside the
# notebook. Falls back to "" when YELP_TOKEN is unset.
YELP_TOKEN = os.environ.get("YELP_TOKEN", "")

In [None]:
# Search Yelp for up to 50 businesses in Toronto, sending the token as a Bearer
# credential in the Authorization header.
r = requests.get("https://api.yelp.com/v3/businesses/search?location=Toronto&limit=50", headers={"Authorization": "Bearer %s" % YELP_TOKEN})

In [None]:
# Show the status code, reason phrase and raw body of the search response.
print(r.status_code, r.reason, r.content)

In [None]:
# Display the decoded JSON body of the search response.
r.json()

In [None]:
# Build the auth header once instead of on every request.
yelp_headers = {"Authorization": "Bearer %s" % YELP_TOKEN}

# Collect (review text, star rating) pairs for every business in the search result.
review_labels = []
for business in r.json()['businesses']:
    reviews_response = requests.get("https://api.yelp.com/v3/businesses/%s/reviews" % business['id'], headers=yelp_headers)
    if not reviews_response.ok:
        # One failed lookup should not throw away everything collected so far.
        print(business['id'], reviews_response.status_code, reviews_response.reason)
        continue
    for review in reviews_response.json()['reviews']:
        # Trailing periods are stripped from the text before it is stored.
        review_labels.append((review['text'].rstrip('.'), review['rating']))
review_labels

In [None]:
# Split each review into space-separated tokens and collapse the star rating
# into a binary label: more than 3 stars is 'positive', anything else 'negative'.
review_features = [
    (text.split(' '), 'positive' if rating > 3 else 'negative')
    for text, rating in review_labels
]
review_features

In [None]:
from nltk.sentiment import SentimentAnalyzer
import nltk.sentiment.util
from nltk.classify import NaiveBayesClassifier

# Shuffle a copy with a fixed seed: re-running this cell (or the whole notebook)
# then yields the same split, and `review_features` itself is left untouched.
shuffled_docs = random.Random(42).sample(review_features, len(review_features))

# Hold out one fifth for testing. Unlike a fixed cut at 120 documents, this
# still leaves a test set when fewer reviews were collected.
split_index = len(shuffled_docs) * 4 // 5
training_docs = shuffled_docs[:split_index]
test_docs = shuffled_docs[split_index:]

print("Training: %d, Testing: %d" % (len(training_docs), len(test_docs)))

sentim_analyzer = SentimentAnalyzer()

In [None]:
# Run NLTK's negation marking over every training document, then gather the
# words of all marked documents via the analyzer.
negation_marked_docs = [nltk.sentiment.util.mark_negation(doc) for doc in training_docs]
all_words_neg = sentim_analyzer.all_words(negation_marked_docs)
all_words_neg

In [None]:
# Select unigram features from the collected words (filtered with min_freq=4),
# then register a unigram feature extractor that uses them.
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
sentim_analyzer.add_feat_extractor(nltk.sentiment.util.extract_unigram_feats, unigrams=unigram_feats)

In [None]:
# Run the registered feature extractor(s) over both splits.
training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(test_docs)

In [None]:
# Train a Naive Bayes classifier on the training set, then print each
# evaluation metric for the held-out set in alphabetical order.
trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)
metrics = sentim_analyzer.evaluate(test_set)
for metric_name in sorted(metrics):
    print('{0}: {1}'.format(metric_name, metrics[metric_name]))

In [None]:
from collections import defaultdict

# Tally how many reviews received each rating. The tuple is unpacked into
# dedicated names so the loop does not overwrite the notebook-level `x` that
# the price plot depends on.
c = defaultdict(int)
for _text, rating in review_labels:
    c[rating] += 1

# Explicit axes plus labels so the chart can be understood on its own; the dict
# views are materialized as lists before being handed to matplotlib.
fig, ax = plt.subplots()
ax.bar(list(c.keys()), list(c.values()))
ax.set(title="Reviews per rating", xlabel="Rating", ylabel="Number of reviews");

# GraphQL

In [None]:
# POST a GraphQL query to GitHub, authenticating with the same token as before.
# The query asks for the organization's name, its first 5 private repositories,
# and the title and URL of the last 5 pull requests of each repository.
r = requests.post("https://api.github.com/graphql", auth=HTTPBasicAuth(GITHUB_TOKEN, ""), json={"query": """
query {
  organization(login: "rubikloud") {
    name
    repositories(privacy: PRIVATE, first: 5) {
      edges {
        node {
          name
          pullRequests(last: 5) {
            edges {
              node {
                title
                url
              }
            }
          }
        }
      }
    }
  }
}"""})

In [None]:
# HTTP status code of the GraphQL response.
r.status_code

In [None]:
# Display the decoded JSON body of the GraphQL response.
r.json()