In [1]:
# get catalogue
import requests
import json
import os

# Udacity catalogue endpoint, en-us locale.
source = "https://catalog-api.udacity.com/v1/catalog?locale=en-us"
raw_data_path = "./data/raw_data.json"

# Announce the download before it starts so the log reflects what is happening.
print("fetching catalogue data")
# requests never times out on its own; bound the wait so the cell cannot hang forever.
r = requests.get(source, timeout=30)
# Stop on a 4xx/5xx answer instead of persisting an error payload as the catalogue.
r.raise_for_status()

data = r.json()

# A fresh checkout has no ./data directory yet; create it before writing.
os.makedirs(os.path.dirname(raw_data_path), exist_ok=True)
with open(raw_data_path, "w") as text_file:
    text_file.write(json.dumps(data, indent=2))
    print("done writing data")

fetching catalogue data
done writing data


In [2]:
import json
import requests
# ---
# Utilities
# ---

def filter_by_level(data, level):
    """Return the entries of ``data`` whose ``"level"`` value equals ``level``.

    Args:
        data: iterable of dict-like catalogue entries (courses or degrees).
        level: value each entry's ``"level"`` is compared against.

    Returns:
        A new list holding the matching entries in their original order.
        Entries without a ``"level"`` key are skipped instead of raising
        ``KeyError``.
    """
    return [
        entry for entry in data
        if "level" in entry and entry["level"] == level
    ]

def write_to_file(data, path_name):
    """Serialize ``data`` as indented JSON and write it to ``path_name``.

    Args:
        data: any JSON-serializable object.
        path_name: destination file path. Missing parent directories are
            created so the call works on a fresh checkout.

    Prints a confirmation line once the file has been written.
    """
    import os  # local import keeps this helper self-contained

    # Serialize before opening the file: if ``data`` is not serializable the
    # destination is left untouched instead of being truncated.
    payload = json.dumps(data, indent=2)

    parent_dir = os.path.dirname(path_name)
    if parent_dir:  # a bare file name has no directory to create
        os.makedirs(parent_dir, exist_ok=True)

    with open(path_name, "w") as data_file:
        data_file.write(payload)
    print(f"done writing to {path_name}")

def fetch_reviews_for_key(key, timeout=30):
    """Fetch the reviews payload for one catalogue node from the ratings API.

    Args:
        key: node key sent as the ``node`` query parameter (e.g. "nd0044").
        timeout: seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within ``timeout`` seconds.
    """
    # Let requests build the query string so ``key`` is URL-encoded correctly.
    response = requests.get(
        "https://ratings-api.udacity.com/api/v1/reviews",
        params={"node": key, "limit": 5000, "page": 1},
        timeout=timeout,
    )
    # Report HTTP failures here rather than as a confusing KeyError or JSON
    # decoding error in the calling cell.
    response.raise_for_status()
    return response.json()


In [3]:

# course jobs: split the catalogue's courses by level and save one file per level
with open("./data/raw_data.json") as data_file:
    data = json.load(data_file)

courses = data["courses"]

# one list per known level
beginner_courses = filter_by_level(courses, "beginner")
intermediate_courses = filter_by_level(courses, "intermediate")
advanced_courses = filter_by_level(courses, "advanced")

# keys of the courses whose level is none of the three above
other = [
    entry["key"]
    for entry in data["courses"]
    if entry["level"] not in ("beginner", "intermediate", "advanced")
]

print("Start Write course jobs")

write_to_file(beginner_courses, "./data/courses/level/beginner.json")
write_to_file(intermediate_courses, "./data/courses/level/intermediate.json")
write_to_file(advanced_courses, "./data/courses/level/advanced.json")

print("Finished Write course jobs")


Start Write course jobs
done writing to ./data/courses/level/beginner.json
done writing to ./data/courses/level/intermediate.json
done writing to ./data/courses/level/advanced.json
Finished Write course jobs


In [4]:
# degree jobs: bucket the catalogue's degrees by level and save one file per bucket
with open("./data/raw_data.json") as data_file:
    data = json.load(data_file)

degrees = data["degrees"]

beginner_degrees = filter_by_level(degrees, "beginner")
intermediate_degrees = filter_by_level(degrees, "intermediate")
advanced_degrees = filter_by_level(degrees, "advanced")

print("Start Write degree jobs")

# write the buckets in the same order they were built
for level_degrees, level_path in (
    (beginner_degrees, "./data/degrees/level/beginner.json"),
    (intermediate_degrees, "./data/degrees/level/intermediate.json"),
    (advanced_degrees, "./data/degrees/level/advanced.json"),
):
    write_to_file(level_degrees, level_path)

print("Finished Write degree jobs")

Start Write degree jobs
done writing to ./data/degrees/level/beginner.json
done writing to ./data/degrees/level/intermediate.json
done writing to ./data/degrees/level/advanced.json
Finished Write degree jobs


In [5]:
# massage data: keep only the rating summary fields of the reviews payload
data = fetch_reviews_for_key("nd0044")

# (output key, payload key) pairs, in the order they should be printed
summary_fields = (
    ("average_rating", "nd_avg_rating"),
    ("count", "count"),
    ("stats", "stats"),
)
reviews = {out_key: data[payload_key] for out_key, payload_key in summary_fields}

print(json.dumps(reviews, indent=2))

{
  "average_rating": 4.536585365853658,
  "count": 164,
  "stats": [
    {
      "rating": 5,
      "count": 109,
      "percentage": 66.46341463414635,
      "_id": 5
    },
    {
      "rating": 4,
      "count": 43,
      "percentage": 26.21951219512195,
      "_id": 4
    },
    {
      "rating": 3,
      "count": 7,
      "percentage": 4.2682926829268295,
      "_id": 3
    },
    {
      "rating": 2,
      "count": 1,
      "percentage": 0.6097560975609756,
      "_id": 2
    },
    {
      "rating": 1,
      "count": 4,
      "percentage": 2.4390243902439024,
      "_id": 1
    }
  ]
}


In [6]:
# TODO: 
# - list out all nano-degrees and courses with names and levels
# - extract all tags
# - number of projects? (does projects correlate with higher rating?)

# ---
# cleaning data
# ---
import json

# The input handle is only needed while parsing, so close it right away.
with open("./data/raw_data.json") as data_file:
    json_data = json.load(data_file)

# One trimmed record per degree. Degrees with an empty level or title are
# dropped; the filter uses .get() like the fields below, so a degree that
# lacks either key is skipped instead of raising KeyError.
degrees = [{
    "affiliates": degree.get("affiliates", ""),
    "key": degree.get("key", ""),
    "title": degree.get("title", ""),
    "level": degree.get("level", ""),
    # a missing, None or empty "projects" value counts as zero projects
    "num_of_projects": len(degree.get("projects") or []),
    # left as None (JSON null) when the degree has no "tags" key
    "tags": degree.get("tags"),
    } for degree in json_data["degrees"]
    if degree.get("level", "") != "" and degree.get("title", "") != ""
]

# Separate name for the output handle so it does not shadow the input one.
with open("./data/degrees.json", "w") as degrees_file:
    degrees_file.write(json.dumps(degrees))

The history saving thread hit an unexpected error (OperationalError('database is locked')).History will not be written to the database.
