captbaritone · captbaritone · Nov 3, 2023
diff --git a/examples/internet-archive/ArchiveApi.ts b/examples/internet-archive/ArchiveApi.ts
@@ -0,0 +1,88 @@
+// Note: These match the fields requested in the scrape API.
+export type ItemApiResponse = {
+  identifier: string;
+  title: string;
+  mediatype: string;
+  stars: number;
+};
+
+// Typed as keys of `ItemApiResponse` so that a misspelled or removed field
+// fails to compile instead of silently drifting from the type above.
+const ITEM_FIELD_NAMES: Array<keyof ItemApiResponse> = [
+  "identifier",
+  "title",
+  "mediatype",
+  "stars",
+];
+
+// Comma-separated list sent as the `fields` query parameter by `scrapeApi`.
+const ITEM_FIELDS = ITEM_FIELD_NAMES.join(",");
+
+// Response shape returned by `scrapeApi`.
+export type ScapeApiResponse = {
+  items: ItemApiResponse[];
+};
+
+// Queries the Internet Archive scrape endpoint and returns the parsed body.
+// https://archive.org/services/swagger/?url=%2Fservices%2Fsearch%2Fbeta%2Fswagger.yaml#!/search/get_scrape_php
+//
+// Throws when `count` is above 10,000 or below 100, and when the HTTP
+// response is not OK.
+export async function scrapeApi(
+  query: string,
+  count: number
+): Promise<ScapeApiResponse> {
+  // Validate the page size up front so no request is made for a bad value.
+  const rangeError =
+    count > 10000
+      ? "The maximum value for `count` is 10,000"
+      : count < 100
+      ? "The minimum value for `count` is 100"
+      : null;
+  if (rangeError !== null) {
+    throw new Error(rangeError);
+  }
+
+  // We use the scrape API because it supports cursor-based pagination.
+  const endpoint = new URL("https://archive.org/services/search/v1/scrape");
+  const { searchParams } = endpoint;
+  searchParams.set("q", query);
+  searchParams.set("count", count.toString());
+  searchParams.set("fields", ITEM_FIELDS);
+
+  // TODO: If only `count` is being read, we could use the `totals_only` param as an optimization.
+  const result = await fetch(endpoint);
+  if (result.ok) {
+    return result.json();
+  }
+  throw new Error(`Failed to search for ${query}: ${result.statusText}`);
+}
+
+// One entry of the `files` array in a `MetadataApiResponse`.
+type MetadataApiFileResponse = {
+  name: string;
+  source: "original" | "derivative";
+  format: string;
+  md5: string;
+  // The fields below are optional and, where present, typed as strings.
+  size?: string;
+  mtime?: string;
+  crc32?: string;
+  sha1?: string;
+
+  // Lots of these almost seem free-form, so they are left commented out.
+  /*
+  rotation?: string;
+  original?: string;
+  pdf_module_version?: string;
+  ocr_module_version?: string;
+  ocr_converted?: string;
+  */
+};
+
+// Response shape returned by `metadataApi`.
+// NOTE(review): the units of `item_last_updated` and `item_size` are not
+// visible from this file — confirm against the metadata API documentation.
+export type MetadataApiResponse = {
+  files: MetadataApiFileResponse[];
+  files_count: number;
+  item_last_updated: number;
+  item_size: number;
+  metadata: {
+    title: string;
+    creator: string;
+    uploader: string;
+    subject: string[];
+    description: string;
+    date: string;
+    // Typed as either a list of strings or a single string.
+    collection: string[] | string;
+  };
+};
+
+// Fetches the metadata record for a single item and returns the parsed body.
+// https://blog.archive.org/2013/07/04/metadata-api/
+//
+// Throws when `identifier` is a dot segment (`.` or `..`) and when the HTTP
+// response is not OK.
+export async function metadataApi(
+  identifier: string
+): Promise<MetadataApiResponse> {
+  // `.` and `..` are left untouched by percent-encoding and are collapsed by
+  // URL normalisation, which would point the request at a different path.
+  if (identifier === "." || identifier === "..") {
+    throw new Error(`Invalid item identifier: ${identifier}`);
+  }
+  // Percent-encode the identifier so characters such as `/`, `?` and `#` stay
+  // inside the path segment instead of changing which resource is requested.
+  const url = new URL(
+    `https://archive.org/metadata/${encodeURIComponent(identifier)}`
+  );
+  const response = await fetch(url);
+  if (!response.ok) {
+    throw new Error(
+      `Failed to fetch metadata for ${url}: ${response.statusText}`
+    );
+  }
+
+  return response.json();
+}
diff --git a/examples/internet-archive/README.md b/examples/internet-archive/README.md
@@ -0,0 +1,26 @@
+# Grats Internet Archive
+
+An in-progress implementation of a GraphQL facade over the [Internet Archive's](https://archive.org) REST API.
+
+Some ideas I'd like to explore in this example:
+
+- [ ] Isomorphism. Can Grats run in the browser and on the server?
+- [ ] Grafast. Can we use Grafast's query planning approach with Grats? Can it reduce waterfalls?
+
+## TODO
+
+- [ ] Items should really be an interface that could be a concrete item (is there a name for this?) or a collection.
+- [ ] Collections should expose their item properties
+- [ ] Top level query fields to get item/collection
+- [ ] Type for files
+- [ ] Type for user?
+
+## Internet Archive API Documentation
+
+The Internet Archive API is a bit haphazard in how it's documented. Here are some links that include relevant information:
+
+- https://archive.org/developers/index.html
+- https://blog.archive.org/2013/07/04/metadata-api/#read
+-
+
+Below
diff --git a/examples/internet-archive/package.json b/examples/internet-archive/package.json
@@ -0,0 +1,24 @@
+{
+  "name": "express-graphql-grats-example",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "start": "ts-node --esm server.ts",
+    "build": "tsc",
+    "dev": "tsc && ts-node --esm server.ts"
+  },
+  "dependencies": {
+    "graphql": "^16.6.0",
+    "graphql-yoga": "^5.0.0",
+    "grats": "workspace:*",
+    "typescript": "^4.9.5"
+  },
+  "devDependencies": {
+    "@types/node": "^18.14.6",
+    "ts-node": "^10.9.1"
+  },
+  "keywords": [],
+  "author": "",
+  "prettier": {}
+}
diff --git a/examples/internet-archive/schema.graphql b/examples/internet-archive/schema.graphql
@@ -0,0 +1,111 @@
+schema {
+  query: Query
+}
+
+directive @exported(
+  filename: String!
+  functionName: String!
+) on FIELD_DEFINITION
+
+directive @methodName(name: String!) on FIELD_DEFINITION
+
+"""
+Items can be placed in collections. For example, a collection called European
+Libraries can contain several items, one of which can be Euclid’s Geometry.
+An item can belong to more than one collection. See [Internet Archive
+Items](https://archive.org/developers/items.html).
+"""
+type Collection {
+  """
+  Unique identifier for this collection.
+  """
+  identifier: String
+  items(
+    """
+    Max 10,000
+    """
+    first: Int = 100
+  ): ItemsConnection
+  url: String
+}
+
+"""
+Archive.org is made up of “items”. An item is a logical “thing” that we
+represent on one web page on archive.org. An item can be considered as a
+group of files that deserve their own metadata. If the files in an item have
+separate metadata, the files should probably be in different items. An item
+can be a book, a song, an album, a dataset, a movie, an image or set of
+images, etc. Every item has an identifier that is unique across archive.org.
+
+https://archive.org/developers/items.html
+"""
+type Item {
+  collections: [Collection!]
+  creator_name: String
+  """
+  HTML string of the item's description.
+  """
+  description: String
+  """
+  The Internet Archive's unique identifier for this item.
+  """
+  identifier: String
+  mediaType: String
+  stars: Float
+  title: String
+  uploader_name: String
+  url: String
+}
+
+"""
+A connection to a list of items.
+"""
+type ItemsConnection {
+  """
+  A list of edges.
+  """
+  edges: [ItemsEdge!]
+  nodes: [Item!]
+}
+
+"""
+An edge in a connection of Search Items.
+"""
+type ItemsEdge {
+  """
+  The item at the end of the edge
+  """
+  node: Item
+}
+
+"""
+This API is a GraphQL facade on top of the Internet Archive's existing REST API.
+
+Its goal is to improve the developer experience of using the Internet Archive's
+API by:
+
+- Providing a single endpoint for all queries.
+- Providing a well defined schema that can be used to explore the API and reason about the data it returns.
+
+In the future it might also:
+
+- Provide an abstraction that can be used client-side in the browser or server-side in Node.js.
+- Provide a more efficient way to fetch data by leveraging query planing to batch requests or make other optimizations.
+- Provide a proof of concept to motivate the Internet Archive to build a GraphQL API.
+"""
+type Query {
+  """
+  Search the Internet Archive for books, movies, and more.
+  """
+  searchItems(
+    """
+    Max 10,000
+    """
+    first: Int = 100
+    query: String!
+  ): ItemsConnection
+    @exported(
+      filename: "../../examples/internet-archive/dist/schema/Query.js"
+      functionName: "searchItems"
+    )
+}
diff --git a/examples/internet-archive/schema/Collection.ts b/examples/internet-archive/schema/Collection.ts
@@ -0,0 +1,50 @@
+import { Int } from "../../../dist/src";
+import { scrapeApi } from "../ArchiveApi";
+import ItemsConnection from "./ItemsConnection";
+
+/**
+ * Items can be placed in collections. For example, a collection called European
+ * Libraries can contain several items, one of which can be Euclid’s Geometry.
+ * An item can belong to more than one collection. See [Internet Archive
+ * Items](https://archive.org/developers/items.html).
+ * @gqlType */
+export default class Collection {
+  /**
+   * Unique identifier for this collection.
+   * @gqlField */
+  identifier: string;
+
+  constructor(identifier: string) {
+    this.identifier = identifier;
+  }
+
+  /** @gqlField */
+  url(): string {
+    return "https://archive.org/details/" + this.identifier;
+  }
+
+  /** @gqlField */
+  async items({
+    first = 100,
+  }: {
+    /** Max 10,000 */
+    first?: Int;
+  }): Promise<ItemsConnection> {
+    if (first > 10000) {
+      throw new Error("The maximum value for `first` is 10,000.");
+    }
+    // A negative value would make `slice(0, first)` below drop items from the
+    // end of the list rather than limit how many are returned.
+    if (first < 0) {
+      throw new Error("The minimum value for `first` is 0.");
+    }
+
+    // `scrapeApi` rejects counts below 100, so request at least 100 items and
+    // trim the result locally when fewer were asked for.
+    const response = await scrapeApi(
+      `collection:${this.identifier}`,
+      Math.max(first, 100)
+    );
+
+    const items = first < 100 ? response.items.slice(0, first) : response.items;
+    return ItemsConnection.fromScapeApiResponse({ ...response, items });
+  }
+}