Skip to content

Commit

Permalink
#5 #15 Load extraneous resources on rooms and buildings
Browse files: browse the repository at this point in the history
  • Loading branch information
blu3r4y committed Nov 25, 2020
1 parent ae27023 commit 5006369
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 14 deletions.
5 changes: 0 additions & 5 deletions src/resources/rooms/buildings.json

This file was deleted.

9 changes: 8 additions & 1 deletion src/scraper/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { writeFile } from "fs";
import { Log } from "./log";
import { Scraper } from "./scraper";
import { IndexDto } from "../common/dto";
import { BuildingToRoomsMap } from "./types";

/** The full base URL to the kusss instance */
declare let KUSSS_URL: string;
Expand All @@ -18,6 +19,10 @@ declare let MAX_RETRIES: number;
declare let REQUEST_TIMEOUT_MS: number;
/** How much time should we wait before retrying a request again */
declare let REQUEST_DELAY_MS: number;
/** Provides a list of room names that shall always be ignored because they are irrelevant */
declare let IGNORE_ROOMS: string[];
/** Provides manual metadata for room and building mappings */
declare let EXTRA_BUILDING_METADATA: BuildingToRoomsMap;

Log.info("initializing scraper");

Expand All @@ -34,7 +39,9 @@ const scraper = new Scraper(
USER_AGENT,
REQUEST_TIMEOUT_MS,
MAX_RETRIES,
REQUEST_DELAY_MS
REQUEST_DELAY_MS,
IGNORE_ROOMS,
EXTRA_BUILDING_METADATA
);

scraper
Expand Down
7 changes: 7 additions & 0 deletions src/scraper/resources/buildings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"buildings": {
"Evangelical Student Dormitory (ESH)": ["ESH 1", "ESH 2", "ESH 3"],
"Johannes Kepler Dormitory (JKH)": ["KEP 1", "KEP 3"],
"Franz Jägerstätter Dormitory (KHG)": ["KHG I"]
}
}
File renamed without changes.
30 changes: 24 additions & 6 deletions src/scraper/scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ import { TimeUtils } from "../common/utils";
import { CourseScraper } from "./components/courses";
import { BookingScraper } from "./components/bookings";
import { BuildingScraper } from "./components/buildings";
import { RoomScrape, CourseScrape, ScrapeStatistics } from "./types";
import {
RoomScrape,
CourseScrape,
ScrapeStatistics,
BuildingToRoomsMap as BuildingToRooms,
} from "./types";
import { KusssRoomScraper, JkuRoomScraper } from "./components/rooms";
import {
IndexDto,
Expand All @@ -35,18 +40,27 @@ import {
RangeDto,
} from "../common/dto";

/** Intermediate type that is used for building the availability dto structure */
/**
* Intermediate type that is used for building the availability dto structure.
*
* It is a two-level, string-keyed dictionary of dictionaries whose leaf values
* are lists of `TimeSpanDto` entries.
* NOTE(review): the meaning of the outer and inner keys is not visible in this
* excerpt - confirm against the code that fills the structure.
*/
declare type AvailableDict = FactoryDictionary<
string,
FactoryDictionary<string, TimeSpanDto[]>
>;

/**
* Lookup table of `RoomScrape` objects, indexed by the canonical name of each room
*/
declare type RoomScrapeDict = Record<string, RoomScrape>;

export class Scraper {
public readonly jkuUrl: string;
public readonly kusssUrl: string;
public statistics: ScrapeStatistics;

private readonly quickMode: boolean;
private readonly ignoreRooms: string[];
private readonly extraBuildingMeta: BuildingToRooms;
private readonly requestLimiter: Bottleneck;
private readonly requestOptions: OptionsOfTextResponseBody;

Expand All @@ -64,11 +78,15 @@ export class Scraper {
userAgent = "jku-room-search-bot",
requestTimeout = 5000,
maxRetries = 5,
requestDelay = 500
requestDelay = 500,
ignoreRooms: string[] = [],
extraBuildingMeta: BuildingToRooms = {}
) {
this.quickMode = quickMode;
this.jkuUrl = jkuUrl;
this.kusssUrl = kusssUrl;
this.ignoreRooms = ignoreRooms;
this.extraBuildingMeta = extraBuildingMeta;

// initialize request configuration and statistics object
this.requestOptions = {
Expand Down Expand Up @@ -136,7 +154,7 @@ export class Scraper {

/* scrape rooms inside buildings */

const jRooms: { [canonical: string]: RoomScrape } = {};
const jRooms: RoomScrapeDict = {};
for (const [i, building] of buildings.entries()) {
const p = (i + 1) / buildings.length;
const scrapes = await jkuRoomScraper.scrape(building, p);
Expand All @@ -155,7 +173,7 @@ export class Scraper {

/* scrape rooms searchable from kusss */

const kRooms: { [canonical: string]: RoomScrape } = {};
const kRooms: RoomScrapeDict = {};
const kRoomScrapes = await kusssRoomScraper.scrape();
kRoomScrapes.forEach((r) => (kRooms[Scraper.getCncnlName(r.name)] = r));

Expand Down Expand Up @@ -345,7 +363,7 @@ export class Scraper {
}
}

private logJkuRoomMetrics(rooms: { [canonical: string]: RoomScrape }): void {
private logJkuRoomMetrics(rooms: RoomScrapeDict): void {
const numRooms = Object.keys(rooms).length;
Log.milestone(
"room",
Expand Down
13 changes: 11 additions & 2 deletions src/scraper/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export declare interface BuildingScrape {
/**
* A scraped room entity
*/
export declare interface RoomScrape {
export declare type RoomScrape = {
/** The room name */
name: string;
/** The maximum capacity of the room */
Expand All @@ -23,7 +23,7 @@ export declare interface RoomScrape {
buildingId?: number;
/** The <option> identifier used at the KUSSS homepage */
kusssId?: string;
}
};

/**
* A scraped course entity
Expand Down Expand Up @@ -65,6 +65,15 @@ export declare interface ScrapeStatistics {
range?: RangeDto;
}

/**
* Provides a mapping from building names to rooms,
* so that rooms that might not be found on the homepage
* can be mapped with this additional data structure.
*
* Each key is a building name and each value is the list of
* room names assigned to that building.
*/
export declare interface BuildingToRoomsMap {
/** Room names, looked up by the name of their building */
[building: string]: string[];
}

export const SEARCH_PAGE = "/kusss/coursecatalogue-start.action?advanced=true";
export const SEARCH_RESULTS =
"/kusss/coursecatalogue-search-lvas.action?sortParam0courses=lvaName&asccourses=true" +
Expand Down
11 changes: 11 additions & 0 deletions webpack.config.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
const fs = require("fs");
const path = require("path");
const glob = require("glob");
const webpack = require("webpack");
Expand Down Expand Up @@ -127,6 +128,16 @@ const scraperConfig = (env, options) => {
MAX_RETRIES: JSON.stringify(5),
REQUEST_TIMEOUT_MS: JSON.stringify(5 * 1000),
REQUEST_DELAY_MS: JSON.stringify(500),
IGNORE_ROOMS: JSON.stringify(
JSON.parse(fs.readFileSync("./src/scraper/resources/ignore.json"))[
"rooms"
]
),
EXTRA_BUILDING_METADATA: JSON.stringify(
JSON.parse(fs.readFileSync("./src/scraper/resources/buildings.json"))[
"buildings"
]
),
}),
],
};
Expand Down

0 comments on commit 5006369

Please sign in to comment.