In [1]:
import json
import pandas as pd

In [2]:
# Load the two JSON inputs used throughout the notebook:
#   * average_js_score_by_book -- presumably a nested book -> book -> score
#     mapping (it is turned into a DataFrame in the next cell); verify
#     against the notebook that writes the file.
#   * data_cleaned -- a sequence of per-book records; later cells read the
#     "ebook_no", "title" and "author" keys of each record.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform
# locale (author names contain non-ASCII characters).
with open("average_js_score_by_book.json", encoding="utf-8") as scores_file:
    average_js_score_by_book = json.load(scores_file)

with open("data_cleaned.json", encoding="utf-8") as metadata_file:
    data_cleaned = json.load(metadata_file)

In [3]:
# Build the score table from the loaded mapping, then order its rows by
# index label.
scores = pd.DataFrame(data=average_js_score_by_book)
scores = scores.sort_index(axis="index")

In [4]:
# Standardize every entry against ONE global mean / standard deviation taken
# over all cells of the table (not per row or per column).
pooled_scores = scores.stack()
standardized_scores = scores.sub(pooled_scores.mean()).div(pooled_scores.std())

In [5]:
# Map each book id (as int) to a coarse subject label.
# A book's "subject" entries are joined with spaces and re-split on
# whitespace, so it is labelled "Economics" only when that exact word occurs
# as a standalone token; anything else is labelled "Other".
# NOTE(review): a token such as "Economics," (with attached punctuation)
# would not match -- confirm this is the intended rule.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform
# locale.
with open("data.json", encoding="utf-8") as f:
    subjects = {
        int(ebook_no): (
            "Economics"
            if "Economics" in " ".join(record["subject"]).split()
            else "Other"
        )
        for ebook_no, record in json.load(f).items()
    }

In [6]:
# Lookup tables keyed by each record's "ebook_no": one for titles, one for
# authors. If an id occurs more than once, the last record wins.
titles = {record["ebook_no"]: record["title"] for record in data_cleaned}
authors = {record["ebook_no"]: record["author"] for record in data_cleaned}

In [17]:
# For every column of `standardized_scores`, record the smallest score in
# that column ("js") and the row label where it occurs ("matched_book_id").
# NOTE(review): the minimum is taken over all rows, including the book's own
# row if the table has one, so a book can end up matched to itself -- confirm
# that this is intended.
matched = pd.concat(
    {
        "js": standardized_scores.min(),
        "matched_book_id": standardized_scores.idxmin(),
    },
    axis=1,
)

# The labels are converted with int() before being used as keys into the
# titles / authors / subjects lookup dicts.
source_ids = matched.index.to_series().map(int)
partner_ids = matched["matched_book_id"].map(int)

# Attach metadata for the book itself, then for its matched book. Ids that
# are missing from a lookup dict yield a missing value (dict.get -> None).
matched = matched.assign(
    title=source_ids.map(titles.get),
    author=source_ids.map(authors.get),
    subject=source_ids.map(subjects.get),
    matched_title=partner_ids.map(titles.get),
    matched_author=partner_ids.map(authors.get),
    matched_subject=partner_ids.map(subjects.get),
)

In [18]:
# Count how many rows of `matched` carry each subject label.
matched["subject"].value_counts()

subject
Other        184
Economics     41
Name: count, dtype: int64

In [19]:
# Number of distinct ids appearing in "matched_book_id"; a value below the
# row count means several books share the same minimum-score match.
matched["matched_book_id"].nunique()

197

In [21]:
# Rows whose own subject label is "Economics", ordered by js from largest to
# smallest.
matched.loc[matched["subject"].eq("Economics")].sort_values(by="js", ascending=False)

Unnamed: 0,js,matched_book_id,title,author,subject,matched_title,matched_author,matched_subject
26716,-1.278586,20019,The Crown of Wild Olivealso Munera Pulveris; P...,"Ruskin, John, 1819-1900",Economics,Lectures on LandscapeDelivered at Oxford in Le...,"Ruskin, John, 1819-1900",Other
36541,-2.453528,61483,"Unto This Last, and Other Essays on Political ...","Ruskin, John, 1819-1900",Economics,"Definitions in Political Economy,Preceded by a...","Malthus, T. R. (Thomas Robert), 1766-1834",Economics
65693,-2.634649,18603,"The Forgotten Man, and Other Essays","Sumner, William Graham, 1840-1910",Economics,What Social Classes Owe to Each Other,"Sumner, William Graham, 1840-1910",Economics
41936,-2.899419,41936,Principles of Political Economy,"Perry, Arthur Latham, 1830-1905",Economics,Principles of Political Economy,"Perry, Arthur Latham, 1830-1905",Economics
59792,-3.01062,59792,Hume's Political Discourses,"Hume, David, 1711-1776",Economics,Hume's Political Discourses,"Hume, David, 1711-1776",Economics
46333,-3.159399,46333,The Social Contract & Discourses,"Rousseau, Jean-Jacques, 1712-1778",Economics,The Social Contract & Discourses,"Rousseau, Jean-Jacques, 1712-1778",Economics
27698,-3.473887,27698,"Principles of Political Economy, Vol. 1","Roscher, Wilhelm, 1817-1894",Economics,"Principles of Political Economy, Vol. 1","Roscher, Wilhelm, 1817-1894",Economics
15962,-3.794874,58584,Essays on Political Economy,"Bastiat, Frédéric, 1801-1850",Economics,Index of the Project Gutenberg Works of Frédér...,"Bastiat, Frédéric, 1801-1850",Other
55308,-3.803687,55308,"Progress and Poverty, Volumes I and IIAn Inqui...","George, Henry, 1839-1897",Economics,"Progress and Poverty, Volumes I and IIAn Inqui...","George, Henry, 1839-1897",Economics
38655,-3.922412,38655,"Principles of Political Economy, Vol. 2","Roscher, Wilhelm, 1817-1894",Economics,"Principles of Political Economy, Vol. 2","Roscher, Wilhelm, 1817-1894",Economics


In [13]:
# Narrow `matched` to the columns shown in the final table.
# This rebinds `matched`: cells that read the dropped columns ("subject",
# "matched_book_id", ...) only work if they run BEFORE this cell.
display_columns = ["author", "title", "matched_author", "matched_title", "js"]
matched = matched[display_columns]

In [16]:
# Show the 50 rows with the smallest js values.
matched.sort_values(by="js").head(50)

Unnamed: 0,js,matched_book_id,title,author,subject,matched_title,matched_author,matched_subject
26716,0.83638,4779,The Crown of Wild Olivealso Munera Pulveris; P...,"Ruskin, John, 1819-1900",Economics,Russian Roulette: Russia's Economy in Putin's Era,"Vaknin, Samuel, 1961-",Other
19164,0.860127,4779,"Lectures on Art, Delivered Before the Universi...","Ruskin, John, 1819-1900",Other,Russian Roulette: Russia's Economy in Putin's Era,"Vaknin, Samuel, 1961-",Other
21263,0.890898,33310,"On the Old Road, Vol. 2 (of 2)A Collection of ...","Ruskin, John, 1819-1900",Other,"On The Principles of Political Economy, and Ta...","Ricardo, David, 1772-1823",Economics
19980,0.948998,52091,A Joy For Ever (and Its Price in the Market),"Ruskin, John, 1819-1900",Other,An essay on the foundations of geometry,"Russell, Bertrand, 1872-1970",Other
49508,0.953163,33310,"Arrows of the Chace, vol. 1/2being a collectio...","Ruskin, John, 1819-1900",Other,"On The Principles of Political Economy, and Ta...","Ricardo, David, 1772-1823",Economics
36541,0.968622,60816,"Unto This Last, and Other Essays on Political ...","Ruskin, John, 1819-1900",Economics,The Elements of Perspectivearranged for the us...,"Ruskin, John, 1819-1900",Other
8523,1.009047,33310,Val d'ArnoTen Lectures on the Tuscan Art Direc...,"Ruskin, John, 1819-1900",Other,"On The Principles of Political Economy, and Ta...","Ricardo, David, 1772-1823",Economics
15200,1.023676,4779,Selections From the Works of John Ruskin,"Ruskin, John, 1819-1900",Other,Russian Roulette: Russia's Economy in Putin's Era,"Vaknin, Samuel, 1961-",Other
25678,1.036069,33310,On the Old Road Vol. 1 (of 2)A Collection of...,"Ruskin, John, 1819-1900",Other,"On The Principles of Political Economy, and Ta...","Ricardo, David, 1772-1823",Economics
30755,1.04714,33310,"The Stones of Venice, Volume 2 (of 3),","Ruskin, John, 1819-1900",Other,"On The Principles of Political Economy, and Ta...","Ricardo, David, 1772-1823",Economics
