In [28]:
import requests
import pandas as pd
import altair as alt
import json
from itertools import cycle


In [99]:
url = "https://elearningindustry.com/content/directory/listings?category=7113&sort=rating&filters[7107_deployment][]=158241&filters[7107_market][]=158325&filters[7107_market][]=158340&filters[7107_market][]=158304&offset=0&limit=1000"
# Bound the wait and fail on an HTTP error status, so that a hung connection
# or an error response is never cached as if it were listings data.
res = requests.get(url, timeout=30)
res.raise_for_status()
# Save for later: cache the parsed payload on disk.
with open('output.json', 'w', encoding='utf-8') as f:
    json.dump(res.json(), f)

In [149]:
# Load the cached listings payload from disk. The encoding is pinned so the
# read does not depend on the platform's locale default.
with open('output.json', encoding='utf-8') as f:
    data = json.load(f)

def process_response(url_json: list) -> dict:
    """ Process HTTP Response
    Flatten nested HTTP request into a simpler document: one flat record per
    listing, keyed by the listing title.

    Parameters
    ----------
    url_json: list
        HTTP response json produced using the .json() method from a Response
        object from the requests library. Every element must provide the keys
        'title', 'website', 'rating', 'reviewCount' and 'rank'. The keys
        'deployment', 'integrations' and 'features' hold lists of mappings
        with a 'name' key (features also need 'parent'); a missing or None
        list is treated as empty.

    Returns
    -------
    dict
        Mapping of listing title to a flat record with 'name', 'url',
        'rating', 'review' and 'rank', followed by one True flag per
        'deployment - <name>', 'integrations - <name>' and
        'features - <parent> - <name>' key. Listings that share a title
        overwrite each other (the last one wins).

    Raises
    ------
    KeyError
        If a listing lacks one of the required top-level keys, or a nested
        entry lacks 'name' (or 'parent' for features).
    """
    out = {}
    for listing in url_json:
        record = {
            'name': listing['title'],
            'url': listing['website'],
            'rating': listing['rating'],
            'review': listing['reviewCount'],
            'rank': listing['rank'],
        }

        # `or []` covers both an absent key and an explicit null in the JSON.
        for deployment in listing.get('deployment') or []:
            record['deployment - {dep}'.format(dep=deployment['name'])] = True
        for integration in listing.get('integrations') or []:
            record['integrations - {int}'.format(int=integration['name'])] = True
        for feature in listing.get('features') or []:
            record['features - {cat} - {feat}'.format(cat=feature['parent'], feat=feature['name'])] = True

        out[listing['title']] = record
    return out

def process_dataframe(df: pd.DataFrame, only_true: bool = True) -> pd.DataFrame:
    """ Process pd.DataFrame from Response

    Reshape a wide frame (one row per listing) into a long frame with one row
    per listing and flag column. Columns whose name contains ' - ' are treated
    as flag columns; every other column is carried along as an identifier.

    Parameters
    ----------
    df: pd.DataFrame
        Wide frame whose flag columns hold True or NaN. It is not modified.
    only_true: bool
        When True (the default) only rows whose flag equals True are kept.

    Returns
    -------
    pd.DataFrame
        The identifier columns in their original order, followed by
        'Quality' (last ' - ' separated segment of the flag column name),
        'Bool' (the flag, NaN replaced by False), 'Type' (first segment) and
        'Feat_Cat' (the text matched between the first '- ' and the next
        ' -', NaN when the name has no such middle segment).
    """
    flag_columns = [col for col in df.columns if ' - ' in col]
    flag_names = set(flag_columns)
    # A list comprehension keeps the identifier columns in a stable order;
    # a set difference would order them arbitrarily from run to run.
    id_columns = [col for col in df.columns if col not in flag_names]

    # Fill on a copy so the caller's frame keeps its NaN markers.
    wide = df.copy()
    for column in flag_columns:
        wide[column] = wide[column].fillna(False)

    long = wide.melt(id_vars=id_columns,
                     value_vars=flag_columns,
                     var_name='Quality', value_name='Bool')
    # Type and Feat_Cat must be derived before Quality is overwritten below.
    long['Type'] = long['Quality'].str.split(' - ').str[0]
    long['Feat_Cat'] = long['Quality'].str.extract(r'- (.*?) -', expand=False)
    long['Quality'] = long['Quality'].str.split(' - ').str[-1]

    if only_true:
        # Copy so that callers can add columns without writing into a slice.
        long = long[long['Bool'] == True].copy()

    return long

# Extract and process data as a DataFrame, one named stage at a time:
# flatten the payload, frame it with one row per listing title, then reshape.
listings_by_title = process_response(data)
listings_wide = pd.DataFrame.from_dict(listings_by_title, orient='index')
df = process_dataframe(listings_wide)

In [150]:
def get_features_axis_order(url_json: list) -> dict:
    """ Extract Features for Graphing

    Group the feature names found in the payload by their parent value.

    Parameters
    ----------
    url_json: list
        HTTP response json produced using the .json() method from a Response
        object from the requests library. Each element may carry a 'features'
        list of mappings with 'parent' and 'name' keys; a missing or None
        list is treated as empty.

    Returns
    -------
    dict
        Mapping of each parent value to the alphabetically sorted list of the
        distinct feature names seen under it. Keys appear in the order in
        which each parent was first encountered.
    """
    names_by_parent = {}
    for listing in url_json:
        for feature in listing.get('features') or []:
            # A set de-duplicates names without scanning a list on every insert.
            names_by_parent.setdefault(feature['parent'], set()).add(feature['name'])

    return {parent: sorted(names) for parent, names in names_by_parent.items()}

# Walk the payload once; get_features_axis_order rescans every listing on
# each call, so it is hoisted out of the loops.
features_by_category = get_features_axis_order(data)

# order_list: feature names concatenated category by category.
# category_list: the category of each entry in order_list, position for position.
order_list = []
category_list = []
for category, feature_names in features_by_category.items():
    order_list.extend(feature_names)
    category_list.extend([category] * len(feature_names))

colors = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink', 'cyan', 'magenta', 'brown', 'gray', 'teal', 'navy', 'olive', 'lime', 'maroon', 'aqua', 'silver', 'gold', 'black']
color_cycle = cycle(colors)

# Pair each distinct category, in order of first appearance, with the next
# color of the cycle. zip stops as soon as the categories run out, so the
# cycle is advanced exactly once per distinct category.
char_colors = dict(zip(dict.fromkeys(category_list), color_cycle))

# Color of every entry in category_list, position for position.
color_list = [char_colors[char] for char in category_list]

# Fixed color name per key; the keys look like feature-category ids as found
# in df['Feat_Cat'] -- TODO confirm against the payload.
# The two tuples are aligned position for position.
category_mapping = dict(zip(
    ('158237', '158282', '158318', '158438', '158483',
     '158552', '158624', '158732', '158765', '158834',
     '158879', '158942', '159005', '159044', '159101',
     '159200', '159242', '159275', '159308', '159377'),
    ('red', 'blue', 'green', 'yellow', 'orange',
     'purple', 'pink', 'cyan', 'magenta', 'brown',
     'grey', 'teal', 'navy', 'olive', 'lime',
     'maroon', 'aqua', 'silver', 'gold', 'black'),
))

# Translate each row's Feat_Cat through category_mapping; values without an
# entry in the mapping (including NaN) are left unchanged by replace().
df['f_colors'] = df['Feat_Cat'].replace(category_mapping)

# The 20 names with the highest review value, largest first.
top20 = df.groupby('name')['review'].max().sort_values(ascending=False).iloc[:20]

# Number of 'features' rows per Quality among those 20 names.
is_top20_feature = (df['Type'] == 'features') & df['name'].isin(top20.index)
counts = df[is_top20_feature].groupby('Quality').size().reset_index(name='qcount')

# Drop a qcount column left over from a previous run of this cell; otherwise
# the merge would produce qcount_x / qcount_y instead of qcount.
df = df.drop(columns=['qcount'], errors='ignore').merge(counts, on='Quality', how='left')

In [151]:
# Display the processed frame; the stored output is the truncated head/tail view.
df

Unnamed: 0,rating,name,url,rank,review,Quality,Bool,Type,Feat_Cat,f_colors,qcount
0,3.90,Moodle,https://moodle.com/,3939,101,Mobile Application,True,deployment,,,
1,4.08,Cornerstone Learning,https://www.cornerstoneondemand.com/solutions/...,3060,75,Mobile Application,True,deployment,,,
2,4.37,Chamilo,https://chamilo.org/,2054,47,Mobile Application,True,deployment,,,
3,4.20,Open edX,https://openedx.org/,1764,42,Mobile Application,True,deployment,,,
4,4.22,CanopyLAB,https://www.canopylab.com/,1646,39,Mobile Application,True,deployment,,,
...,...,...,...,...,...,...,...,...,...,...,...
17172,4.86,edloomio,https://edloomio.com,97,2,Zopim,True,integrations,,,
17173,3.50,Virtual Slate,https://virtualslate.com/,35,1,Zopim,True,integrations,,,
17174,0.00,End Pen Learning Management Solutions,https://endpen.com/,0,0,Zopim,True,integrations,,,
17175,0.00,Jotform Enterprise for Schools,https://www.jotform.com/enterprise/education,0,0,Zopim,True,integrations,,,


In [162]:
# Lift Altair's row limit so the frame can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# 'features' rows of the top-20 names; shared by both layers.
features_top20 = df[(df['Type'] == 'features') & (df['name'].isin(top20.index))]

# Heatmap layer: one rectangle per (name, feature) row, filled with the
# literal color stored in f_colors (scale=None disables the color scale).
chart = alt.Chart(data=features_top20).mark_rect().encode(
    alt.X('Quality', sort=order_list).title("").axis(labelAngle=90),
    alt.Y('name', sort=list(top20.index)).title(""),
    alt.Color('f_colors:N', legend=None, scale=None)
).properties(
    title={
        'text': 'Top 20 SaaS LMS for Academic, Non Profit, or Small Businesses',
        'subtitle': 'Colors represent feature class',
        'subtitleColor': 'grey'
    }
)

# Label layer: qcount was merged on Quality, so it is the same on every row
# of a feature. Keep one row per feature; otherwise each label is drawn once
# per listing row and the identical text marks stack on top of each other.
feature_counts = features_top20.drop_duplicates(subset='Quality')

text = alt.Chart(data=feature_counts).mark_text().encode(
    x=alt.X('Quality', sort=order_list),
    y=alt.value(-5),  # fixed pixel offset: just above the top edge of the plot
    text=alt.Text('qcount')
)

out = chart + text

# Last expression of the cell: the configured layered chart is what renders.
out.configure_view(
    step=13,
    strokeWidth=0
).configure_title(
    anchor='start'
).configure_scale(
    bandPaddingInner=0.1)