In [1]:
import ipywidgets as widgets
import pandas as pd
from ipywidgets import Layout, AppLayout

import techminer.analytics as tc
import techminer.plots as plt
from techminer.analytics import load_scopus
import techminer.dashboards as dash
from techminer.keywords import Keywords

# Location of the raw CSV export, relative to the notebook's working directory.
filepath = "../data/papers/urban-agriculture.csv"

# Read the CSV and hand the raw frame straight to techminer's load_scopus,
# whose progress is reported in the log lines printed below this cell.
df = pd.read_csv(filepath).pipe(load_scopus)

2020-06-01 14:52:05,773 - INFO - Renaming and selecting columns ...
2020-06-01 14:52:05,780 - INFO - Formatting author names ...
2020-06-01 14:52:11,894 - INFO - Removing part of titles in foreing languages ...
2020-06-01 14:52:11,897 - INFO - Fusioning author and index keywords ...
2020-06-01 14:52:11,912 - INFO - NumExpr defaulting to 8 threads.
2020-06-01 14:52:11,929 - INFO - Extracting countries from affiliations ...
2020-06-01 14:52:18,541 - INFO - Extracting institutions from affiliations ...
2020-06-01 14:52:18,556 - INFO - Extracting country of 1st author ...
2020-06-01 14:52:18,559 - INFO - Extracting affiliation of 1st author ...
2020-06-01 14:52:18,563 - INFO - Counting number of authors ...


In [2]:
# Colormap names offered in the "Colormap" dropdown of the dashboards.
# The order below is the order shown to the user; the first entry is the
# dropdown's initial value.
COLORMAPS = [
    # single- and multi-hue gradients
    "Greys", "Purples", "Blues", "Greens", "Oranges", "Reds",
    "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu",
    "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn",
    # discrete palettes
    "Pastel1", "Pastel2", "Paired", "Accent", "Dark2",
    "Set1", "Set2", "Set3",
    "tab10", "tab20", "tab20b", "tab20c",
]

# Column names that can be picked as analysis terms in the dashboards.
# Each dashboard filters this list down to the columns actually present in
# the dataframe it receives.
#
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, so a missing comma silently merges two names into
# one that matches no column.
COLUMNS = [
    "Author Keywords",
    "Authors",
    "Countries",
    "Country 1st",
    "Document type",
    "Index Keywords",
    "Institution 1st",
    "Institutions",
    "Keywords",
    "Source title",
]

In [4]:
def correlation_analysis(x):
    """Build the interactive "Correlation analysis" panel for a dataframe.

    Parameters
    ----------
    x : dataframe-like
        Object exposing ``.columns``. It is passed unchanged to ``tc.corr``.
        Only the entries of ``COLUMNS`` that are present in ``x.columns`` are
        offered in the "Term" and "By term" selectors.

    Returns
    -------
    ipywidgets.AppLayout
        Layout with an HTML title as header, the control widgets in the left
        sidebar and the rendered matrix in the center pane.
    """
    PANEL_HEIGHT = "570px"
    # Largest number of rows/columns for which the colour gradient is applied;
    # bigger matrices are shown with number formatting only.
    MAX_GRADIENT_DIM = 50

    def compute_by_term(column, by, method, minmax, cmap, filter_type, top_n):
        """Recompute the matrix from the current widget values and redraw it."""
        # The range slider delivers a two-element value; pass a plain tuple on.
        minmax = (minmax[0], minmax[1])
        # With get_minmax=True the call is unpacked as a pair; only the
        # matrix is used here.
        matrix, _ = tc.corr(
            x,
            column=column,
            by=by,
            method=method.lower(),
            show_between=minmax,
            cmap=cmap,
            filter_by=filter_type,
            top_n=top_n,
            as_matrix=True,
            get_minmax=True,
        )
        output.clear_output()
        with output:
            styled = matrix.style.format("{:.3f}")
            if (
                len(matrix.columns) <= MAX_GRADIENT_DIM
                and len(matrix.index) <= MAX_GRADIENT_DIM
            ):
                styled = styled.background_gradient(cmap=cmap)
            display(styled)

    def labelled(text, control):
        """Stack a caption label on top of a control widget."""
        return widgets.VBox([widgets.Label(value=text), control])

    #
    # Controls
    #
    available_terms = [z for z in COLUMNS if z in x.columns]
    # `ensure_option` is not a trait of Select, so it is not passed here.
    column = widgets.Select(options=available_terms, disabled=False)
    by = widgets.Select(options=available_terms, disabled=False)
    # The trait is spelled `disabled`; the former `disable=` keyword was an
    # unrecognized argument that traitlets ignores with a DeprecationWarning.
    method = widgets.Dropdown(
        options=["Pearson", "Kendall", "Spearman"], value="Pearson", disabled=False,
    )
    selection_range = widgets.FloatRangeSlider(
        value=[-1.0, 1.0],
        min=-1.0,
        max=1.0,
        step=0.1,
        disabled=False,
        continuous_update=False,
        orientation="horizontal",
        readout=True,
        readout_format="+.1f",
    )
    filter_type = widgets.Dropdown(
        options=["Frequency", "Citation"], value="Frequency", disabled=False,
    )
    top_n = widgets.IntSlider(
        value=10,
        min=10,
        max=50,
        step=1,
        disabled=False,
        continuous_update=False,
        orientation="horizontal",
        readout=True,
        readout_format="d",
    )
    cmap = widgets.Dropdown(options=COLORMAPS, disabled=False)

    #
    # Output area, wired to the controls. `output` must exist before
    # interactive_output is created because compute_by_term writes into it.
    #
    output = widgets.Output()
    with output:
        display(
            widgets.interactive_output(
                compute_by_term,
                {
                    "column": column,
                    "by": by,
                    "method": method,
                    "minmax": selection_range,
                    "cmap": cmap,
                    "filter_type": filter_type,
                    "top_n": top_n,
                },
            )
        )

    #
    # Layout
    #
    left_box = widgets.VBox(
        [
            labelled("Term:", column),
            labelled("By term:", by),
            labelled("Method:", method),
            labelled("Filter type:", filter_type),
            labelled("Top n:", top_n),
            labelled("Range:", selection_range),
            labelled("Colormap:", cmap),
        ],
        layout=Layout(height=PANEL_HEIGHT, border="1px solid gray"),
    )
    right_box = widgets.VBox([output])

    return AppLayout(
        header=widgets.HTML(value=html_title("Correlation analysis")),
        left_sidebar=left_box,
        center=right_box,
        right_sidebar=None,
        pane_widths=[2, 5, 0],
        pane_heights=["85px", 5, 0],
    )


##
## Debug
##
# Module-level defaults for the debug section below.
# NOTE(review): FIGSIZE is not referenced in the visible cells, and
# correlation_analysis defines its own local PANEL_HEIGHT ("570px"), so this
# module-level value is not the one that function uses — confirm both are
# still needed.
FIGSIZE = (12, 6)
PANEL_HEIGHT = "410px"


def html_title(x):
    """Return an HTML snippet with ``x`` as an ``<h1>`` heading plus a gray rule.

    ``x`` is interpolated as-is (no HTML escaping is performed).
    """
    heading = f"<h1>{x}</h1>"
    rule = "<hr style='height:2px;border-width:0;color:gray;background-color:gray'>"
    return heading + rule


# Build the correlation dashboard for the loaded dataframe; as the last
# expression of the cell, the returned AppLayout is rendered as its output.
correlation_analysis(df)

AppLayout(children=(HTML(value="<h1>Correlation analysis</h1><hr style='height:2px;border-width:0;color:gray;b…

In [None]:
# Call techminer's dashboards.summary_by_term on the loaded dataframe; the
# value it returns becomes the cell output.
dash.summary_by_term(df)

In [None]:
# Inspect which column labels are available in df.
df.columns

In [None]:
# Widget-linking experiment: source_range's value is linked to
# target_range1's value with jsdlink (as the caption says, changes in the
# source are reflected in the target).
caption = widgets.Label(value='Changes in source_range values are reflected in target_range1')
source_range = widgets.IntSlider(description='Source range')
target_range1 = widgets.IntSlider(description='Target range 1')
# Keep the link object around so it can be inspected or undone later.
dl = widgets.jsdlink(
    (source_range, 'value'),
    (target_range1, 'value'),
)
display(caption, source_range, target_range1)

In [None]:
# Widget-linking experiment: four controls share one value; two are created
# with continuous_update=False ("Delayed") and two with
# continuous_update=True ("Continuous").
a = widgets.IntSlider(
    description="Delayed",
    continuous_update=False,
)
b = widgets.IntText(
    description="Delayed",
    continuous_update=False,
)
c = widgets.IntSlider(
    description="Continuous",
    continuous_update=True,
)
d = widgets.IntText(
    description="Continuous",
    continuous_update=True,
)

# Tie the value of `a` to each of the other three widgets.
for follower in (b, c, d):
    widgets.link((a, 'value'), (follower, 'value'))

widgets.VBox(children=[a, b, c, d])

In [None]:
# Observer experiment: the caption text is rewritten whenever the slider's
# value changes.
caption = widgets.Label(value='The values of range1 and range2 are synchronized')
slider = widgets.IntSlider(value=1, min=-5, max=5, description='Slider')


def handle_slider_change(change):
    """Set the caption to say whether the slider's new value is negative."""
    if change.new < 0:
        sign = 'negative'
    else:
        sign = 'nonnegative'
    caption.value = 'The slider value is ' + sign


# Only react to changes of the `value` trait.
slider.observe(handle_slider_change, names='value')

display(caption, slider)

In [None]:
def menu_descriptive_stats(x):
    """Placeholder for the "Descriptive statistics" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text(value="TO-DO")

In [None]:
def menu_analysis_by_term_per_year(x):
    """Placeholder for the "Analysis by term per year" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text("TO-DO")


# The function was originally defined under this name, which duplicates
# "per_term"; `dashboard` looks it up as `menu_analysis_by_term_per_year`.
# The old spelling is kept as an alias so existing references keep working.
menu_analysis_by_term_per_term_per_year = menu_analysis_by_term_per_year

In [None]:
def menu_occurrence(x):
    """Placeholder for the "Occurrence analysis" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text(value="TO-DO")

In [None]:
def menu_co_occurrence(x):
    """Placeholder for the "Co-occurrence analysis" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text(value="TO-DO")

In [None]:
def menu_autocorrelation(x):
    """Placeholder for the "Autocorrelation analysis" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text(value="TO-DO")

In [None]:
def menu_correlation(x):
    """Placeholder for the "Correlation analysis" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text(value="TO-DO")

In [None]:
def menu_factor_analysis(x):
    """Placeholder for the "Factor analysis" section of the dashboard.

    ``x`` is accepted but not used yet; a text widget showing "TO-DO" is
    returned.
    """
    return widgets.Text(value="TO-DO")

In [None]:
def dashboard(x):
    """Assemble every analysis section into a single accordion widget.

    Each entry of the section table pairs a builder function with the title
    of its accordion page. Every builder is called once with ``x`` and its
    result becomes the page content, in table order.

    Returns the populated ``widgets.Accordion``.
    """
    # NOTE(review): some builders referenced below (e.g. menu_analysis_by_year
    # and menu_analysis_by_term) are not defined in the cells visible in this
    # notebook — confirm they exist before running, or this raises NameError.
    sections = [
        (menu_descriptive_stats, "Descriptive statistics"),
        (menu_analysis_by_year, "Analysis by year"),
        (menu_analysis_by_term, "Analysis by term"),
        (menu_analysis_by_term_per_year, "Analysis by term per year"),
        (menu_occurrence, "Occurrence analysis"),
        (menu_co_occurrence, "Co-occurrence analysis"),
        (menu_autocorrelation, "Autocorrelation analysis"),
        (menu_correlation, "Correlation analysis"),
        (menu_factor_analysis, "Factor analysis"),
    ]
    panel = widgets.Accordion()
    # Build all pages first, then attach them in one assignment.
    panel.children = [build(x) for build, _ in sections]
    for position, (_, title) in enumerate(sections):
        panel.set_title(position, title)
    return panel

In [None]:
# Build the accordion dashboard for the loaded dataframe; the returned widget
# is rendered as the cell output.
dashboard(df)

In [None]:
# Inspect which column labels are available in df.
df.columns