In [1]:
!paver local

---> pavement.local
pip3 uninstall techminer
python3 setup.py install develop
running install
running bdist_egg
running egg_info
creating techminer.egg-info
writing techminer.egg-info/PKG-INFO
writing dependency_links to techminer.egg-info/dependency_links.txt
writing top-level names to techminer.egg-info/top_level.txt
writing manifest file 'techminer.egg-info/SOURCES.txt'
reading manifest file 'techminer.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
writing manifest file 'techminer.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/techminer
copying techminer/thesaurus.py -> build/lib/techminer
copying techminer/dataframe.py -> build/lib/techminer
copying techminer/keywords.py -> build/lib/techminer
copying techminer/records.py -> build/lib/techminer
copying techminer/datasets.py -> build/lib/techminer
copying techminer/__init__.py -> build/lib/techminer

# Test

In [None]:
# Scratch cell: builds a term table from the test dataset and renders it with Plot.
from techminer.datasets import load_test_cleaned
from techminer.dataframe import DataFrame

# Wrap the test records in the TechMiner DataFrame and call generate_ID() on it.
rdf = DataFrame(load_test_cleaned().data).generate_ID()
# Presumably one row per author with its document count, limited by top_n -- TODO confirm.
df = rdf.documents_by_term(column="Authors", top_n=10)
print(df)


# NOTE(review): Plot is defined in the next cell of this notebook, so on a fresh
# kernel that cell has to be executed first or this line raises NameError.
Plot(df).wordcloud()

# Alternative renderings of the same table, currently disabled.
# Plot(df).pie()
# Plot(df).pie(cmap=plt.cm.Blues)

# Plot(df).barh()
#Plot(df).barh(cmap=plt.cm.Blues)
#Plot(df).plot('o-', color='red')
#df.plot.barh('Authors', 'Num Documents')


In [None]:
"""
TechMiner.Plot
==================================================================================================




"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from wordcloud import ImageColorGenerator, WordCloud


class Plot:
    def __init__(self, pdf):
        """Keep a reference to the table that the plotting methods will draw.

        Args:
            pdf: table to plot. The plotting methods call ``copy()`` and
                ``pop("ID")`` on it and address its columns by position, so it
                is presumably a pandas DataFrame carrying an ``ID`` column --
                TODO confirm against callers.
        """
        self.pdf = pdf

    def wordcloud(
        self,
        font_path=None,
        width=400,
        height=200,
        margin=2,
        ranks_only=None,
        prefer_horizontal=0.9,
        mask=None,
        scale=1,
        color_func=None,
        max_words=200,
        min_font_size=4,
        stopwords=None,
        random_state=None,
        background_color="black",
        max_font_size=None,
        font_step=1,
        mode="RGB",
        relative_scaling="auto",
        regexp=None,
        collocations=True,
        colormap=None,
        normalize_plurals=True,
        contour_width=0,
        contour_color="black",
        repeat=False,
        include_numbers=False,
        min_word_length=0,
    ):
        """Draw the table as a word cloud on the current matplotlib axes.

        After the ``ID`` column is dropped, the first remaining column supplies
        the terms and the second one their counts. The cloud is generated from
        that term -> count mapping, so a multi-word term (an author name, a
        source title) is rendered as a single entry sized by its own count
        instead of being split into separate words.

        All keyword arguments are forwarded unchanged to ``wordcloud.WordCloud``.
        The options that WordCloud only applies while tokenising raw text
        (``stopwords``, ``regexp``, ``collocations``, ``normalize_plurals``,
        ``include_numbers``, ``min_word_length``) are still accepted for
        backward compatibility but do not influence a cloud that is built from
        frequencies.

        Returns:
            The matplotlib axes the cloud was drawn on.

        Raises:
            KeyError: if the table has no ``ID`` column.
            ValueError: raised by WordCloud when no term has a positive count.

        >>> from techminer.datasets import load_test_cleaned
        >>> from techminer.dataframe import DataFrame
        >>> from techminer.plot import Plot
        >>> rdf = DataFrame(load_test_cleaned().data).generate_ID()
        >>> Plot(rdf.documents_by_term(column='Source title')).wordcloud()

        .. image:: ../figs/wordcloud.jpg
            :width: 800px
            :align: center
        """

        table = self.pdf.copy()
        table.pop("ID")

        terms = table[table.columns[0]]
        counts = table[table.columns[1]]

        # Accumulate counts per term: a term listed on several rows weighs the
        # sum of its rows, and rows without a positive count contribute nothing.
        frequencies = {}
        for term, count in zip(terms, counts):
            if count > 0:
                frequencies[term] = frequencies.get(term, 0) + count

        cloud = WordCloud(
            font_path=font_path,
            width=width,
            height=height,
            margin=margin,
            ranks_only=ranks_only,
            prefer_horizontal=prefer_horizontal,
            mask=mask,
            scale=scale,
            color_func=color_func,
            max_words=max_words,
            min_font_size=min_font_size,
            stopwords=stopwords,
            random_state=random_state,
            background_color=background_color,
            max_font_size=max_font_size,
            font_step=font_step,
            mode=mode,
            relative_scaling=relative_scaling,
            regexp=regexp,
            collocations=collocations,
            colormap=colormap,
            normalize_plurals=normalize_plurals,
            contour_width=contour_width,
            contour_color=contour_color,
            repeat=repeat,
            include_numbers=include_numbers,
            min_word_length=min_word_length,
        )

        # Feeding the mapping directly keeps every term intact; generating from
        # joined text would re-tokenise the terms on whitespace.
        cloud.generate_from_frequencies(frequencies)

        ax = plt.gca()
        ax.imshow(cloud, interpolation="bilinear")
        ax.axis("off")

        return ax

    def pie(
        self,
        cmap=plt.cm.Greys,
        explode=None,
        autopct=None,
        pctdistance=0.6,
        shadow=False,
        labeldistance=1.1,
        startangle=None,
        radius=None,
        counterclock=True,
        wedgeprops=None,
        textprops=None,
        center=(0, 0),
        frame=False,
        rotatelabels=False,
    ):
        """Draw the table as a pie chart on the current matplotlib axes.

        After the ``ID`` column is dropped, the first remaining column supplies
        the wedge labels and the second one the wedge sizes. When ``cmap`` is
        not None each wedge gets a colour sampled from it, moving from 1.0
        towards 0.1 along the colormap as the row position grows; with
        ``cmap=None`` matplotlib picks the colours. Every other argument is
        forwarded unchanged to ``Axes.pie``.

        Returns:
            The matplotlib axes the chart was drawn on.
        """

        table = self.pdf.copy()
        table.pop("ID")

        label_col, value_col = table.columns[0], table.columns[1]

        palette = None
        if cmap is not None:
            n_rows = len(table)
            palette = []
            for position in range(len(table[value_col])):
                palette.append(cmap(1.0 - 0.9 * (position / n_rows)))

        axes = plt.gca()
        axes.pie(
            x=table[value_col],
            explode=explode,
            labels=table[label_col],
            colors=palette,
            autopct=autopct,
            pctdistance=pctdistance,
            shadow=shadow,
            labeldistance=labeldistance,
            startangle=startangle,
            radius=radius,
            counterclock=counterclock,
            wedgeprops=wedgeprops,
            textprops=textprops,
            center=center,
            frame=frame,
            rotatelabels=rotatelabels,
        )

        return axes

    def plot(self, *args, scalex=True, scaley=True, **kwargs):
        """Draw the table as a line plot on the current matplotlib axes.

        After the ``ID`` column is dropped, the second remaining column is
        plotted against the row positions and the first one provides the
        vertical tick labels. The column names become the axis labels and all
        four spines are hidden. ``*args``, ``scalex``, ``scaley`` and
        ``**kwargs`` are forwarded unchanged to ``Axes.plot``.

        Returns:
            The matplotlib axes the line was drawn on.
        """

        table = self.pdf.copy()
        table.pop("ID")

        term_col, value_col = table.columns[0], table.columns[1]
        terms = table[term_col]

        axes = plt.gca()
        axes.plot(
            range(len(table)),
            table[value_col],
            *args,
            scalex=scalex,
            scaley=scaley,
            **kwargs,
        )

        plt.xticks(np.arange(len(terms)), terms, rotation="vertical")
        plt.xlabel(term_col)
        plt.ylabel(value_col)

        for side in ("top", "right", "left", "bottom"):
            axes.spines[side].set_visible(False)

        return axes

    def barh(self, height=0.8, left=None, align="center", cmap=None, **kwargs):
        """Draw the table as a horizontal bar chart on the current matplotlib axes.

        After the ``ID`` column is dropped, the first remaining column supplies
        the bar labels and the second one the bar lengths. The y axis is
        inverted so the first row ends up at the top, the column names become
        the axis labels and all four spines are hidden.

        Args:
            height, left, align: forwarded unchanged to ``Axes.barh``.
            cmap: optional colormap. When given, each bar is coloured by its
                value relative to the largest one (mapped into 0.2 .. 0.95 of
                the colormap) and any ``color`` passed in ``kwargs`` is
                replaced.
            **kwargs: forwarded unchanged to ``Axes.barh``.

        Returns:
            The matplotlib axes the bars were drawn on.
        """

        table = self.pdf.copy()
        table.pop("ID")

        term_col, value_col = table.columns[0], table.columns[1]
        values = table[value_col]

        if cmap is not None:
            # Computed once instead of once per bar; guarded so that an empty
            # table does not fail.
            peak = values.max() if len(values) else 0
            # Iterating over the values keeps the colours aligned with the bars
            # whatever the table's index labels are. When the largest value is
            # zero every bar gets the base shade instead of dividing by zero.
            kwargs["color"] = [
                cmap(0.2 + 0.75 * value / peak) if peak else cmap(0.2)
                for value in values
            ]

        ax = plt.gca()
        ax.barh(
            y=range(len(table)),
            width=values,
            height=height,
            left=left,
            align=align,
            **kwargs,
        )

        ax.invert_yaxis()

        plt.yticks(np.arange(len(table[term_col])), table[term_col])
        plt.xlabel(value_col)
        plt.ylabel(term_col)

        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        return ax

    def bar(self, width=0.8, bottom=None, align="center", cmap=plt.cm.Greys, **kwargs):
        """Draw the table as a vertical bar chart on the current matplotlib axes.

        After the ``ID`` column is dropped, the first remaining column supplies
        the (vertical) tick labels and the second one the bar heights. The
        column names become the axis labels and all four spines are hidden.

        Args:
            width, bottom, align: forwarded unchanged to ``Axes.bar``.
            cmap: colormap used to colour each bar by its value relative to the
                largest one (mapped into 0.2 .. 0.95 of the colormap). It
                replaces any ``color`` passed in ``kwargs``; pass ``cmap=None``
                to control the colour through ``kwargs`` instead.
            **kwargs: forwarded unchanged to ``Axes.bar``.

        Returns:
            The matplotlib axes the bars were drawn on.
        """

        table = self.pdf.copy()
        table.pop("ID")

        term_col, value_col = table.columns[0], table.columns[1]
        values = table[value_col]

        if cmap is not None:
            # Computed once instead of once per bar; guarded so that an empty
            # table does not fail.
            peak = values.max() if len(values) else 0
            # Iterating over the values keeps the colours aligned with the bars
            # whatever the table's index labels are. When the largest value is
            # zero every bar gets the base shade instead of dividing by zero.
            kwargs["color"] = [
                cmap(0.2 + 0.75 * value / peak) if peak else cmap(0.2)
                for value in values
            ]

        ax = plt.gca()
        ax.bar(
            x=range(len(table)),
            height=values,
            width=width,
            bottom=bottom,
            align=align,
            **kwargs,
        )

        plt.xticks(
            np.arange(len(table[term_col])), table[term_col], rotation="vertical"
        )
        plt.xlabel(term_col)
        plt.ylabel(value_col)

        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        return ax

    # ----------------------------------------------------------------------------------------------------

    def heatmap(
        self,
        ascending_r=None,
        ascending_c=None,
        alpha=None,
        norm=None,
        cmap=plt.cm.Greys,
        vmin=None,
        vmax=None,
        data=None,
        **kwargs
    ):
        """Draw the table as a heat map on the current matplotlib axes.

        After the ``ID`` column is dropped, the table is pivoted: the values of
        its first column become the pivot index (shown along the x axis with
        vertical labels), the values of its second column become the pivot
        columns (shown along the y axis) and missing combinations are filled
        with 0. The y axis is inverted so the first category is at the top.

        Args:
            ascending_r, ascending_c: accepted for backward compatibility;
                currently not used.
            alpha, norm, cmap, vmin, vmax, data, **kwargs: forwarded unchanged
                to ``Axes.pcolor``.

        Returns:
            The matplotlib axes the heat map was drawn on.

        Sequential colormaps that work well here, see
        https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html

            'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds',
            'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu',
            'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn'


        >>> from techminer.datasets import load_test_cleaned
        >>> from techminer.dataframe import DataFrame
        >>> rdf = DataFrame(load_test_cleaned().data).generate_ID()
        >>> result = rdf.co_ocurrence(column_r='Authors', column_c='Document Type', top_n=5)
        >>> from techminer.plot import Plot
        >>> Plot(result).heatmap()

        .. image:: ../figs/heatmap.jpg
            :width: 600px
            :align: center

        """

        table = self.pdf.copy()
        table.pop("ID")
        matrix = pd.pivot_table(
            data=table,
            index=table.columns[0],
            columns=table.columns[1],
            margins=False,
            fill_value=0,
        )
        # The pivot yields two-level column labels here; keep the second level.
        matrix.columns = [label for _, label in matrix.columns]

        ax = plt.gca()
        # pcolor lays the rows of its input along the y axis and the columns
        # along the x axis. The ticks below put the pivot index on x and the
        # pivot columns on y, so the matrix has to be transposed to match them.
        ax.pcolor(
            matrix.values.T,
            alpha=alpha,
            norm=norm,
            cmap=cmap,
            vmin=vmin,
            vmax=vmax,
            data=data,
            **kwargs,
        )
        plt.xticks(
            np.arange(len(matrix.index)) + 0.5, matrix.index, rotation="vertical"
        )
        plt.yticks(np.arange(len(matrix.columns)) + 0.5, matrix.columns)
        ax.invert_yaxis()

        # TODO: features of the previous implementation that are not ported yet:
        # forcing the same row/column order for auto-correlation tables,
        # outlining the diagonal cells of auto-correlation heat maps, and
        # writing each cell's value on top of it.

        return ax