In [1]:
# Install latest version from GitHub
!pip install -q -U git+https://github.com/jdvelasq/techminer

# Correlation matrix (Experimental)

In [7]:
import matplotlib.pyplot as plt
import pandas as pd

from techminer import DataFrame, Plot, heatmap

#
# Load the tutorial dataset
#
# The file is read as line-delimited JSON (one record per line) and the
# resulting pandas frame is wrapped in techminer's DataFrame.
DATA_URL = (
    "https://raw.githubusercontent.com/jdvelasq/techminer/master/data/tutorial/"
    + "cleaned-data.json"
)
raw_records = pd.read_json(DATA_URL, orient="records", lines=True)
df = DataFrame(raw_records)

#
# List the available columns
#
df.columns

Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume',
       'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by',
       'DOI', 'Affiliations', 'Document Type', 'Access Type', 'Source', 'EID',
       'Abstract', 'Author Keywords', 'Index Keywords', 'References',
       'keywords', 'CONF', 'fingerprint', 'keywords_cleaned', 'ID',
       'top_10_Authors_freq', 'top_10_keywords_freq',
       'top_10_Source_title_freq', 'top_10_Authors_cited_by',
       'top_10_keywords_cited_by', 'top_10_Source_title_cited_by'],
      dtype='object')

## Correlation between authors by keywords

In [8]:
#
# Authors column of the first 20 rows of the documents-by-term table
# (presumably ordered by number of documents, i.e. the 20 most frequent
# authors -- confirm against techminer's `documents_by_term`)
#
author_counts = df.documents_by_term("Authors")
top_authors = author_counts.head(20)["Authors"]
top_authors

0        Arevalo A.
1        Gabbouj M.
2      Hernandez G.
3      Hussain A.J.
4      Iosifidis A.
5     Kanniainen J.
6           Leon D.
7           Nino J.
8       Passalis N.
9       Sandoval J.
10         Tefas A.
11      Al-Askar H.
12    Al-Jumeily D.
13         Bohte S.
14      Borovykh A.
15            Bu H.
16        Dash P.K.
17     Di Persio L.
18       Honchar O.
19        Liu C.-L.
Name: Authors, dtype: object

In [9]:
# Correlation between authors computed with `by="keywords"`, restricted to the
# rows and columns labelled by `top_authors`.
matrix = df.corr(
    column="Authors",
    sep=",",
    by="keywords",
    as_matrix=True,
).loc[top_authors, top_authors]
matrix

Unnamed: 0,Arevalo A.,Gabbouj M.,Hernandez G.,Hussain A.J.,Iosifidis A.,Kanniainen J.,Leon D.,Nino J.,Passalis N.,Sandoval J.,Tefas A.,Al-Askar H.,Al-Jumeily D.,Bohte S.,Borovykh A.,Bu H.,Dash P.K.,Di Persio L.,Honchar O.,Liu C.-L.
Arevalo A.,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gabbouj M.,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Hernandez G.,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Hussain A.J.,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.816497,0.816497,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Iosifidis A.,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Kanniainen J.,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Leon D.,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Nino J.,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Passalis N.,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sandoval J.,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
