In [1]:
# Install latest version from GitHub
!pip install -q -U git+https://github.com/jdvelasq/techminer

# Direct queries over data

Some queries can be made directly on the dataframe, without resorting to special functions or additional code.

In [2]:
import pandas as pd

from techminer import DataFrame

#
# Load the tutorial dataset (JSON, one record per line) from the techminer
# repository and wrap it in a techminer DataFrame
#
df = DataFrame(
    pd.read_json(
        "https://raw.githubusercontent.com/jdvelasq/techminer/master/data/tutorial/"
        "cleaned-data.json",
        lines=True,
        orient="records",
    )
)

#
# Column names available in the loaded dataframe
#
df.columns

In [3]:
#
# Number of records in the dataframe
# (`df` is the techminer DataFrame built in the data-loading cell)
#
len(df)

145

In [13]:
#
# Data coverage: first five rows of the per-column coverage report
#
df.coverage().head(5)

Unnamed: 0,Column,Number of items,Coverage (%)
0,Authors,145,100.00%
1,Author(s) ID,145,100.00%
2,Title,145,100.00%
3,Year,145,100.00%
4,Source title,145,100.00%


In [6]:
#
# Number of terms (items) found in each reported column
#
df.count_report()

Unnamed: 0,Column,Number of items
0,Authors,434
1,Author(s) ID,434
2,Source title,103
3,Author Keywords,407
4,Index Keywords,884


In [7]:
#
# Number of terms in a single column (here, `Author Keywords`)
#
df.count_terms("Author Keywords")

407

In [8]:
#
# Top 10 most cited documents
# (change the argument of `head` to show a different number of rows)
#
df.most_cited_documents().head(10)

Unnamed: 0,Title,Authors,Year,Cited by,ID
141,Forecasting stock markets using wavelet transf...,"Hsieh T.-J.,Hsiao H.-F.,Yeh W.-C.",2011,188,141
62,Deep learning with long short-term memory netw...,"Fischer T.,Krauss C.",2018,49,62
140,Dynamic Ridge Polynomial Neural Network: Forec...,"Ghazali R.,Hussain A.J.,Liatsis P.",2011,42,140
124,Deep learning for stock prediction using numer...,"Akita R.,Yoshihara A.,Matsubara T.,Uehara K.",2016,37,124
135,A hybrid intelligent model based on recurrent ...,"Sharma V.,Srinivasan D.",2013,28,135
132,A neuro-wavelet model for the short-term forec...,"Ortega L.F.,Khashanah K.",2014,20,132
114,Forecasting stock prices from the limit order ...,"Tsantekidis A.,Passalis N.,Tefas A.,Kanniainen...",2017,20,114
127,Artificial neural networks architectures for s...,"Di Persio L.,Honchar O.",2016,19,127
128,Financial Time Series Prediction Using Elman R...,"Wang J.(5),Wang J.(1),Fang W.,Niu H.",2016,19,128
113,Stock prediction using deep learning,"Singh R.,Srivastava S.",2017,16,113


In [9]:
#
# Alternative way to list the most cited documents:
# citations computed by the `Title` term (first 10 rows)
#
df.citations_by_term("Title").head(10)

Unnamed: 0,Title,Cited by,ID
0,Forecasting stock markets using wavelet transf...,188,[141]
1,Deep learning with long short-term memory netw...,49,[62]
2,Dynamic Ridge Polynomial Neural Network: Forec...,42,[140]
3,Deep learning for stock prediction using numer...,37,[124]
4,A hybrid intelligent model based on recurrent ...,28,[135]
5,A neuro-wavelet model for the short-term forec...,20,[132]
6,Forecasting stock prices from the limit order ...,20,[114]
7,Artificial neural networks architectures for s...,19,[127]
8,Financial Time Series Prediction Using Elman R...,19,[128]
9,Stock prediction using deep learning,16,[113]


In [10]:
#
# Most cited authors (first five rows of the report)
#
df.most_cited_authors().head(5)

Unnamed: 0,Authors,Num Documents,Cited by,ID
126,Hsieh T.-J.,1,188,[141]
125,Hsiao H.-F.,1,188,[141]
382,Yeh W.-C.,1,188,[141]
135,Hussain A.J.,3,52,"[125, 133, 140]"
162,Krauss C.,1,49,[62]


In [14]:
#
# Names of the top 10 most cited authors
# (only the `Authors` column of the report is kept)
#
df.most_cited_authors().head(10)["Authors"]

126     Hsieh T.-J.
125     Hsiao H.-F.
382       Yeh W.-C.
135    Hussain A.J.
162       Krauss C.
91       Fischer T.
101      Ghazali R.
188      Liatsis P.
329       Uehara K.
5          Akita R.
Name: Authors, dtype: object

In [15]:
#
# Names of the top 10 most frequent authors
# (`Authors` column of the documents-by-term report)
#
df.documents_by_term("Authors").head(10)["Authors"]

0       Arevalo A.
1       Gabbouj M.
2     Hernandez G.
3     Hussain A.J.
4     Iosifidis A.
5    Kanniainen J.
6          Leon D.
7          Nino J.
8      Passalis N.
9      Sandoval J.
Name: Authors, dtype: object

## Record extraction by IDs

In [11]:
#
# IDs of the top five documents by citations
# (the `ID` column holds one list of record IDs per title)
#
IDs = df.citations_by_term("Title").head(5)["ID"]
IDs

0    [141]
1     [62]
2    [140]
3    [124]
4    [135]
Name: ID, dtype: object

In [12]:
#
# Retrieve the records matching `IDs`, keeping only the
# `Title` and `Authors` columns
#
df.get_rows_by_IDs(IDs)[["Title", "Authors"]]

Unnamed: 0,Title,Authors
62,Deep learning with long short-term memory netw...,"Fischer T.,Krauss C."
124,Deep learning for stock prediction using numer...,"Akita R.,Yoshihara A.,Matsubara T.,Uehara K."
135,A hybrid intelligent model based on recurrent ...,"Sharma V.,Srinivasan D."
140,Dynamic Ridge Polynomial Neural Network: Forec...,"Ghazali R.,Hussain A.J.,Liatsis P."
141,Forecasting stock markets using wavelet transf...,"Hsieh T.-J.,Hsiao H.-F.,Yeh W.-C."
