In [1]:
# Install latest version from GitHub
!pip install -q -U git+https://github.com/jdvelasq/techminer

# Direct queries over data

In [2]:
#
# Data loading: read the tutorial dataset (JSON Lines, one record per line)
# straight from the techminer GitHub repository.
#
import pandas as pd

url = "https://raw.githubusercontent.com/jdvelasq/techminer/master/data/tutorial/"

df = pd.read_json(
    url + "cleaned-data.json",
    orient="records",
    lines=True,
)

In [3]:
from techminer import DataFrame

# Re-bind `df` to a techminer DataFrame built from the loaded frame; the
# following cells call techminer methods on it (count_terms, count_report,
# citations_by_term).
df = DataFrame(df)

Some direct queries can be made over the dataframe without resorting to special functions or code.

In [4]:
#
# Number of records (rows) in the dataframe
#
len(df)

145

In [5]:
# Names of the columns available in the dataframe
df.columns

Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume',
       'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by',
       'DOI', 'Affiliations', 'Document Type', 'Access Type', 'Source', 'EID',
       'Abstract', 'Author Keywords', 'Index Keywords', 'References',
       'keywords', 'CONF', 'fingerprint', 'keywords_cleaned', 'ID',
       'top_10_Authors_freq', 'top_10_keywords_freq',
       'top_10_Source_title_freq', 'top_10_Authors_cited_by',
       'top_10_keywords_cited_by', 'top_10_Source_title_cited_by'],
      dtype='object')

In [6]:
# Count the terms found in the 'Author Keywords' column
# (presumably distinct keywords -- confirm against the techminer docs)
df.count_terms('Author Keywords')

407

In [7]:
#
# Number of items per column
#
df.count_report()

Unnamed: 0,Column,Number of items
0,Authors,434
1,Author(s) ID,434
2,Source title,103
3,Author Keywords,407
4,Index Keywords,884


In [8]:
#
# Top 10 most cited documents: citations per title, first ten rows
#
df.citations_by_term('Title').head(10)

Unnamed: 0,Title,Cited by,ID
0,Forecasting stock markets using wavelet transf...,188,[141]
1,Deep learning with long short-term memory netw...,49,[62]
2,Dynamic Ridge Polynomial Neural Network: Forec...,42,[140]
3,Deep learning for stock prediction using numer...,37,[124]
4,A hybrid intelligent model based on recurrent ...,28,[135]
5,A neuro-wavelet model for the short-term forec...,20,[132]
6,Forecasting stock prices from the limit order ...,20,[114]
7,Artificial neural networks architectures for s...,19,[127]
8,Financial Time Series Prediction Using Elman R...,19,[128]
9,Stock prediction using deep learning,16,[113]


In [9]:
#
# Each row of the 'ID' column holds a list of record IDs;
# flatten those lists for the ten most cited titles into one list.
#
[
    doc_id
    for id_list in df.citations_by_term('Title').head(10)['ID']
    for doc_id in id_list
]

[141, 62, 140, 124, 135, 132, 114, 127, 128, 113]

In [10]:
#
# Title, authors and citation count of the ten most cited documents
#
# Flatten the per-title ID lists into a single list of record IDs.
IDs = [e for x in df.citations_by_term('Title').head(10)['ID'] for e in x]

# `isin` is the vectorized equivalent of testing `x in IDs` row by row.
(
    df[df.ID.isin(IDs)][['Title', 'Authors', 'Cited by']]
    .sort_values('Cited by', ascending=False)
)

Unnamed: 0,Title,Authors,Cited by
141,Forecasting stock markets using wavelet transf...,"Hsieh T.-J.,Hsiao H.-F.,Yeh W.-C.",188.0
62,Deep learning with long short-term memory netw...,"Fischer T.,Krauss C.",49.0
140,Dynamic Ridge Polynomial Neural Network: Forec...,"Ghazali R.,Hussain A.J.,Liatsis P.",42.0
124,Deep learning for stock prediction using numer...,"Akita R.,Yoshihara A.,Matsubara T.,Uehara K.",37.0
135,A hybrid intelligent model based on recurrent ...,"Sharma V.,Srinivasan D.",28.0
114,Forecasting stock prices from the limit order ...,"Tsantekidis A.,Passalis N.,Tefas A.,Kanniainen...",20.0
132,A neuro-wavelet model for the short-term forec...,"Ortega L.F.,Khashanah K.",20.0
127,Artificial neural networks architectures for s...,"Di Persio L.,Honchar O.",19.0
128,Financial Time Series Prediction Using Elman R...,"Wang J.(5),Wang J.(1),Fang W.,Niu H.",19.0
113,Stock prediction using deep learning,"Singh R.,Srivastava S.",16.0
