# Test other APIs

## Imports

In [86]:
from startupjh.data_collection import unpaywall_api
from startupjh.data_collection import doaj_api
from startupjh.data_collection import core_api
from startupjh.data_collection import google_api
from startupjh import data_preprocess

import pandas as pd
import numpy as np

%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Test `unpaywall_api()`

In [102]:
df_unpaywall = unpaywall_api.unpaywall_api()

Enter key words: automation container terminal


In [103]:
authors = []
affiliations = []
for index, row in df_unpaywall.iterrows():
    authors_list = []
    affiliation_list = []
    for author in row['authors']:
        affiliation = []
        if ('given' in author) and ('family' in author):
            author_fullname = author['given'] + " " + author['family']
        elif ('given' not in author) and ('family' in author):
            author_fullname = author['family']
        elif ('given' in author) and ('family' not in author):
            author_fullname = author['given']
        else:
            author_fullname = author['name']
        if 'affiliation' in author:
            #print(author['affiliation'])
            for e in author['affiliation']:
                affiliation.append(e['name'])
        #else: 
            #affiliation = np.nan
        authors_list.append(author_fullname)
        affiliation_list.append(affiliation)
    authors.append(authors_list)
    affiliations.append(affiliation_list)

In [104]:
df_unpaywall.authors = authors
df_unpaywall['affiliations'] = affiliations

In [105]:
df_unpaywall = data_preprocess.extract_key_words(df_unpaywall)
df_unpaywall

Unnamed: 0,title,doi,genre,is_oa,journal_is_oa,journal_name,published_date,publisher,authors,affiliations,key_words
0,Automated Container Terminals,10.1201/9780849307744-3,book-chapter,False,False,Intelligent Freight Transportation,2008-03-24,CRC Press,[Edmond Dougherty],[[]],"[automated, container, terminals]"
1,Case: The Automated Container Terminal,10.1007/978-1-84800-177-0_11,book-chapter,False,False,The Delft Systems Approach,,Springer London,[],[],"[case, automated, container, terminal]"
2,Automated Container Terminal Concepts,10.1201/9780849307744-2,book-chapter,False,False,Intelligent Freight Transportation,2008-03-24,CRC Press,"[Petros Ioannou, Hossein Jula]","[[], []]","[automated, container, terminal, concepts]"
3,Petri Net Modeling and Analysis of Automated C...,10.3141/1782-09,journal-article,False,False,Transportation Research Record: Journal of the...,2002-01-01,SAGE Publications,"[Chin-I. Liu, P. A. Ioannou]",[[Center for Advanced Transportation Technolog...,"[petri, net, modeling, analysis, automated, co..."
4,Automated Container Terminal Design at the POLA,10.1061/9780784413067.078,proceedings-article,False,False,Ports 2013,2013-08-12,American Society of Civil Engineers,"[Adrienne Fedrick, Christina Sar, Milind Desai]","[[], [], []]","[automated, container, terminal, design, pola]"
5,Planning for the TraPac Automated Container Te...,10.1061/9780784413067.192,proceedings-article,False,False,Ports 2013,2013-08-12,American Society of Civil Engineers,"[Louis J. Di Meglio, Mark Sisson]","[[], []]","[planning, trapac, automated, container, termi..."
6,Routing of AGVs on automated container terminals,10.1109/cscwd.2015.7230993,proceedings-article,False,False,2015 IEEE 19th International Conference on Com...,2015-05-01,IEEE,"[Mark B. Duinkerken, Gabriel Lodewijks]","[[], []]","[routing, agvs, automated, container, terminals]"
7,Simulation study on terminal layout in automat...,10.1016/j.ocecoaman.2021.105882,journal-article,False,False,Ocean & Coastal Management,2021-11-01,Elsevier BV,"[Xiangda Li, Yun Peng, Jun Huang, Wenyuan Wang...","[[], [], [], [], []]","[simulation, study, terminal, layout, automate..."
8,Comparison of vehicle types at an automated co...,10.1007/3-540-26686-0_2,book-chapter,False,False,Container Terminals and Automated Transport Sy...,,Springer-Verlag,"[Iris F. A. Vis, Ismael Harika]","[[], []]","[comparison, vehicle, types, automated, contai..."
9,Simulating Dispatching Strategies for Automate...,10.1007/3-540-32539-5_16,book-chapter,False,False,Operations Research Proceedings 2005,2006-01-01,Springer Berlin Heidelberg,"[Dirk Briskorn, Sönke Hartmann]","[[], []]","[simulating, dispatching, strategies, automate..."


## Test `doaj_api()`

In [7]:
df_doaj = doaj_api.doaj_api()

Enter key words: automation container terminal


In [8]:
# Get author names and affiliations
authors = []
affiliations = []
for index, row in df_doaj.iterrows():
    author_list = []
    affiliation_list = []
    for author in row.author:
        author_list.append(author['name'])
        affiliation_list.append(author['affiliation'])
    authors.append(author_list)
    affiliations.append(affiliation_list)

In [9]:
df_doaj.author = authors
df_doaj['affiliations'] = affiliations

In [10]:
# Get journal name, oa, and publisher
journal_name = []
journal_is_oa = []
publisher = []
for index, row in df_doaj.iterrows():
    journal_name.append(row.journal['title'])
    journal_is_oa.append(row.journal['is_oa'])
    publisher.append(row.journal['publisher'])

In [11]:
df_doaj['journal_name'] = journal_name
df_doaj['journal_is_oa'] = journal_is_oa
df_doaj['publisher'] = publisher

In [12]:
# Get published date, by default the day of the month is set to 1
published_date = []
for index, row in df_doaj.iterrows():
    if row.pub_month:
        if len(row.pub_month) == 1:
            month = "0"+row.pub_month
        else: 
            month = row.pub_month
    else:
        month = "06"
    date = row.pub_year+"-"+month+"-"+"01"
    published_date.append(date)

In [13]:
df_doaj['published_date'] = published_date

In [15]:
# Get number of pages
number_of_pages = []
for index, row in df_doaj.iterrows():
    if (row.start_page) and (row.end_page):
        number_pages = int(row.end_page) - int(row.start_page)
    else:
        number_pages = ""
    number_of_pages.append(number_pages)

In [16]:
df_doaj['number_of_pages'] = number_of_pages

In [17]:
df_doaj.drop(labels = ['pub_year', 'pub_month', 'start_page', 'end_page', 'journal'], axis=1, inplace = True)
df_doaj = df_doaj.rename(columns={'keywords': 'key_words', 'author': 'authors'})
df_doaj

Unnamed: 0,title,key_words,authors,link,abstract,affiliations,journal_name,journal_is_oa,publisher,published_date,number_of_pages
0,A Container Horizontal Positioning Method with...,"[Container horizontal positioning, Automated c...","[ FU Yonghua, WANG Xuefeng, MI Chao, BIAN Zhic...",http://www.sensorsportal.com/HTML/DIGEST/march...,Automation is a trend for large container term...,[Logistic Research Center Shanghai Maritime Un...,Sensors & Transducers,True,"IFSA Publishing, S.L.",2014-03-01,6.0
1,Automation of the Road Gate Operations Process...,"[container terminal, optical camera recognitio...","[Karol Moszyk, Mariusz Deja, Michal Dobrzynski]",https://www.mdpi.com/2071-1050/13/11/6291,The future increased terminal capacity will re...,"[Doctoral Studies-Implementation Doctorate, Fa...",Sustainability,,MDPI AG,2021-06-01,
2,Feeder Topology Configuration and Application ...,"[feeder topology, IEC 61850, information model...","[Haotian Ge, Bingyin Xu, Xinhui Zhang, Yongjia...",https://www.mdpi.com/1996-1073/14/14/4230,Distribution automation (DA) and Internet of T...,"[Electrical Engineering, Shandong University o...",Energies,,MDPI AG,2021-07-01,
3,Review of horizontal transport decision proble...,"[intermodal terminal, TEU, horizontal transport]",[Roland Jachimowski],http://aot.publisherspanel.com/gicid/01.3001.0...,The article is a review of the transport probl...,"[Warsaw University of Technology, Faculty of T...",Archives of Transport,,"Faculty of Transport, Warsaw University of Tec...",2017-11-01,10.0
4,Evaluating the impact of long cargo dwell time...,"[dwell time, quay crane scheduling, terminal o...","[Monde Aminatou, Yang Jiaqi, Stephen Okyere]",http://aot.publisherspanel.com/gicid/01.3001.0...,Create as part of the concession agreement sig...,"[School of Transportation, Wuhan University of...",Archives of Transport,,"Faculty of Transport, Warsaw University of Tec...",2018-06-01,13.0
5,Automated Container Terminal Production Operat...,,"[Yu Li, Daofang Chang, Yinping Gao, Ying Zou, ...",http://dx.doi.org/10.1155/2021/1936764,"Digital twin (DT), machine learning, and indus...",[Institute of Logistics Science and Engineerin...,Journal of Advanced Transportation,,Hindawi-Wiley,2021-06-01,
6,Storage Allocation in Automated Container Term...,"[storage allocation, automated container termi...","[Mengjue Xia, Ning Zhao, Weijian Mi]",https://doi.org/10.1515/pomr-2016-0061,Nowadays automation is a trend of container te...,"[Scientific Research Academy, China, Logistics...",Polish Maritime Research,,Sciendo,2016-10-01,14.0
7,An Attention Mechanism Oriented Hybrid CNN-RNN...,,"[Bin Li, Yuqing He]",http://dx.doi.org/10.1155/2021/3846078,The booming computational thinking and deep le...,[School of Mechanical and Automotive Engineeri...,Computational Intelligence and Neuroscience,,Hindawi Limited,2021-06-01,
8,A novel algorithm of ship structure modeling a...,,"[Yang Miao, Changan Li, Zhan Li, Yipeng Yang, ...",https://doi.org/10.1177/0020294021992804,Achieving port automation of machinery at bulk...,[The Research Institute of Intelligent Control...,Measurement + Control,,SAGE Publishing,2021-03-01,
9,The concept of the development of cargo contai...,"[automation, supply chain, innovative transpor...","[Bernd Hentschel, Gerhard Richtsteig, Karol Gó...",http://www.logforum.net/vol8/issue3/no5/8_3_5_...,Background: The transport of luggage units bet...,[BERLINOXX Projektzentrum Logistiknetzwerke Be...,LogForum,True,Poznań School of Logistics,2012-09-01,


## Test `core_api()`

In [38]:
df_core = core_api.core_api()

Enter key words: automation container terminal


In [39]:
authors = []
for index, row in df_core.iterrows():
    author_list = []
    for author in row.authors:
        if "," in author['name']:
            author_fullname = author['name'].split(",")
            author_name = author_fullname[1].strip()+" "+author_fullname[0]
        else:
            author_name = author['name']
        author_list.append(author_name)
    authors.append(author_list)

In [40]:
df_core.authors = authors

In [41]:
f = lambda x: x.rstrip('T00:00:00+00:00')
published_date = df_core.published_date.apply(f)

In [42]:
df_core.published_date = published_date

In [43]:
df_core = data_preprocess.extract_key_words(df_core)
df_core = df_core.rename(columns={'download_url':'link'})
df_core.drop(labels=['tags'], axis=1, inplace=True)

Unnamed: 0,authors,abstract,document_type,doi,link,full_text,title,language,published_date,publisher,references,key_words
0,[Bagas Pamungkas],Seiring meningkatnya volume arus petikemas dan...,,,,,Analisis Penerapan Otomasi Pada Terminal Petik...,,2020-08-01,,[],"[analisis, penerapan, otomasi, terminal, petik..."
1,"[Takeshi Shinoda, Hideyo Inutsuka, Putu Hangga...",With growing ship size and demand for effectiv...,,10.12962/j25481479.v2i2.3653,https://core.ac.uk/download/290095071.pdf,International Journal of Marine Engineering In...,Evaluation of Container Terminal Arrangement B...,"{'code': 'en', 'name': 'English'}",2018-03-25,'Lembaga Penelitian dan Pengabdian kepada Masy...,[],"[evaluation, container, terminal, arrangement,..."
2,"[Takeshi Shinoda, Hideyo Inutsuka, Putu Hangga...",With growing ship size and demand for effectiv...,,10.12962/j25481479.v2i2.3653,https://core.ac.uk/download/201126069.pdf,International Journal of Marine Engineering In...,Evaluation of Container Terminal Arrangement B...,,2018-03-01,'Lembaga Penelitian dan Pengabdian kepada Masy...,[],"[evaluation, container, terminal, arrangement,..."
3,"[Chairunissa Kusnoaji, Iis Dewi Ratih, Iis Dew...",PT Terminal Petikemas Surabaya is an internati...,,10.12962/j23378557.v7i1.a7493,https://core.ac.uk/download/429413219.pdf,16 ...,Analysis of Transaction Capability in Gate In-...,"{'code': 'en', 'name': 'English'}",2021-04-3,'Lembaga Penelitian dan Pengabdian kepada Masy...,[],"[analysis, transaction, capability, gate, in2,..."
4,"[Chairunissa Kusnoaji, Iis Dewi Ratih, Iis Dew...",PT Terminal Petikemas Surabaya is an internati...,,10.12962/j23378557.v7i1.a7493,https://core.ac.uk/download/429252544.pdf,16 ...,Analysis of Transaction Capability in Gate In-...,"{'code': 'en', 'name': 'English'}",2021-04-3,'Lembaga Penelitian dan Pengabdian kepada Masy...,[],"[analysis, transaction, capability, gate, in2,..."


## Test `google_api()`


In [45]:
primaryResults = google_api.serpapi_full_cite()

Enter key words: automation container terminal


In [46]:
df_google = data_preprocess.extract_key_words(primaryResults)

In [47]:
df_google = data_preprocess.extract_pub_info(df_google)

In [48]:
journal_name = []
for element in df_google.pub_info:
    if ' (' in element:
        split_info = element.split(' (')
    else:
        split_info = element.split('. ')
    journal_name.append(split_info[0])

In [49]:
df_google['journal_name'] = journal_name

In [50]:
published_date = []
for pub_year in df_google.year:
    date = pub_year+"-06-01"
    published_date.append(date)

In [52]:
df_google['published_date'] = published_date
df_google.drop(labels=["versions", "cluster_id", "pub_info", 'paper_id', 'year', 'result_id', 'resources_title', 'resources_link'], axis=1, inplace = True)
df_google = df_google.rename(columns={'resources_link':'link'})

Unnamed: 0,title,link,snippet,citation_count,cites_id,full_citation,key_words,authors,journal_name,published_date
0,A multi-agent system for the automation of a p...,https://www.academia.edu/download/48859036/A_m...,This paper presents a system architecture whic...,74,9309426555546589354,"Rebollo, Miguel, et al. ""A multi-agent system ...","[multiagent, system, automation, port, contain...","Rebollo, Miguel, et al.",Workshop in Agents in Industry,2000-06-01
1,Automation in port container terminals,https://www.sciencedirect.com/science/article/...,… It is necessary to note that the decision to...,50,16200645956702243205,"Martín-Soberón, Ana María, et al. ""Automation ...","[automation, port, container, terminals]","Martín-Soberón, Ana María, et al.",Procedia-Social and Behavioral Sciences 160,2014-06-01
2,Container port automation,https://link.springer.com/content/pdf/10.1007/...,"… The system was handed over to a new company,...",13,2428285333085990610,"Nelmes, Graeme. ""Container port automation."" F...","[container, port, automation]","Nelmes, Graeme.",Field and Service Robotics,2006-06-01
3,TRACES: TRAFFIC CONTROL ENGINEERING SYSTEM A c...,https://citeseerx.ist.psu.edu/viewdoc/download...,In this study a control system to coordinate t...,27,6171787941291428034,"Duinkerken, Mark B., Joseph JM Evers, and Jaap...","[traces, traffic, control, engineering, system...","Duinkerken, Mark B., Joseph JM Evers, and Jaap...",signal 2,1999-06-01
4,Multi-agent system technology in a port contai...,https://www.researchgate.net/profile/V-Botti/p...,In response to the arrival of a ship (ship age...,13,3086980972259741507,"Botti, Vicent J. ""Multi-agent system technolog...","[multiagent, system, technology, port, contain...","Botti, Vicent J.",ERCIM News 56,2004-06-01
5,Alignments between strategic content and proce...,https://link.springer.com/article/10.1057/s412...,"During the last three decades, technological i...",14,14852389085083582985,"Wang, Ping, Joan P. Mileski, and Qingcheng Zen...","[alignments, strategic, content, process, stru...","Wang, Ping, Joan P. Mileski, and Qingcheng Zeng.",Maritime Economics & Logistics 21.4,2019-06-01
6,Performance analysis of a new type of automate...,http://citeseerx.ist.psu.edu/viewdoc/download?...,… can not only improve the utilization rate of...,8,5423973718458925932,"Yan, Wei, Yishi Zhu, and Junliang He. ""Perform...","[performance, analysis, new, type, automated, ...","Yan, Wei, Yishi Zhu, and Junliang He.",International Journal of Hybrid Information 7.2,2014-06-01
7,A Study on Application of Yard Transportaion E...,https://www.koreascience.or.kr/article/JAKO201...,International major container terminals are tr...,5,7027991686810156965,"Cha, Sang-Hyun, and Chang-Kyun Noh. ""A Study o...","[study, application, yard, transportaion, equi...","Cha, Sang-Hyun, and Chang-Kyun Noh.",Journal of Navigation and Port Research 42.3,2018-06-01
8,New technologies and the transformation of wor...,https://onlinelibrary.wiley.com/doi/abs/10.111...,… This study focused on workplace technologica...,31,3038243621657882594,"Gekara, Victor Oyaro, and Vi‐Xuan Thanh Nguyen...","[new, technologies, transformation, work, skil...","Gekara, Victor Oyaro, and Vi‐Xuan Thanh Nguyen.","New Technology, Work and Employment 33.3",2018-06-01
9,Cooperative Scheduling of AGV and ASC in Autom...,https://www.hindawi.com/journals/mpe/2021/5764...,The key problem of operation optimization for ...,1,4551625296024943951,"Zhang, Qinglei, et al. ""Cooperative Scheduling...","[cooperative, scheduling, agv, asc, automation...","Zhang, Qinglei, et al.",Mathematical Problems in Engineering 2021,2021-06-01


## Consolidation of all 4 dataframes

In [112]:
consolidated_df = pd.concat([df_unpaywall, df_doaj, df_core, df_google]).reset_index(drop=True)

In [113]:
consolidated_df.head()

Unnamed: 0,title,doi,genre,is_oa,journal_is_oa,journal_name,published_date,publisher,authors,affiliations,...,abstract,number_of_pages,document_type,full_text,language,references,snippet,citation_count,cites_id,full_citation
0,Automated Container Terminals,10.1201/9780849307744-3,book-chapter,False,False,Intelligent Freight Transportation,2008-03-24,CRC Press,[Edmond Dougherty],[[]],...,,,,,,,,,,
1,Case: The Automated Container Terminal,10.1007/978-1-84800-177-0_11,book-chapter,False,False,The Delft Systems Approach,,Springer London,[],[],...,,,,,,,,,,
2,Automated Container Terminal Concepts,10.1201/9780849307744-2,book-chapter,False,False,Intelligent Freight Transportation,2008-03-24,CRC Press,"[Petros Ioannou, Hossein Jula]","[[], []]",...,,,,,,,,,,
3,Petri Net Modeling and Analysis of Automated C...,10.3141/1782-09,journal-article,False,False,Transportation Research Record: Journal of the...,2002-01-01,SAGE Publications,"[Chin-I. Liu, P. A. Ioannou]",[[Center for Advanced Transportation Technolog...,...,,,,,,,,,,
4,Automated Container Terminal Design at the POLA,10.1061/9780784413067.078,proceedings-article,False,False,Ports 2013,2013-08-12,American Society of Civil Engineers,"[Adrienne Fedrick, Christina Sar, Milind Desai]","[[], [], []]",...,,,,,,,,,,


In [114]:
consolidated_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86 entries, 0 to 85
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   title            86 non-null     object 
 1   doi              54 non-null     object 
 2   genre            50 non-null     object 
 3   is_oa            50 non-null     object 
 4   journal_is_oa    61 non-null     object 
 5   journal_name     81 non-null     object 
 6   published_date   86 non-null     object 
 7   publisher        65 non-null     object 
 8   authors          86 non-null     object 
 9   affiliations     61 non-null     object 
 10  key_words        86 non-null     object 
 11  link             36 non-null     object 
 12  abstract         16 non-null     object 
 13  number_of_pages  11 non-null     object 
 14  document_type    0 non-null      object 
 15  full_text        4 non-null      object 
 16  language         3 non-null      object 
 17  references       5

In [115]:
# Get the ratio of missing data for each feature
ratios = []
for e in consolidated_df.columns:
    ratio = consolidated_df[e].isna().sum()/len(consolidated_df)
    ratios.append(ratio)
ratios_df = pd.DataFrame(pd.Series(consolidated_df.columns))
ratios_df['ratio'] = ratios
ratios_df

Unnamed: 0,0,ratio
0,title,0.0
1,doi,0.372093
2,genre,0.418605
3,is_oa,0.418605
4,journal_is_oa,0.290698
5,journal_name,0.05814
6,published_date,0.0
7,publisher,0.244186
8,authors,0.0
9,affiliations,0.290698


From the ratios above, we can drop the following columns: genre, is_oa, link, abstract, number_of_pages, document_type, full_text, language, references, snippet, citation_count, cites_id, full_citation

In [116]:
consolidated_df.drop(labels=['doi', 'affiliations', 'genre', 'is_oa', 'link', 'abstract', 'number_of_pages', 
                             'document_type', 'full_text', 'language', 'references', 
                             'snippet', 'citation_count', 'cites_id', 'full_citation'], axis=1, inplace=True)

In [117]:
consolidated_df

Unnamed: 0,title,journal_is_oa,journal_name,published_date,publisher,authors,key_words
0,Automated Container Terminals,False,Intelligent Freight Transportation,2008-03-24,CRC Press,[Edmond Dougherty],"[automated, container, terminals]"
1,Case: The Automated Container Terminal,False,The Delft Systems Approach,,Springer London,[],"[case, automated, container, terminal]"
2,Automated Container Terminal Concepts,False,Intelligent Freight Transportation,2008-03-24,CRC Press,"[Petros Ioannou, Hossein Jula]","[automated, container, terminal, concepts]"
3,Petri Net Modeling and Analysis of Automated C...,False,Transportation Research Record: Journal of the...,2002-01-01,SAGE Publications,"[Chin-I. Liu, P. A. Ioannou]","[petri, net, modeling, analysis, automated, co..."
4,Automated Container Terminal Design at the POLA,False,Ports 2013,2013-08-12,American Society of Civil Engineers,"[Adrienne Fedrick, Christina Sar, Milind Desai]","[automated, container, terminal, design, pola]"
...,...,...,...,...,...,...,...
81,Container terminal handling technology and its...,,Academic Journal of Engineering and Technology...,2019-06-01,,"Li, Hanfeng, and Yuhao Lu.","[container, terminal, handling, technology, de..."
82,Design of GPS Positioning and Control for Fork...,,L,2011-06-01,,"Bing, Z","[design, gps, positioning, control, forklifts,..."
83,Strategic analysis of the automation of contai...,,Logistics 4.1,2020-06-01,,"Camarero Orive, Alberto, et al.","[strategic, analysis, automation, container, p..."
84,Container terminal operation: current trends a...,,Handbook of Ocean Container Transport Logistics,2015-06-01,,"Kim, Kap Hwan, and Hoon Lee.","[container, terminal, operation, current, tren..."
