In [1]:
import datetime
import json
import warnings
from urllib.parse import quote

import pandas as pd
import sqlalchemy
from pybliometrics.scopus import AbstractRetrieval, AuthorRetrieval, AffiliationRetrieval

In [2]:
## Load configuration from json file
# The context manager guarantees the handle is closed even when json.load
# raises on malformed input; JSON files are read as UTF-8 regardless of the
# platform's default locale encoding.
with open("config.json", encoding="utf-8") as con_file:
    config = json.load(con_file)

In [3]:
# Database connection settings, read from the `config` mapping.
schema = config["schema"]
host = config["host"]
user = config["user"]
password = config["password"]
port = config["port"]

# Percent-encode the credentials before embedding them in the URL: a raw
# '@', ':' or '/' inside the user name or password would otherwise be read
# as a URL delimiter and corrupt the connection string.
connection_string = (
    f'mysql+pymysql://{quote(str(user), safe="")}:{quote(str(password), safe="")}'
    f'@{host}:{port}/{schema}'
)

# Display the target with the password masked so the secret is not written
# into the notebook's saved output.
f'mysql+pymysql://{user}:***@{host}:{port}/{schema}'

'mysql+pymysql://root:***@127.0.0.1:3306/ai_sdgs'

In [4]:
# Create one SQLAlchemy engine and share it across all reads. When `con` is a
# URL string, pandas has to set up its own engine for each read_sql call.
engine = sqlalchemy.create_engine(connection_string)

# Each call loads a whole table (passing a bare table name to read_sql reads
# the full table) into its own DataFrame.
articles_df = pd.read_sql("articles", con=engine)
article_type_df = pd.read_sql("type_of_article", con=engine)
art_alg_df = pd.read_sql("articles_algorithms", con=engine)
algorithms_df = pd.read_sql("algorithms_list", con=engine)
groups_df = pd.read_sql("clusters", con=engine)

In [7]:
# Inner-join the articles_algorithms rows with the clusters rows on `eid`.
# NOTE: `df1` is assigned again further down (articles joined with article
# types), so re-run this cell before relying on this version of it.
df1 = art_alg_df.merge(groups_df, how="inner", on="eid")

In [10]:
# Attach the article rows to the algorithm/cluster join, matching on `eid`
# and keeping only keys present on both sides.
# NOTE: `df2` is assigned again further down with different contents.
df2 = df1.merge(articles_df, how="inner", on="eid")

In [18]:
# Show the rows of df2 whose `algorithms` value is one of the three listed.
df2[
    df2["algorithms"].isin(["lstm", "k-means", "extreme learning machine"])
]

Unnamed: 0,eid,algorithms,words,cluster_number,cluster_name,article_file,title,year_pub,sdg,journal,cited_by,doi,authors
291,2-s2.0-85113985918,k-means,"camera, pixel, detection, image, localization,...",5,remote sensing,6_e_2-s2.0-85113985918.txt,An innovative machine learning based framework...,2022,6,Structural Health Monitoring,12,10.1177/14759217211040269,Fan X.; Yu X.
292,2-s2.0-85136500537,k-means,"manufacturing, business, chain, competitivenes...",7,industry,9_e_Arunmozhi M._2022_2-s2.0-85136500537.txt,Application of blockchain and smart contracts ...,2022,9,Transportation Research Part E: Logistics and ...,12,10.1016/j.tre.2022.102864,Arunmozhi M.; Venkatesh V.G.; Arisian S.; Shi ...
293,2-s2.0-85137738728,k-means,"camera, pixel, detection, image, localization,...",5,remote sensing,14_e_Mbani_2022_2-s2.0-85137738728.txt,Implementation of an automated workflow for im...,2022,14,Scientific Reports,2,10.1038/s41598-022-19070-2,Mbani B.; Schoening T.; Gazis I.-Z.; Koch R.; ...
294,2-s2.0-85150899019,k-means,"camera, pixel, detection, image, localization,...",5,remote sensing,2_e_Waheed_2023_2-s2.0-85150899019.txt,A Mobile-Based System for Detecting Ginger Lea...,2023,2,Future Internet,2,10.3390/fi15030086,Waheed H.; Akram W.; Islam S.U.; Hadi A.; Boud...
295,2-s2.0-85171645006,k-means,"care, health, patient, healthcare, hospital, p...",1,healthcare,5_e_Deng_2023_2-s2.0-85171645006.txt,AI-driven innovation in ethnic clothing design...,2023,5,Electronic Research Archive,0,10.3934/era.2023295,Deng M.; Liu Y.; Chen L.
329,2-s2.0-85057537994,lstm,"vector, feature, prediction, accuracy, forecas...",3,forecasting,12_e_Yan_2018_2-s2.0-85057537994.txt,Multi-step short-term power consumption foreca...,2018,12,Energies,127,10.3390/en11113089,Yan K.; Wang X.; Du Y.; Jin N.; Huang H.; Zhou H.
330,2-s2.0-85082475744,lstm,"river, rainfall, hydrology, water, square, bas...",4,water,6_e_2-s2.0-85082475744.txt,Computation of evapotranspiration with artific...,2020,6,Applied Sciences (Switzerland),37,10.3390/app10051621,Afzaal H.; Farooque A.A.; Abbas F.; Acharya B....
331,2-s2.0-85088034777,lstm,"river, rainfall, hydrology, water, square, bas...",4,water,6_e_2-s2.0-85088034777.txt,Genetic-algorithm-optimized sequential model f...,2020,6,Sustainability (Switzerland),47,10.3390/su12135374,Stajkowski S.; Kumar D.; Samui P.; Bonakdari H...
332,2-s2.0-85088373071,lstm,"vector, feature, prediction, accuracy, forecas...",3,forecasting,11_e_Schürholz D._2020_2-s2.0-85088373071.txt,Artificial intelligence-enabled context-aware ...,2020,11,Journal of Cleaner Production,35,10.1016/j.jclepro.2020.121941,Schürholz D.; Kubler S.; Zaslavsky A.
333,2-s2.0-85107394966,lstm,"vector, feature, prediction, accuracy, forecas...",3,forecasting,12_e_Codeluppi_2021_2-s2.0-85107394966.txt,Forecasting air temperature on edge devices wi...,2021,12,Sensors,17,10.3390/s21123973,Codeluppi G.; Davoli L.; Ferrari G.


In [9]:
# Inner-join the articles table with the type_of_article table on `eid`.
# NOTE: this rebinds `df1`, which an earlier cell used for the
# algorithms/clusters join.
df1 = articles_df.merge(article_type_df, how="inner", on="eid")

In [12]:
# Inner-join the previous result with `abstracts_df` on `eid`.
# NOTE(review): `abstracts_df` is not created in any visible cell, so this
# fails on a fresh kernel - confirm where it is loaded and add that cell.
df2 = df1.merge(abstracts_df, how="inner", on="eid")

In [14]:
# Inner-join with the articles_algorithms rows on `eid`; an article matching
# several rows there appears once per match.
df3 = df2.merge(art_alg_df, how="inner", on="eid")

In [16]:
# Inner-join with the algorithms_list rows, matching on the `algorithms` key.
df4 = df3.merge(algorithms_df, how="inner", on="algorithms")

In [17]:
# Inner-join with `alg_ai_df` on the `algorithms` key.
# NOTE(review): `alg_ai_df` is not created in any visible cell, so this
# fails on a fresh kernel - confirm where it is loaded and add that cell.
df5 = df4.merge(alg_ai_df, how="inner", on="algorithms")

In [20]:
# Inner-join with `aitype` on the `ai_type` key.
# NOTE(review): `aitype` is not created in any visible cell, so this fails
# on a fresh kernel - confirm where it is loaded and add that cell.
df6 = df5.merge(aitype, how="inner", on="ai_type")

In [23]:
# Keep only rows whose article_type is "empirical" and renumber them from 0.
df7 = (
    df6[df6["article_type"].eq("empirical")]
    .reset_index(drop=True)
)

In [26]:
# Show the rows of df7 whose ai_type is "various".
df7[df7["ai_type"].eq("various")]

Unnamed: 0,eid,title,year_pub,sdg,journal,cited_by,doi,policy_recommendations,article_type,core_topic,broader_category,abstract,algorithms,algorithm_name,ai_type
3,2-s2.0-33751058457,Hybrid System for fouling control in biomass b...,2006,7,Engineering Applications of Artificial Intelli...,29,10.1016/j.engappai.2006.01.019,0,empirical,Hybrid systems for biomass boiler fouling control,Resource prediction and quality management,Renewable energy sources are essential paths t...,various,various,various
20,2-s2.0-72649093786,A hybrid decision support system for sustainab...,2010,12,Energy and Buildings,287,10.1016/j.enbuild.2009.09.006,0,empirical,Sustainable building renovation,Supply Chain and LCA,Energy consumption of buildings accounts for a...,various,various,various
96,2-s2.0-85048598058,Machine Learning-Augmented Propensity Score-Ad...,2018,4,BioMed Research International,37,10.1155/2018/5051289,0,empirical,AI in preventive cardiology education,Psychology and education,Background. Cardiovascular disease (CVD) annua...,various,various,various
110,2-s2.0-85056204019,Forecasting energy consumption time series usi...,2018,12,Energy,139,10.1016/j.energy.2018.09.144,0,empirical,Residential energy consumption forecasting,Resource prediction and quality management,Energy consumption in buildings is increasing ...,various,various,various
120,2-s2.0-85058120103,An integrated multicriteria analysis tool for ...,2018,6,Water (Switzerland),43,10.3390/w10121795,0,empirical,Irrigation constraint evaluation,Resource prediction and quality management,"Water is involved, directly or indirectly, wit...",various,various,various
179,2-s2.0-85078363262,Leveraging artificial intelligence to improve ...,2019,3,IEEE Access,22,10.1109/ACCESS.2019.2938265,0,empirical,AI for voice disorder identification,"Health, equity and gender","The evolution of the Internet of Things, cloud...",various,various,various
215,2-s2.0-85085638015,Artificial Intelligence-Aided Model Predictive...,2020,7,IEEE Access,27,10.1109/ACCESS.2020.2994577,0,empirical,AI-based predictive control for wind-hydrogen ...,Resource prediction and quality management,Integrating renewable energy into power grids ...,various,various,various
311,2-s2.0-85104629628,Application of machine learning techniques in ...,2021,9,Sustainability (Switzerland),48,10.3390/su13084120,0,empirical,Machine learning in injection molding,Supply chain and LCA,With sustainable growth highlighted as a key t...,various,various,various
429,2-s2.0-85118703265,Performance of machine learning methods in pre...,2021,6,Applied Water Science,109,10.1007/s13201-021-01528-9,0,empirical,Daily streamflow forecasting,Resource prediction and quality management,Groundwater quality appraisal is one of the mo...,various,various,various
453,2-s2.0-85123355204,Deep learning models for improved reliability ...,2022,15,Forest Ecology and Management,7,10.1016/j.foreco.2022.120031,0,empirical,Forest biomass prediction,Resource prediction and quality management,Aboveground biomass (AGB) and carbon uptake of...,various,various,various
