In [2]:
import pandas as pd
import json

In [5]:
## Load configuration
# Read the notebook settings from config.json. The context manager closes the
# file even when json.load raises (e.g. on malformed JSON), which the manual
# open()/close() pair did not guarantee.
with open("config.json") as con_file:
    config = json.load(con_file)

## Importing the Excel sheet from Google Drive

In [6]:
# https://docs.google.com/spreadsheets/d/1Yu0SF_99P_Qx3_-PZgxR78mUeFMsp5JB/edit?usp=sharing&ouid=100169992130227570752&rtpof=true&sd=true
url_id = config['url_id']
url_base = "https://docs.google.com/spreadsheets/d/"
# NOTE(review): the suffix has no leading "/", so config['url_id'] presumably
# ends with one — confirm against config.json.
url_e = "export/format=xlsx"

# Open the workbook exactly once; the context manager releases the handle as
# soon as the sheet has been read.
with pd.ExcelFile(f"{url_base}{url_id}{url_e}") as xls:
    df = pd.read_excel(xls, sheet_name="cleaner table", header=0)

# Keep only the first 44 columns of the sheet.
df = df.iloc[:, :44]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 868 entries, 0 to 867
Data columns (total 44 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Authors                                  868 non-null    object 
 1   Author full names                        856 non-null    object 
 2   Author(s) ID                             855 non-null    object 
 3   Title                                    867 non-null    object 
 4   Year                                     868 non-null    int64  
 5   Source title                             868 non-null    object 
 6   Volume                                   836 non-null    float64
 7   Issue                                    509 non-null    object 
 8   Art. No.                                 518 non-null    object 
 9   Page start                               351 non-null    object 
 10  Page end                                 351 non-n

## Converting columns to boolean

In [70]:
# Flag columns that are cast to bool in the next cell.
columns_to_boolean = [
    "AI (yes/no)",
    "Sustainability (yes/no)",
    "AI as buzzword? (0/1)",
    "policy recommendations (1/0)",
]

In [71]:
# Cast the flag columns to bool and write them back onto the main frame.
flag_values = df[columns_to_boolean].astype(bool)
df[columns_to_boolean] = flag_values

In [73]:
# Display the frame as the cell output.
df

Unnamed: 0,Authors,Author full names,Author(s) ID,Title,Year,Source title,Volume,Issue,Art. No.,Page start,...,Sus_lvl,empirical/conceptual/review,spatial scale,snapshot in time vs. longitudinal study,"temporal scale (past, present, future)",qualitative/quantitative/mixed methods,location of the study (country),dataset used,first_author_country,policy recommendations (1/0)
0,Thiebes S.; Lins S.; Sunyaev A.,"Thiebes, Scott (56319399400); Lins, Sebastian ...",56319399400; 56318996100; 24779131200,Trustworthy artificial intelligence,2021,Electronic Markets,31.0,2,,447,...,weak,conceptual,,,,,,,Germany,False
1,Ho J.-H.; Lee G.-G.; Lu M.-T.,"Ho, Juin-Hao (57218510909); Lee, Gwo-Guang (74...",57218510909; 7404852393; 55801461400,Exploring the implementation of a legal AI bot...,2020,Sustainability (Switzerland),12.0,15,5991,,...,weak,empirical,national,snapshot,present,quantitative,Taiwan,survey,Taiwan,False
2,Bartmann M.,"Bartmann, Marius (56512092600)",5.65E+10,The Ethics of AI-Powered Climate Nudging—How M...,2022,Sustainability (Switzerland),14.0,9,5153,,...,weak,conceptual,,,,,,,Germany,False
3,Halsband A.,"Halsband, Aurélie (57562370000)",5.76E+10,Sustainable AI and Intergenerational Justice,2022,Sustainability (Switzerland),14.0,7,3922,,...,strong,conceptual,,,,,,,Germany,False
4,Raman R.; Kumar Nair V.; Nedungadi P.; Ray I.;...,"Raman, Raghu (36618183700); Kumar Nair, Vinith...",36618183700; 57647914700; 36069838600; 5883060...,"Darkweb research: Past, present, and future tr...",2023,Heliyon,9.0,11,e22269,,...,strong,review,,,,mixed,,,India,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,Zhang Y.; Ji Y.; Qian H.,"Zhang, Yang (57471054500); Ji, Yuanhui (572047...",57471054500; 57204776640; 55186013100,Progress in thermodynamic simulation and syste...,2021,Green Chemical Engineering,2.0,3,,266,...,weak,Review,,,,,,,China,False
864,Jang J.; Kyun S.,"Jang, Jiyoung; Kyun, Suna",,An Innovative Career Management Platform Empow...,2022,"Journal of Logistics, Informatics and Service ...",9.0,1,,274,...,weak,conceptual,,,,,,,South South Korea,False
865,Kabukye J.K.; Namugga J.; Mpamani C.J.; Katumb...,"Kabukye, Johnblack K.; Namugga, Jane; Mpamani,...",57205140187; 57201368167; 57403715300; 3623907...,Implementing Smartphone-Based Telemedicine for...,2023,Journal of Medical Internet Research,25.0,1,e45132,,...,weak,empirical,regional,snapshot,present,qualitative,Uganda,,Sweden,False
866,Deng M.; Liu Y.; Chen L.,"Deng, Meizhen; Liu, Yimeng; Chen, Ling",58606568400; 58605588800; 57700546300,AI-driven innovation in ethnic clothing design...,2023,Electronic Research Archive,31.0,9,,5793,...,weak,empirical,local,snapshot,present,mixed,Biasha,,China,False


## Selecting articles metadata

In [61]:
# Columns that make up the per-article metadata table.
article_columns = [
    "EID",
    "Title",
    "Year",
    "SDG",
    "Source title",
    "Cited by",
    "DOI",
    "Abstract",
    "Author Keywords",
    "Index Keywords",
    "AI (yes/no)",
    "Sustainability (yes/no)",
]
articles_df = df.loc[:, article_columns]
articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 868 entries, 0 to 867
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   EID                      868 non-null    object
 1   Title                    867 non-null    object
 2   Year                     868 non-null    int64 
 3   SDG                      868 non-null    int64 
 4   Source title             868 non-null    object
 5   Cited by                 868 non-null    int64 
 6   DOI                      862 non-null    object
 7   Abstract                 837 non-null    object
 8   Author Keywords          621 non-null    object
 9   Index Keywords           565 non-null    object
 10  AI (yes/no)              868 non-null    int64 
 11  Sustainability (yes/no)  868 non-null    int64 
dtypes: int64(5), object(7)
memory usage: 81.5+ KB


In [62]:
# Cast the two screening flags of the article table to bool.
screening_flags = ["AI (yes/no)", "Sustainability (yes/no)"]
articles_df[screening_flags] = articles_df[screening_flags].astype(bool)

## Function to separate values in the same cell

In [52]:
def separating(df, old_column, new_column):
    """Split a ';'-separated text column into one row per value.

    Each cell of ``old_column`` is split on ';', the frame is expanded so that
    every piece gets its own row (the other columns are repeated), and
    surrounding whitespace is stripped from each piece. Missing cells stay
    missing and yield a single row.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    old_column : str
        Name of the column holding the ';'-separated values; it is removed
        from the result.
    new_column : str
        Name of the column that receives the individual values. May be equal
        to ``old_column``.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index, without ``old_column`` and with
        ``new_column`` holding one value per row.
    """
    # Split first, then build the result on a derived frame so the caller's
    # DataFrame never gains the helper column as a side effect.
    pieces = df[old_column].str.split(';')
    result = df.drop(columns=old_column)
    result[new_column] = pieces
    result = result.explode(new_column).reset_index(drop=True)
    result[new_column] = result[new_column].str.strip()
    return result

In [74]:
# Article id plus the raw, ';'-separated AI-type annotation.
ai_columns = ["EID", "Type of AI"]
df_ai = df.loc[:, ai_columns]

In [75]:
# One row per (article, AI type) pair.
new_df = separating(
    df_ai,
    old_column="Type of AI",
    new_column="AI_type",
)
new_df

Unnamed: 0,EID,AI_type
0,2-s2.0-85148853990,overall
1,2-s2.0-85168710066,overall
2,2-s2.0-85174445734,overall
3,2-s2.0-85139602753,overall
4,2-s2.0-85185331385,nlp
...,...,...
1069,2-s2.0-85174748358,machine learning
1070,2-s2.0-85171645006,unsupervised machine learning
1071,2-s2.0-85171645006,nlp
1072,2-s2.0-85171645006,deep learning


In [76]:
# Distinct AI-type labels, in order of first appearance.
new_df.loc[:, "AI_type"].unique()

array(['overall', 'nlp', 'deep learning', 'supervised machine learning',
       'machine learning', 'IDSS', 'unspervised machine learning',
       'evolutionary algorithm', 'fuzzy logic',
       'unsupervised machine learning', 'Google Earth Engine',
       'genetic algorithm', 'reinforcement learning', 'Symbolic AI',
       'MAS', 'evolutionary algorithms', 'Edge AI', 'Fuzzy Logic',
       'Machine Learning', 'MCDM AI', 'GMDH', 'deep learnimg',
       'evolutionary algorithsm', 'evolutionary algoriths',
       'artificial neural network', 'EAI', 'LLM', 'geospatial AI',
       'generative AI', 'unsupervisde machine learning', 'quantum AI',
       'explainable AI', 'supervised machine learningmachine learning',
       'machine leraning', 'Fuzzy logic'], dtype=object)

In [65]:
# Spot-check the label stored at row 244.
new_df.at[244, "AI_type"]

'deep learning'