# Add new country column to articles database

In [2]:
import pandas as pd
from time import time

### Read original database

In [3]:
# Load the original articles database into a DataFrame.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which
# looks like a row index saved into the CSV by an earlier export — confirm
# whether it should be kept.
df = pd.read_csv(filepath_or_buffer='slam_articles.csv')

### Read location dictionaries

In [4]:
import json

# Load the three lookup tables used to derive a country for each article.
# Each table is keyed by the exact string found in the corresponding
# DataFrame column (publisher, publisher location, affiliation).
#
# The files are opened explicitly as UTF-8: without `encoding=...`, open()
# falls back to the platform's locale encoding, which can fail or mis-decode
# non-ASCII names on systems whose default is not UTF-8.
# json.load() parses straight from the file handle, so no intermediate
# file.read() string is needed.
with open('publishers_dict.json', encoding='utf-8') as file:
    publishers_dict = json.load(file)

with open('publisher_locations_dict.json', encoding='utf-8') as file:
    publisher_locations_dict = json.load(file)

with open('universities_dict.json', encoding='utf-8') as file:
    universities_dict = json.load(file)

# Report the size of each lookup table as a quick sanity check.
print('publishers_dict:',len(publishers_dict))
print('publisher_locations_dict:',len(publisher_locations_dict))
print('universities_dict:',len(universities_dict))

publishers_dict: 1561
publisher_locations_dict: 104
universities_dict: 7680


### Create new columns

In [5]:
# Add one empty (all-None) placeholder column per country source:
#   pub_c     - derived from the publisher
#   pub_loc_c - derived from the publisher location
#   uni_c     - derived from the affiliation
for column_name in ('pub_c', 'pub_loc_c', 'uni_c'):
    df[column_name] = None

# Preview the frame with the new, still empty, columns.
df.head()

Unnamed: 0,title,DOI,publisher,publisher_location,year,type,score,reference-count,is-referenced-by-count,authors,publisher_affiliation,pub_c,pub_loc_c,uni_c
0,D3VIL-SLAM: 3D Visual Inertial LiDAR SLAM for ...,10.1109/iv55152.2023.10186534,IEEE,,2023,proceedings-article,proceedings-article,23.0,0.0,"Matteo Frosi, Matteo Matteucci",Information and Bioengineering Politecnico di ...,,,
1,Visual and lidar-based SLAM by variational bay...,10.32657/10356/139813,Nanyang Technological University,,2020,dissertation,dissertation,0.0,0.0,", Xiaoyue Jiang",,,,
2,A Comparison of Outdoor 3D Reconstruction betw...,10.1109/cacs60074.2023.10325866,IEEE,,2023,proceedings-article,proceedings-article,10.0,0.0,"Yi-Tian Hong, Han-Pang Huang","National Taiwan University, Department of Mech...",,,
3,Indoor mapping and positioning applications of...,10.51946/melid.927004,Turkiye lidar dergisi (Mersin University),,2021,journal-article,journal-article,29.0,5.0,Mustafa ZEYBEK,,,,
4,SC-LiDAR-SLAM: A Front-end Agnostic Versatile ...,10.1109/iceic54506.2022.9748644,IEEE,,2022,proceedings-article,proceedings-article,46.0,10.0,"Giseop Kim, Seungsang Yun, Jeongyun Kim, Ayoun...","KAIST, Dept. of Civil and Envtl. Eng., Daejeon...",,,


### Fill new columns

In [6]:
# Fill each country column with a single vectorized dictionary lookup.
# Series.map() looks every cell up in the dictionary in one pass and yields
# NaN where the exact string is not a key, so rows without a match stay
# empty. This replaces one full-column comparison per dictionary key, which
# scaled as rows x keys.

# add publisher
df['pub_c'] = df['publisher'].map(publishers_dict)

# add publisher location
df['pub_loc_c'] = df['publisher_location'].map(publisher_locations_dict)

# add university
df['uni_c'] = df['publisher_affiliation'].map(universities_dict)

In [7]:
df.head()

Unnamed: 0,title,DOI,publisher,publisher_location,year,type,score,reference-count,is-referenced-by-count,authors,publisher_affiliation,pub_c,pub_loc_c,uni_c
0,D3VIL-SLAM: 3D Visual Inertial LiDAR SLAM for ...,10.1109/iv55152.2023.10186534,IEEE,,2023,proceedings-article,proceedings-article,23.0,0.0,"Matteo Frosi, Matteo Matteucci",Information and Bioengineering Politecnico di ...,,,Italy
1,Visual and lidar-based SLAM by variational bay...,10.32657/10356/139813,Nanyang Technological University,,2020,dissertation,dissertation,0.0,0.0,", Xiaoyue Jiang",,,,
2,A Comparison of Outdoor 3D Reconstruction betw...,10.1109/cacs60074.2023.10325866,IEEE,,2023,proceedings-article,proceedings-article,10.0,0.0,"Yi-Tian Hong, Han-Pang Huang","National Taiwan University, Department of Mech...",,,China
3,Indoor mapping and positioning applications of...,10.51946/melid.927004,Turkiye lidar dergisi (Mersin University),,2021,journal-article,journal-article,29.0,5.0,Mustafa ZEYBEK,,,,
4,SC-LiDAR-SLAM: A Front-end Agnostic Versatile ...,10.1109/iceic54506.2022.9748644,IEEE,,2022,proceedings-article,proceedings-article,46.0,10.0,"Giseop Kim, Seungsang Yun, Jeongyun Kim, Ayoun...","KAIST, Dept. of Civil and Envtl. Eng., Daejeon...",,,South Korea


### Combine country columns with prioritization

In [8]:
# Create the 'country' column with prioritization:
#   1. uni_c     (affiliation match)
#   2. pub_loc_c (publisher location match)
#   3. pub_c     (publisher match)
# combine_first() keeps the left-hand value where it is not null and falls
# back to the right-hand value otherwise, so chaining it gives the first
# non-null value in priority order without a row-by-row apply().
df['country'] = (
    df['uni_c']
    .combine_first(df['pub_loc_c'])
    .combine_first(df['pub_c'])
)

# The per-source helper columns are no longer needed once combined.
df = df.drop(columns=['pub_c', 'pub_loc_c', 'uni_c'])

In [9]:
df.head()

Unnamed: 0,title,DOI,publisher,publisher_location,year,type,score,reference-count,is-referenced-by-count,authors,publisher_affiliation,country
0,D3VIL-SLAM: 3D Visual Inertial LiDAR SLAM for ...,10.1109/iv55152.2023.10186534,IEEE,,2023,proceedings-article,proceedings-article,23.0,0.0,"Matteo Frosi, Matteo Matteucci",Information and Bioengineering Politecnico di ...,Italy
1,Visual and lidar-based SLAM by variational bay...,10.32657/10356/139813,Nanyang Technological University,,2020,dissertation,dissertation,0.0,0.0,", Xiaoyue Jiang",,
2,A Comparison of Outdoor 3D Reconstruction betw...,10.1109/cacs60074.2023.10325866,IEEE,,2023,proceedings-article,proceedings-article,10.0,0.0,"Yi-Tian Hong, Han-Pang Huang","National Taiwan University, Department of Mech...",China
3,Indoor mapping and positioning applications of...,10.51946/melid.927004,Turkiye lidar dergisi (Mersin University),,2021,journal-article,journal-article,29.0,5.0,Mustafa ZEYBEK,,
4,SC-LiDAR-SLAM: A Front-end Agnostic Versatile ...,10.1109/iceic54506.2022.9748644,IEEE,,2022,proceedings-article,proceedings-article,46.0,10.0,"Giseop Kim, Seungsang Yun, Jeongyun Kim, Ayoun...","KAIST, Dept. of Civil and Envtl. Eng., Daejeon...",South Korea


### Save new database

In [10]:
# Write the enriched database to disk without the DataFrame index.
# quoting=1 is csv.QUOTE_ALL: every field is wrapped in quotes, so text
# containing commas or newline characters stays inside a single CSV field.
df.to_csv(
    'slam_articles_with_country.csv',
    index=False,
    encoding='utf-8',
    quoting=1,
)

### Check how many articles have a country

In [11]:
# Keep only the articles for which a country could be determined.
has_country = df['country'].notna()
df_filtered = df[has_country]

# Total number of articles vs. number of articles with a country.
N = len(df)
c_N = len(df_filtered)

# Report the coverage as a count and a percentage.
print(f"{c_N}/{N} ({(100*c_N/N):.1f}%) articles with country")

12221/46913 (26.1%) articles with country
