### Merging all CSV files

In [1]:
import pandas as pd
import os

# Set the directory where the CSV files are located
directory = "./Datasets"

# The last cell of this notebook saves its result as new_places.csv inside this
# same directory. That file is derived output (filtered rows, renamed columns),
# so it must not be merged back in as raw input when the notebook is re-run.
generated_filename = "new_places.csv"

# Create an empty list to store the dataframes
dfs = []

# Loop through each file in the directory. os.listdir() returns entries in
# arbitrary order, so sort them to keep the merged row order reproducible.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".csv") or filename == generated_filename:
        continue
    # Read the CSV file into a pandas dataframe and append it to the list
    filepath = os.path.join(directory, filename)
    df = pd.read_csv(filepath)
    dfs.append(df)

# pd.concat() raises a generic "No objects to concatenate" ValueError on an
# empty list; raise the same exception type with a message that names the folder.
if not dfs:
    raise ValueError(f"No input CSV files found in {directory!r}")

# Merge all the dataframes in the list into a single dataframe
merged_df = pd.concat(dfs, ignore_index=True)
merged_df

Unnamed: 0,type,id,geometry/type,geometry/coordinates/0,geometry/coordinates/1,properties/xid,properties/name,properties/dist,properties/rate,properties/osm,properties/kinds,properties/wikidata
0,Feature,9725727,Point,72.571793,23.023519,N440015518,Sakar 4,119.685029,1,node/440015518,"other,unclassified_objects,interesting_places,...",
1,Feature,9725726,Point,72.572502,23.022966,N440015517,Sakar 2,124.220107,1,node/440015517,"other,unclassified_objects,interesting_places,...",
2,Feature,11447953,Point,72.567360,23.024933,N5274771822,Villa Shodhan,493.854279,3,node/5274771822,"other,unclassified_objects,interesting_places,...",Q2275530
3,Feature,11447952,Point,72.568848,23.026508,N1964926905,Gandhigram,515.384995,2,node/1964926905,"railway_stations,industrial_facilities,interes...",Q31294840
4,Feature,11263778,Point,72.577339,23.022081,W605629475,Manek Burj,610.685878,3,way/605629475,"historic,monuments_and_memorials,interesting_p...",Q18745720
...,...,...,...,...,...,...,...,...,...,...,...,...
2460,Feature,8594529,Point,72.830902,19.141336,W193632118,Infinity Mall,8749.467287,1,way/193632118,"cultural,cinemas,theatres_and_entertainments,i...",
2461,Feature,7082282,Point,72.831009,19.141422,N2125320198,Cinemax Versova,8751.322002,1,node/2125320198,"cultural,cinemas,theatres_and_entertainments,i...",
2462,Feature,7982585,Point,72.816658,19.129932,N3186375634,Someshwar Temple,8769.410054,1,node/3186375634,"religion,hindu_temples,interesting_places",
2463,Feature,5686420,Point,72.950737,19.037353,W315277378,sunni islakiya masjid,8797.520101,1,way/315277378,"religion,other_temples,interesting_places",


### Selecting desired columns and renaming them

In [8]:
# Source column -> name used in the cleaned table. The dict's insertion order
# also fixes the column order of the result.
column_map = {
    'id': 'features__id',
    'geometry/coordinates/0': 'features__geometry__coordinates__001',
    'geometry/coordinates/1': 'features__geometry__coordinates__002',
    'properties/name': 'features__properties__name',
    'properties/kinds': 'features__properties__kinds',
}

# Keep only the mapped columns and relabel them; rename() returns a new frame,
# so merged_df itself is left untouched.
new_df = merged_df[list(column_map)].rename(columns=column_map)
new_df

Unnamed: 0,features__id,features__geometry__coordinates__001,features__geometry__coordinates__002,features__properties__name,features__properties__kinds
0,9725727,72.571793,23.023519,Sakar 4,"other,unclassified_objects,interesting_places,..."
1,9725726,72.572502,23.022966,Sakar 2,"other,unclassified_objects,interesting_places,..."
2,11447953,72.567360,23.024933,Villa Shodhan,"other,unclassified_objects,interesting_places,..."
3,11447952,72.568848,23.026508,Gandhigram,"railway_stations,industrial_facilities,interes..."
4,11263778,72.577339,23.022081,Manek Burj,"historic,monuments_and_memorials,interesting_p..."
...,...,...,...,...,...
2460,8594529,72.830902,19.141336,Infinity Mall,"cultural,cinemas,theatres_and_entertainments,i..."
2461,7082282,72.831009,19.141422,Cinemax Versova,"cultural,cinemas,theatres_and_entertainments,i..."
2462,7982585,72.816658,19.129932,Someshwar Temple,"religion,hindu_temples,interesting_places"
2463,5686420,72.950737,19.037353,sunni islakiya masjid,"religion,other_temples,interesting_places"


In [9]:
# Count the missing values in each column
missing_mask = new_df.isnull()
missing_mask.sum()

features__id                              0
features__geometry__coordinates__001      0
features__geometry__coordinates__002      0
features__properties__name              435
features__properties__kinds               0
dtype: int64

In [10]:
# Show the column labels as a plain Python list
list(new_df.columns)

['features__id',
 'features__geometry__coordinates__001',
 'features__geometry__coordinates__002',
 'features__properties__name',
 'features__properties__kinds']

In [11]:
# Drop every row that has a missing value in any of the selected columns
new_df = new_df.dropna()

# Discard places whose name contains a digit. The pattern is a raw string so
# that the backslash reaches the regex engine as written; in a plain string
# literal "\d" is an invalid escape sequence that newer Python versions warn about.
name_has_digit = new_df['features__properties__name'].str.contains(r'\d', regex=True, na=False)
new_df = new_df[~name_has_digit]

In [13]:
# Save the cleaned table as CSV without writing the row index.
# Note: the destination is inside the ./Datasets folder that the merge step
# lists for CSV inputs.
output_path = "./Datasets/new_places.csv"
new_df.to_csv(output_path, index=False)