In [12]:
from geopandas.tools import geocode
import pandas as pd

### Read the HTML

In [13]:
# Wikipedia article whose first HTML table holds the museum attendance list.
path = "https://en.wikipedia.org/wiki/List_of_most-visited_museums"
# read_html returns a list of DataFrames (one per table); keep only the first.
data = pd.read_html(path)[0]

In [14]:
# Preview the scraped table; the rich display elides the middle rows.
data

Unnamed: 0,Name,"Country flag, city",Visitors per year,Year reported
0,Attendance figures for 2022,,,
1,Louvre,Paris,"7,800,000 [7]","2022 (2,285,000 in 2021)"
2,Galleria degli Uffizi,Florence,"4,000,000[8]",2022 (1.7 million in 2021)
3,National Museum of Natural History,"Washington, D.C.",3900000,2022[9]
4,Hermitage Museum,"Saint Petersburg, Russia",2700000,2022[10]
...,...,...,...,...
66,Chengdu Museum,Chengdu,2259000,2019[60]
67,Guangdong Museum,Guangzhou,2234800,2019[60]
68,Wenzhou Museum,Wenzhou,2229900,2019[60]
69,Suzhou Museum,Suzhou,2030000,2019[60]


### Save the data

In [15]:
# Persist the scraped table to disk. The DataFrame index is written too, so it
# occupies the first spreadsheet column.
data.to_excel(excel_writer="Most Visited Museums.xlsx")

### Read the data

In [16]:
# Reload the saved sheet, keeping spreadsheet columns 1-3 and giving them
# shorter names (column 0 is skipped).
museums = pd.read_excel(
    "Most Visited Museums.xlsx",
    usecols=[1, 2, 3],
    names=["Name", "Country", "Visitors Per Year"],
)
# Discard rows with any missing value (e.g. the "Attendance figures" banner row).
museums = museums.dropna()

In [22]:
# NOTE(review): the output saved under this cell already contains the
# Address/Lon/Lat columns, i.e. the cell was last executed after the geocoding
# loop (execution count 22). On a fresh top-to-bottom run it shows only the
# Name, Country and Visitors Per Year columns.
museums

Unnamed: 0,Name,Country,Visitors Per Year,Address,Lon,Lat
1,Louvre,Paris,"7,800,000 [7]","Musée du Louvre, Rue Saint-Honoré, Quartier du...",2.338028,48.861147
2,Galleria degli Uffizi,Florence,"4,000,000[8]","Galleria degli Uffizi, 6, Piazzale degli Uffiz...",11.255801,43.768313
3,National Museum of Natural History,"Washington, D.C.",3900000,"National Museum of Natural History, Smithsonia...",-77.025970,38.891245
4,Hermitage Museum,"Saint Petersburg, Russia",2700000,"Государственный Эрмитаж, набережная Зимней кан...",30.315487,59.941208
5,Centre Pompidou,Paris,3000000,"Centre Georges Pompidou, 19, Rue Beaubourg, Qu...",2.352474,48.860592
...,...,...,...,...,...,...
66,Chengdu Museum,Chengdu,2259000,"金沙遗址博物馆, 金博路, 茶店子街道, 金牛区, 成都市, 四川省, 610091, 中国",104.011994,30.685602
67,Guangdong Museum,Guangzhou,2234800,"广东省博物馆, 2号, 珠江东路, 猎德街道, 天河区, 广州市, 广东省, 510623, 中国",113.321425,23.117331
68,Wenzhou Museum,Wenzhou,2229900,"博物馆, 科技广场, 宏地温州府, 鹿城区, 温州市, 浙江省, 325002, 中国",120.694147,27.991784
69,Suzhou Museum,Suzhou,2030000,"苏州博物馆, 小飞虹桥, 拙政园社区, 平江街道, 姑苏区, 苏州市, 江苏省, 21500...",120.623093,31.325417


### Iterate through the df and geocode each museum

In [18]:
# Geocode every museum by name with Nominatim (OpenStreetMap) and write the
# matched address and coordinates back onto `museums`.
# Nominatim's usage policy disallows stock HTTP-library user agents, so the
# request identifies this notebook explicitly.
# NOTE(review): only the museum name is sent to the geocoder; including the city
# from the 'Country' column may give more accurate matches -- confirm before changing.
for index, row in museums.iterrows():
    # Use one string form for both the query and the message below.
    name = str(row['Name'])
    info = geocode(name, provider='nominatim', user_agent="most-visited-museums-notebook")

    # A failed lookup comes back with a missing address; report it and move on,
    # leaving this row's Address/Lon/Lat unset.
    if info.empty or pd.isna(info['address'].iloc[0]):
        print(f"The geometrical and address data for the location: {name} could not be found.")
        continue

    # First (only) result: full address text plus a point whose x/y are lon/lat.
    location = info['geometry'].iloc[0]
    museums.loc[index, 'Address'] = info['address'].iloc[0]
    museums.loc[index, 'Lon'] = location.x
    museums.loc[index, 'Lat'] = location.y

The geometrical and address data for the location: Smithsonian Museum of American History could not be found.
The geometrical and address data for the location: Tianjin Natural History Museum could not be found.
The geometrical and address data for the location: National Archeological Museum of Naples could not be found.
The geometrical and address data for the location: Chongqing Museum of Natural History could not be found.
The geometrical and address data for the location: Guangdong Museum of Revolutionary History could not be found.


### Drop the rows with NaN values

In [21]:
# Keep only the museums that are fully populated: any row with a missing value
# in any column is removed.
museums = museums.dropna(axis="index", how="any")
museums

Unnamed: 0,Name,Country,Visitors Per Year,Address,Lon,Lat
1,Louvre,Paris,"7,800,000 [7]","Musée du Louvre, Rue Saint-Honoré, Quartier du...",2.338028,48.861147
2,Galleria degli Uffizi,Florence,"4,000,000[8]","Galleria degli Uffizi, 6, Piazzale degli Uffiz...",11.255801,43.768313
3,National Museum of Natural History,"Washington, D.C.",3900000,"National Museum of Natural History, Smithsonia...",-77.025970,38.891245
4,Hermitage Museum,"Saint Petersburg, Russia",2700000,"Государственный Эрмитаж, набережная Зимней кан...",30.315487,59.941208
5,Centre Pompidou,Paris,3000000,"Centre Georges Pompidou, 19, Rue Beaubourg, Qu...",2.352474,48.860592
...,...,...,...,...,...,...
66,Chengdu Museum,Chengdu,2259000,"金沙遗址博物馆, 金博路, 茶店子街道, 金牛区, 成都市, 四川省, 610091, 中国",104.011994,30.685602
67,Guangdong Museum,Guangzhou,2234800,"广东省博物馆, 2号, 珠江东路, 猎德街道, 天河区, 广州市, 广东省, 510623, 中国",113.321425,23.117331
68,Wenzhou Museum,Wenzhou,2229900,"博物馆, 科技广场, 宏地温州府, 鹿城区, 温州市, 浙江省, 325002, 中国",120.694147,27.991784
69,Suzhou Museum,Suzhou,2030000,"苏州博物馆, 小飞虹桥, 拙政园社区, 平江街道, 姑苏区, 苏州市, 江苏省, 21500...",120.623093,31.325417


### Save the DataFrame to an Excel file

In [23]:
# Write the museum table to its own workbook.
museums.to_excel(excel_writer="Most Visited Museums - Clean.xlsx")