In [1]:
"""In the contest of the research questions of the project we analised the "Luoghi della cultura dataset" to understand what and how many entities were contained 
(with a focus specifically related to finding libraries, museumns and archives), counting them for regions and cities and then we tried to find out any museum, library, archive with a
 free entry or a ticket cost price."""


from rdflib import Graph
import csv
import pandas as pd
from IPython.display import display

# Load the  datastet luoghi della cultura RDF file
rdf_file =  r"C:\Users\HWRUser\Desktop\DHDK Course\Information Visualization\Data_Italian_publishers\dataset_luoghi_cultura.rdf"
g = Graph()
g.parse(rdf_file, format="xml") 


#Exploratory Query for identifying the type of entities inside the dataset 
query_0 = """
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 PREFIX cis: <http://dati.beniculturali.it/cis/>
 PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
 PREFIX dc: <http://purl.org/dc/elements/1.1/>

 SELECT ?entity ?entityType
  WHERE {
   {
       ?entity a ?entityType.
    }
   
    UNION
  {
    ?entity dc:type ?entityType .
  }
   
}

"""

#Performing the query
results = g.query(query_0)

#Dataframe for Entity Types
data_Entity_Type= []
for row in results:
    data_Entity_Type.append({'Entity_ID': str(row['entity']), 'Entity_Type': str(row['entityType'])})
df_entity_Type = pd.DataFrame(data_Entity_Type)

#Dataframe for counting the Entities according to their type and their total
entity_type_counts = df_entity_Type.groupby('Entity_Type').size().reset_index(name='Count')
total_count = entity_type_counts['Count'].sum()
total_row = pd.DataFrame({'Entity_Type': ['TOTAL'], 'Count': [total_count]})
entity_type_counts = pd.concat([entity_type_counts, total_row], ignore_index=True)

#Displaying entity types and their counts
display(df_entity_Type)
display(entity_type_counts)



Unnamed: 0,Entity_ID,Entity_Type
0,http://dati.beniculturali.it/mibact/luoghi/res...,https://w3id.org/italia/onto/SM/OnlineContactP...
1,http://dati.beniculturali.it/mibact/luoghi/res...,https://w3id.org/italia/onto/SM/OnlineContactP...
2,http://dati.beniculturali.it/mibact/luoghi/res...,https://w3id.org/italia/onto/SM/OnlineContactP...
3,http://dati.beniculturali.it/mibact/luoghi/res...,https://w3id.org/italia/onto/SM/OnlineContactP...
4,http://dati.beniculturali.it/mibact/luoghi/res...,https://w3id.org/italia/onto/SM/OnlineContactP...
...,...,...
134441,http://dati.beniculturali.it/mibact/luoghi/res...,I tesori della Cultura
134442,http://dati.beniculturali.it/mibact/luoghi/res...,Istituto Centrale
134443,http://dati.beniculturali.it/mibact/luoghi/res...,Soprintendenza Archivistica e Bibliografica
134444,http://dati.beniculturali.it/mibact/luoghi/res...,Comune


Unnamed: 0,Entity_Type,Count
0,Altro,177
1,Amministrazione dello Stato,1
2,Architettura Civile,175
3,Architettura Fortificata,130
4,Archivio,11
5,Archivio di Stato,142
6,Area Archeologica,386
7,Biblioteca,34
8,Biblioteca Statale,45
9,Chiesa o edificio di culto,330


In [2]:
"""During the early stages of studying the dataset, we noticed that there were several entity places whose institutional description (OtherPlace_Type) did not match exactly with their real names (OtherPlace_Name), 
so it was decided to investigate this discrepancy further."""

#Exploratory Query for identifying the Other entities as Places, their IDs, Names, Cities, Regions and Types

query_1 = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cis: <http://dati.beniculturali.it/cis/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
PREFIX geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#>


SELECT ?OtherPlace ?OhterPlaceLabel ?OhterPlaceCity ?OtherPlaceRegion ?OtherPlaceType ?OtherPlaceLatitude ?OtherPlaceLongitude
WHERE {
  ?OtherPlace rdf:type cis:CulturalInstituteOrSite .
  ?OtherPlace dc:type ?OtherPlaceType.
  ?OtherPlace rdfs:label ?OhterPlaceLabel .
  ?OtherPlace cis:hasSite ?OtherPlaceSite.
  ?OtherPlaceSite cis:siteAddress ?OtherPlaceAddress.
  ?OtherPlaceAddress clvapit:hasRegion ?OtherPlaceRegion.
  ?OtherPlaceAddress clvapit:hasCity ?OhterPlaceCity.
  ?OtherPlace geo1:lat ?OtherPlaceLatitude.
  ?OtherPlace geo1:long ?OtherPlaceLongitude.
  
  FILTER NOT EXISTS { ?OtherPlace dc:type "Biblioteca" }
  FILTER NOT EXISTS { ?OtherPlace dc:type "Museo, Galleria e/o raccolta" }
  FILTER NOT EXISTS { ?OtherPlace dc:type "Archivio di Stato" }
  FILTER NOT EXISTS { ?OtherPlace dc:type "Archivio" }
  FILTER NOT EXISTS { ?OtherPlace dc:type "Biblioteca Statale" }



}
"""


#Perform the query
results = g.query(query_1)


#Ohter Places Dataframe
data_OtherPlaces = []
for row in results:
    data_OtherPlaces.append({'OtherPlace_ID': str(row['OtherPlace']), 'OtherPlace_Name': str(row['OhterPlaceLabel']), 'OtherPlace_Type': str(row['OtherPlaceType']), 'OtherPlace_City': str(row['OhterPlaceCity']), 'OtherPlace_Region': str(row['OtherPlaceRegion']), 'OtherPlace_Latitude': str(row['OtherPlaceLatitude']), 'OtherPlace_Longitude': str(row['OtherPlaceLongitude'])})
df_OtherPlaces = pd.DataFrame(data_OtherPlaces)

#Dtaframes for counting Ohter Places Types and their number for each region
OtherPlace_Type_counts = df_OtherPlaces.groupby('OtherPlace_Type').size().reset_index(name='Count')
total_count = OtherPlace_Type_counts['Count'].sum()
total_row = pd.DataFrame({'OtherPlace_Type': ['TOTAL'], 'Count': [total_count]})
OtherPlace_Type_counts = pd.concat([OtherPlace_Type_counts, total_row], ignore_index=True)


#Displaying OtherPlaces DataFrames and their type counts
display(df_OtherPlaces)
display(OtherPlace_Type_counts)

#CSV files from above Dataframes
csv_file_OtherPlaces = "OtherPlaces.csv"
df_OtherPlaces.to_csv(csv_file_OtherPlaces, index=False, encoding='utf-8')
print(f"Results have been written to {csv_file_OtherPlaces}")

# csv_file_OtherPlaces_Type = "OtherPlaces_Type_count.csv"
# OtherPlace_Type_counts.to_csv(csv_file_OtherPlaces_Type, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_OtherPlaces_Type}")



Unnamed: 0,OtherPlace_ID,OtherPlace_Name,OtherPlace_Type,OtherPlace_City,OtherPlace_Region,OtherPlace_Latitude,OtherPlace_Longitude
0,http://dati.beniculturali.it/mibact/luoghi/res...,Museo Annibale Di Francia,Chiesa o edificio di culto,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.18351,15.550492
1,http://dati.beniculturali.it/mibact/luoghi/res...,"Madonna della Pietà, Chiesa",Chiesa o edificio di culto,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.07576,12.957292
2,http://dati.beniculturali.it/mibact/luoghi/res...,Museo della Fondazione Whitaker,Altro,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,37.866444,12.468154
3,http://dati.beniculturali.it/mibact/luoghi/res...,Seminario diocesano,Chiesa o edificio di culto,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.31543,9.323138
4,http://dati.beniculturali.it/mibact/luoghi/res...,San Michele Arcangelo - Catacombe,Area Archeologica,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,42.407158,13.306156
...,...,...,...,...,...,...,...
1542,http://dati.beniculturali.it/mibact/luoghi/res...,Castello di Masazza,Architettura Fortificata,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.49067,8.165532
1543,http://dati.beniculturali.it/mibact/luoghi/res...,Teatro comunale di Fontanellato,Architettura Civile,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.881474,10.171771
1544,http://dati.beniculturali.it/mibact/luoghi/res...,Museo diocesano d'arte sacra di Trivento,Chiesa o edificio di culto,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.78528,14.551004
1545,http://dati.beniculturali.it/mibact/luoghi/res...,Cappella Manin,Chiesa o edificio di culto,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,46.06494,13.230724


Unnamed: 0,OtherPlace_Type,Count
0,Altro,153
1,Architettura Civile,166
2,Architettura Fortificata,120
3,Area Archeologica,328
4,Chiesa o edificio di culto,302
5,Istituto Centrale,1
6,Istituto dotato di autonomia speciale,1
7,Monumento,146
8,Monumento Funerario,17
9,Monumento di Archeologia Industriale,15


Results have been written to OtherPlaces.csv


In [3]:
""" The next phase was to identify all the libraries, musuems and archives entities (IDs, Names, City, Region, Longitute, Latitude), counting and diving them for their region and city. 
Cities and Regions are useful to us for understanding the distribution of museums,archives,libraries on the national territory, while longitute and latitute to pinpoint their exact location in a map.
We started our search from libraries."""

#Exploratory Query for identifying all the libraries IDs, Names, Cities, Regions, Longitute, Latitude.

query_2 = """
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 PREFIX cis: <http://dati.beniculturali.it/cis/>
 PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
 PREFIX dc: <http://purl.org/dc/elements/1.1/>
 PREFIX geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#>

 SELECT ?library ?librarylabel ?libraryCity ?libraryRegion ?librarylatitude ?librarylongitude 
 WHERE {

   {
    ?library a cis:Library .
  }
  UNION
  {
    ?library dc:type "Biblioteca Statale" .
  }

   ?library a cis:CulturalInstituteOrSite.
   ?library rdfs:label ?librarylabel.
   ?library cis:hasSite ?librarysite.
   ?librarysite cis:siteAddress ?libraryAddress.
   ?libraryAddress clvapit:hasRegion ?libraryRegion.
   ?libraryAddress clvapit:hasCity ?libraryCity.
   ?library geo1:lat ?librarylatitude.
   ?library geo1:long ?librarylongitude.
 } 
 
"""


# Perform the query
results = g.query(query_2)

#Libraries Dataframe
data_Libraries = []
for row in results:
    data_Libraries.append({'Library_ID': str(row['library']), 'Library_Name': str(row['librarylabel']),'Library_City': str(row['libraryCity']), 'Library_Region': str(row['libraryRegion']),'Library_Latitude': str(row['librarylatitude']), 'Library_Longitude': str(row['librarylongitude'])})
df_Libraries = pd.DataFrame(data_Libraries)

# List of IDs to remove that are not libraries: some of the entity libraries were classified in the dataset as libraries but after an in-depth search it was find out that they were not exactly libraries. For deciding if it was the case these entitie were compared with the same entity identified in WikiData.
IDs_to_remove=["http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102857",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102889",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102891",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/101975",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102879",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102917",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102858",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102876",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102853",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/110257",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102864",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102855"]

# Find rows to remove in Library_ID
rows_to_remove = df_Libraries[df_Libraries['Library_ID'].isin(IDs_to_remove)].index
library_df = df_Libraries.drop(rows_to_remove)
library_df.reset_index(drop=True, inplace=True)


#Dataframe for counting number of libraries for region and the total number
library_region_counts = library_df.groupby('Library_Region').size().reset_index(name='Count')
# Calculate the total count
total_library_region_count = library_region_counts['Count'].sum()
# Create a DataFrame for the total row
total_row = pd.DataFrame({'Library_Region': ['TOTAL'], 'Count': [total_library_region_count]})
# Concat the total row to the library region count dataframe
library_region_counts= pd.concat([library_region_counts, total_row], ignore_index=True)


#Dataframes for counting number of libraries for city and the total number
library_city_counts = library_df.groupby('Library_City').size().reset_index(name='Count')
total_library_city_count = library_city_counts['Count'].sum()
# Create a DataFrame for the total row
total_row = pd.DataFrame({'Library_City': ['TOTAL'], 'Count': [total_library_city_count]})
# Concat the total row to the library city count dataframe
library_city_counts= pd.concat([library_city_counts, total_row], ignore_index=True)

#Displaying library DataFrame, library region and library city count dataframe with total count
display(library_df)
display(library_region_counts)
display(library_city_counts)

# CSV files from all Dataframes for libraries
# csv_file_libraries = "Libraries_Luoghi_Cultura.csv"
# library_df.to_csv(csv_file_libraries, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_libraries}")

# csv_file_library_region_count = "Libraries_Region_Count_Luoghi_Cultura.csv"
# library_region_counts.to_csv(csv_file_library_region_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_region_count}")

# csv_file_library_city_count = "Libraries_City_Count_Luoghi_Cultura.csv"
# library_city_counts.to_csv(csv_file_library_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_city_count}")


Unnamed: 0,Library_ID,Library_Name,Library_City,Library_Region,Library_Latitude,Library_Longitude
0,http://dati.beniculturali.it/mibact/luoghi/res...,Centro dantesco dei Frati Minori Conventuali,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.41629,12.200871
1,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca 'Mario Gromo',http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.081963,7.681357
2,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca comunale,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.63192,13.71498
3,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca comunale - Museo,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,40.184994,18.35104
4,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Classense,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.414177,12.200108
...,...,...,...,...,...,...
56,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Universitaria di Sassari,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,40.722702,8.563728
57,http://dati.beniculturali.it/mibact/luoghi/res...,Musei Reali - Biblioteca Reale,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.07285,7.686345
58,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Statale Stelio Crise di Trieste,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.64559,13.763095
59,http://dati.beniculturali.it/mibact/luoghi/res...,San Matteo - Sede distaccata Biblioteca Univer...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.7152,10.407801


Unnamed: 0,Library_Region,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,4
3,http://dati.beniculturali.it/mibact/luoghi/res...,5
4,http://dati.beniculturali.it/mibact/luoghi/res...,2
5,http://dati.beniculturali.it/mibact/luoghi/res...,19
6,http://dati.beniculturali.it/mibact/luoghi/res...,2
7,http://dati.beniculturali.it/mibact/luoghi/res...,4
8,http://dati.beniculturali.it/mibact/luoghi/res...,1
9,http://dati.beniculturali.it/mibact/luoghi/res...,4


Unnamed: 0,Library_City,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,1
3,http://dati.beniculturali.it/mibact/luoghi/res...,1
4,http://dati.beniculturali.it/mibact/luoghi/res...,1
5,http://dati.beniculturali.it/mibact/luoghi/res...,1
6,http://dati.beniculturali.it/mibact/luoghi/res...,1
7,http://dati.beniculturali.it/mibact/luoghi/res...,1
8,http://dati.beniculturali.it/mibact/luoghi/res...,1
9,http://dati.beniculturali.it/mibact/luoghi/res...,1


In [4]:
""" After an initial analysis of the previous dataframe, the existence of a specific type of libraries emerged ("Biblioteca Statale"). 
In order to identify this type and get a better view of the dataframe numbers, a further query was made on the dataset."""

#Exploratory query for identifying the Mibact libraries classified as dctype:"Biblioteca Statale"
query_2bis = """
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 PREFIX cis: <http://dati.beniculturali.it/cis/>
 PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
 PREFIX dc: <http://purl.org/dc/elements/1.1/>
 PREFIX geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#>

 SELECT ?library ?librarylabel ?libraryCity ?libraryRegion ?librarylatitude ?librarylongitude 
 WHERE {

   ?library dc:type "Biblioteca Statale" .
   ?library a cis:CulturalInstituteOrSite.
   ?library rdfs:label ?librarylabel.
   ?library cis:hasSite ?librarysite.
   ?librarysite cis:siteAddress ?libraryAddress.
   ?libraryAddress clvapit:hasRegion ?libraryRegion.
   ?libraryAddress clvapit:hasCity ?libraryCity.
   ?library geo1:lat ?librarylatitude.
   ?library geo1:long ?librarylongitude.
 } 
  

"""

# Perform the query
results = g.query(query_2bis)

#Mibact Libraries Dataframe
data_Libraries = []
for row in results:
    data_Libraries.append({'Library_ID': str(row['library']), 'Library_Name': str(row['librarylabel']),'Library_City': str(row['libraryCity']), 'Library_Region': str(row['libraryRegion']),'Library_Latitude': str(row['librarylatitude']), 'Library_Longitude': str(row['librarylongitude'])})
df_Libraries = pd.DataFrame(data_Libraries)

#  List of IDs to remove that are not libraries: some of the entity libraries were classified in the dataset as libraries but after an in-depth search it was find out that they were not exactly libraries. For deciding if it was the case these entitie were compared with the same entity identified in WikiData
IDs_to_remove=["http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102857",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102889",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102891",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/101975",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102879",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102917",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102858",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102876",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102853",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/110257",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102864",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/102855"]

# Find rows to remove in Library_ID
rows_to_remove = df_Libraries[df_Libraries['Library_ID'].isin(IDs_to_remove)].index
Mibact_libraries_df = df_Libraries.drop(rows_to_remove)
Mibact_libraries_df.reset_index(drop=True, inplace=True)

#Displaying Mibact Libraries Dataframe
display(Mibact_libraries_df)


# CSV files from Mibact Libraries Dataframe
# csv_file_Mibact_libraries = "Mibact_Libraries_Luoghi_Cultura.csv"
# Mibact_libraries_df.to_csv(csv_file_Mibact_libraries, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_Mibact_libraries}")

Unnamed: 0,Library_ID,Library_Name,Library_City,Library_Region,Library_Latitude,Library_Longitude
0,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca nazionale Marciana - Sale monumenta...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.434067,12.334472
1,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Nazionale di Cosenza,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,39.288624,16.26098
2,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Casanatense,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.898148,12.479639
3,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Statale di Lucca,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.8449,10.501189
4,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Riccardiana,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.77558,11.255349
5,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Medicea Laurenziana,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.774815,11.254537
6,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Statale del Monumento Nazionale di ...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.76406,13.37067
7,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Medica Statale di Roma,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,0.040255,-0.010042
8,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Universitaria di Padova,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.40711,11.880526
9,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca statale di Macerata,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.30105,13.448922


In [5]:
#	Exploratory Query for identifying the museums IDs, Names, Cities, Regions, Longitute and Latitude

query_3 = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cis: <http://dati.beniculturali.it/cis/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
PREFIX geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#>


SELECT ?museum ?museumlabel ?museumCity ?museumRegion ?museumlatitude ?museumlongitude 
WHERE {
  ?museum rdf:type cis:CulturalInstituteOrSite .
  ?museum dc:type "Museo, Galleria e/o raccolta".
  ?museum rdfs:label ?museumlabel .
  ?museum cis:hasSite ?museumSite.
  ?museumSite cis:siteAddress ?museumAddress.
  ?museumAddress clvapit:hasRegion ?museumRegion.
  ?museumAddress clvapit:hasCity ?museumCity.
  ?museum geo1:lat ?museumlatitude.
  ?museum geo1:long ?museumlongitude.

}
"""


# Perform the query
results = g.query(query_3)

#Museums DataFrame
data_Museums = []
for row in results:
    data_Museums.append({'Museum_ID': str(row['museum']), 'Museum_Name': str(row['museumlabel']), 'Museum_City': str(row['museumCity']), 'Museum_Region': str(row['museumRegion']), 'Museum_Latitude': str(row['museumlatitude']), 'Museum_Longitude': str(row['museumlongitude']) })

df_Museums = pd.DataFrame(data_Museums)

"""Following an analysis of the Other type entities dataframe, other museum entites were identified but not categorised as such. For this reasonthis we decided to Enrich Museums Dataframe with other Museums entities from OtherPlaces dataframe, 
in order to not leave out any entities  that can be considered as a Museum."""

#filtering OtherPlaces dataframe according their names that contains museum(s) word
df_other_places = pd.DataFrame(data_OtherPlaces)
filtered_Other_Places_df = df_OtherPlaces[df_OtherPlaces['OtherPlace_Name'].str.contains('muse', case=False, na=False)]

#Renaming columns of new filtered Museum Dataframe
df_filtered_museums=filtered_Other_Places_df.rename(columns={
    'OtherPlace_ID': 'Museum_ID',
    'OtherPlace_Name': 'Museum_Name',
    'OtherPlace_City': 'Museum_City',
    'OtherPlace_Region': 'Museum_Region',
    'OtherPlace_Latitude': 'Museum_Latitude',
    'OtherPlace_Longitude': 'Museum_Longitude'
})

#Dropping "OtherPlace_Type column which is not needed in the new filtered Museum Dataframe"
df_filtered_museums = df_filtered_museums.drop(columns=['OtherPlace_Type'])


#Mergin the previous Museum Dataframe with the filtered Museum Dataframes to create a new Dataframe with all museum entities
df_Museums = pd.DataFrame(data_Museums)
df_museums_complete= pd.concat([df_filtered_museums, df_Museums], ignore_index=True)

#New dataframes for counting museum cities and regions with totals
museum_region_counts_complete = df_museums_complete.groupby('Museum_Region').size().reset_index(name='Count')
total_count = museum_region_counts_complete ['Count'].sum()
total_row = pd.DataFrame({'Museum_Region': ['TOTAL'], 'Count': [total_count]})
museum_region_counts_complete = pd.concat([museum_region_counts_complete, total_row], ignore_index=True)


museum_city_counts_complete = df_museums_complete.groupby('Museum_City').size().reset_index(name='Count')
total_count = museum_city_counts_complete ['Count'].sum()
total_row = pd.DataFrame({'Museum_City': ['TOTAL'], 'Count': [total_count]})
museum_city_counts_complete = pd.concat([museum_city_counts_complete, total_row], ignore_index=True)

#Displaying all dataframes for Museum entities, with cities, regions and total count
display(df_museums_complete)
display(museum_region_counts_complete)
display(museum_city_counts_complete)

# #CSV files for Museums from the above Dataframes
# csv_file_museums_complete = "Museums_complete.csv"
# df_museums_complete.to_csv(csv_file_museums_complete, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums_complete}")

# csv_file_museums_region_count_complete = "Museums_region_count_complete.csv"
# museum_region_counts_complete.to_csv(csv_file_museums_region_count_complete, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums_region_count_complete}")

# csv_file_museums_city_count_complete = "Museums_city_count_complete.csv"
# museum_city_counts_complete.to_csv(csv_file_museums_city_count_complete, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums_city_count_complete}")



Unnamed: 0,Museum_ID,Museum_Name,Museum_City,Museum_Region,Museum_Latitude,Museum_Longitude
0,http://dati.beniculturali.it/mibact/luoghi/res...,Museo Annibale Di Francia,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.18351,15.550492
1,http://dati.beniculturali.it/mibact/luoghi/res...,Museo della Fondazione Whitaker,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,37.866444,12.468154
2,http://dati.beniculturali.it/mibact/luoghi/res...,Parco Poesia Pascoli - Museo Casa Pascoli,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.105186,12.415602
3,http://dati.beniculturali.it/mibact/luoghi/res...,Museo di arte contemporanea Giuseppe e Titina ...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.868443,9.266259
4,http://dati.beniculturali.it/mibact/luoghi/res...,Museo armi antiche Fosco Baboni,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.149033,10.64564
...,...,...,...,...,...,...
4554,http://dati.beniculturali.it/mibact/luoghi/res...,Museo della satira e della caricatura,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.959473,10.169657
4555,http://dati.beniculturali.it/mibact/luoghi/res...,Museo Fortuny,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.43519,12.335712
4556,http://dati.beniculturali.it/mibact/luoghi/res...,Pinacoteca e Museo delle Arti,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.236107,16.265602
4557,http://dati.beniculturali.it/mibact/luoghi/res...,Pinacoteca dei Padri Cappuccini,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.615677,8.843204


Unnamed: 0,Museum_Region,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,128
1,http://dati.beniculturali.it/mibact/luoghi/res...,63
2,http://dati.beniculturali.it/mibact/luoghi/res...,213
3,http://dati.beniculturali.it/mibact/luoghi/res...,230
4,http://dati.beniculturali.it/mibact/luoghi/res...,490
5,http://dati.beniculturali.it/mibact/luoghi/res...,150
6,http://dati.beniculturali.it/mibact/luoghi/res...,308
7,http://dati.beniculturali.it/mibact/luoghi/res...,180
8,http://dati.beniculturali.it/mibact/luoghi/res...,461
9,http://dati.beniculturali.it/mibact/luoghi/res...,240


Unnamed: 0,Museum_City,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,2
3,http://dati.beniculturali.it/mibact/luoghi/res...,1
4,http://dati.beniculturali.it/mibact/luoghi/res...,2
...,...,...
2242,http://dati.beniculturali.it/mibact/luoghi/res...,1
2243,http://dati.beniculturali.it/mibact/luoghi/res...,1
2244,http://dati.beniculturali.it/mibact/luoghi/res...,1
2245,http://dati.beniculturali.it/mibact/luoghi/res...,1


In [6]:
#Exploratory Query for identifying the Archive IDs, Names, Cities and Regions, Longitute and Latitude

query_4 = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cis: <http://dati.beniculturali.it/cis/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
PREFIX geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#>


SELECT ?archive ?archivelabel ?archiveCity ?archiveRegion ?archiveLatitude ?archiveLongitude
WHERE {

   {
    ?archive dc:type "Archivio di Stato".
  }
  UNION
  {
   ?archive dc:type "Archivio".
  }


  ?archive rdf:type cis:CulturalInstituteOrSite .
  ?archive rdfs:label ?archivelabel .
  ?archive cis:hasSite ?archiveSite.
  ?archiveSite cis:siteAddress ?archiveAddress.
  ?archiveAddress clvapit:hasRegion ?archiveRegion.
  ?archiveAddress clvapit:hasCity ?archiveCity.
  ?archive geo1:lat ?archiveLatitude.
  ?archive geo1:long ?archiveLongitude.
}
"""


# Perform the query
results = g.query(query_4)


#Archives Dataframe
data_Archives = []
for row in results:
    data_Archives.append({'Archive_ID': str(row['archive']), 'Archive_Name': str(row['archivelabel']), 'Archive_City': str(row['archiveCity']),'Archive_Region': str(row['archiveRegion']),'Archive_Latitude': str(row['archiveLatitude']), 'Archive_Longitude': str(row['archiveLongitude'])})
df_archives = pd.DataFrame(data_Archives)

#Dataframes for counting archives by their city and region
archive_region_counts = df_archives.groupby('Archive_Region').size().reset_index(name='Count')
total_count = archive_region_counts ['Count'].sum()
total_row = pd.DataFrame({'Archive_Region': ['TOTAL'], 'Count': [total_count]})
archive_region_counts = pd.concat([archive_region_counts, total_row], ignore_index=True)

archive_city_counts = df_archives.groupby('Archive_City').size().reset_index(name='Count')
total_count = archive_city_counts['Count'].sum()
total_row = pd.DataFrame({'Archive_City': ['TOTAL'], 'Count': [total_count]})
archive_city_counts = pd.concat([archive_city_counts, total_row], ignore_index=True)

#Displaying all dataframes for Archives entities
display(df_archives)
display(archive_region_counts)
display(archive_city_counts)


#CSV from all Dataframes for Archives
# csv_file_archives = "Archives_Luoghi_Cultura.csv"
# df_archives.to_csv(csv_file_archives, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archives}")

# csv_file_archives_region_count = "Archives_region_count_Luoghi_Cultura.csv"
# archive_region_counts.to_csv(csv_file_archives_region_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archives_region_count}")


# csv_file_archives_city_count = "Archives_city_count_Luoghi_Cultura.csv"
# archive_city_counts.to_csv(csv_file_archives_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archives_city_count}")


Unnamed: 0,Archive_ID,Archive_Name,Archive_City,Archive_Region,Archive_Latitude,Archive_Longitude
0,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Trento,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,46.08832,11.109711
1,http://dati.beniculturali.it/mibact/luoghi/res...,"Archivio di Stato di Bologna, Sezione di Imola",http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.35559,11.711252
2,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio centrale dello Stato,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.82904,12.47538
3,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Ancona. Sede distaccata,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.595825,13.503158
4,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Catanzaro,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.90427,16.59281
...,...,...,...,...,...,...
135,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Varese,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.816025,8.815864
136,http://dati.beniculturali.it/mibact/luoghi/res...,Casa Zegna,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.67137,8.157712
137,http://dati.beniculturali.it/mibact/luoghi/res...,Collezione storica ATC Bologna,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.507275,11.350482
138,http://dati.beniculturali.it/mibact/luoghi/res...,Raccolta Archivio Toraldo di Francia,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.677116,15.897367


Unnamed: 0,Archive_Region,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,7
1,http://dati.beniculturali.it/mibact/luoghi/res...,3
2,http://dati.beniculturali.it/mibact/luoghi/res...,9
3,http://dati.beniculturali.it/mibact/luoghi/res...,5
4,http://dati.beniculturali.it/mibact/luoghi/res...,11
5,http://dati.beniculturali.it/mibact/luoghi/res...,4
6,http://dati.beniculturali.it/mibact/luoghi/res...,7
7,http://dati.beniculturali.it/mibact/luoghi/res...,6
8,http://dati.beniculturali.it/mibact/luoghi/res...,9
9,http://dati.beniculturali.it/mibact/luoghi/res...,12


Unnamed: 0,Archive_City,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,2
2,http://dati.beniculturali.it/mibact/luoghi/res...,1
3,http://dati.beniculturali.it/mibact/luoghi/res...,1
4,http://dati.beniculturali.it/mibact/luoghi/res...,4
...,...,...
122,http://dati.beniculturali.it/mibact/luoghi/res...,1
123,http://dati.beniculturali.it/mibact/luoghi/res...,1
124,http://dati.beniculturali.it/mibact/luoghi/res...,1
125,http://dati.beniculturali.it/mibact/luoghi/res...,1


In [19]:
""" Another important aspect of our dataset analysis was to understand if there were entities with a free entry or a ticket to pay, particularly with a focus related to museums, libraries and 
 archives. This was done in order to understand the level of affordability /accessibility from an economic point of view  of different institutions."""


#		Exploratory query for defining entities and if they have a ticket cost/entry price or if they are free 
query_5 = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cis: <http://dati.beniculturali.it/cis/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX clvapit: <https://w3id.org/italia/onto/CLV/>
PREFIX potapit: <https://w3id.org/italia/onto/POT/>


SELECT ?Entity ?EntryPrice
WHERE {
  ?Entity rdf:type potapit:PriceSpecification.
  ?Entity potapit:hasCurrencyValue ?EntryPrice.

}
"""

# Perform the query
results = g.query(query_5)


#Dataframe for All Entities and the Ticket Prices or Free Entry 
data_Prices = []
for row in results:
    data_Prices.append({'Entity_ID': str(row['Entity']), 'Entry_Price': str(row['EntryPrice'])})
df_Ticket_Free_Entry_Prices = pd.DataFrame(data_Prices)

#Datarames dividing entities with free entry and entities with a ticket cost
df_free_entry = df_Ticket_Free_Entry_Prices[df_Ticket_Free_Entry_Prices['Entry_Price'].str.contains('Gratuito', case=False, na=False)]
df_free_entry = df_free_entry.reset_index(drop=True)
df_ticket_cost = df_Ticket_Free_Entry_Prices[df_Ticket_Free_Entry_Prices['Entry_Price'].str.match(r'^\d+(,\d+)?(\.\d+)?$', na=False)]
df_ticket_cost = df_ticket_cost.reset_index(drop=True)

#Displaying dataframes of entities with a ticket cost or a free entry all toghether and separated dataframes for entities with ticket cost and free entry
display(df_Ticket_Free_Entry_Prices)
display(df_free_entry)
display(df_ticket_cost)


# #CSV files from all Dataframes Entities with their Ticket Cost/Entry Price
# csv_Entities_Ticket_Entry_Prices = "Entities_Ticket_Prices_Free_Entry.csv"
# df_Ticket_Free_Entry_Prices.to_csv(csv_Entities_Ticket_Entry_Prices, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_Entities_Ticket_Entry_Prices}")

# csv_Entities_Free_Entry = "Entities_Free_Entry.csv"
# df_free_entry.to_csv(csv_Entities_Free_Entry, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_Entities_Free_Entry}")

# csv_Entities_Ticket_Cost = "Entities_Ticket_Cost.csv"
# df_ticket_cost.to_csv(csv_Entities_Ticket_Cost, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_Entities_Ticket_Cost}")

Unnamed: 0,Entity_ID,Entry_Price
0,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
1,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
2,http://dati.beniculturali.it/mibact/luoghi/res...,4.0
3,http://dati.beniculturali.it/mibact/luoghi/res...,350
4,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
...,...,...
5072,http://dati.beniculturali.it/mibact/luoghi/res...,500
5073,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
5074,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
5075,http://dati.beniculturali.it/mibact/luoghi/res...,500


Unnamed: 0,Entity_ID,Entry_Price
0,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
1,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
2,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
3,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
4,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
...,...,...
2161,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
2162,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
2163,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito
2164,http://dati.beniculturali.it/mibact/luoghi/res...,Gratuito


Unnamed: 0,Entity_ID,Entry_Price
0,http://dati.beniculturali.it/mibact/luoghi/res...,4.0
1,http://dati.beniculturali.it/mibact/luoghi/res...,350
2,http://dati.beniculturali.it/mibact/luoghi/res...,300
3,http://dati.beniculturali.it/mibact/luoghi/res...,400
4,http://dati.beniculturali.it/mibact/luoghi/res...,600
...,...,...
2906,http://dati.beniculturali.it/mibact/luoghi/res...,500
2907,http://dati.beniculturali.it/mibact/luoghi/res...,500
2908,http://dati.beniculturali.it/mibact/luoghi/res...,500
2909,http://dati.beniculturali.it/mibact/luoghi/res...,500


In [8]:
""" After identifying entities with a ticket cost/free entry, we focused more on understanding how many musuems, libraries, archives have a free entry or a ticket cost. We started this phase from all the museums entitites of the Museum dataframe: we compared those museum entites with all the entities in the ticket cost and free entry dataframes in order to check how many museums
were free and how many had a ticket cost (if any)."""

#CHECKING FOR FREE ENTRY MUSEUMS

#Script for extracting IDs from Dataframe of Entites with Free Entry
import re
def extract_id(entity_id):
    match = re.search(r'/(\d+)_', entity_id)
    if match:
        return match.group(1)  
    return None  

numeric_ids = df_free_entry['Entity_ID'].apply(extract_id).tolist()

#Script for checking if any Museum Ids in Museums Dataframe match with Ids in numeric_ids list made from free enty entities Dataframe
def check_id_in_museum_id(museum_id):
    for numeric_id in numeric_ids:
        if numeric_id in museum_id:
            return 'Free Entry'
    return None

#Dataframes of Museums with free entry cost and their number for each region and city with a total count
df_museums_complete['Ticket_Cost'] = df_museums_complete['Museum_ID'].apply(check_id_in_museum_id)
df_museum_free_entry_cost = df_museums_complete[df_museums_complete['Ticket_Cost'] == 'Free Entry'].reset_index(drop=True)

df_museum_free_entry_region_counts = df_museum_free_entry_cost.groupby(['Museum_Region', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_museum_free_entry_region_counts['Count'].sum()
total_row = pd.DataFrame({'Museum_Region': ['TOTAL'], 'Count': [total_count]})
df_museum_free_entry_region_counts = pd.concat([df_museum_free_entry_region_counts, total_row], ignore_index=True).fillna('')

df_museum_free_entry_city_counts = df_museum_free_entry_cost.groupby(['Museum_City', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_museum_free_entry_city_counts['Count'].sum()
total_row = pd.DataFrame({'Museum_City': ['TOTAL'], 'Count': [total_count]})
df_museum_free_entry_city_counts = pd.concat([df_museum_free_entry_city_counts, total_row], ignore_index=True).fillna('')

#Displaying dataframes for Museum free entry cost entities, their city and region count and a total count
display(df_museum_free_entry_cost)
display(df_museum_free_entry_region_counts)
display(df_museum_free_entry_city_counts)

# #CSV of all above Dataframes
# csv_file_museums_free_entry_cost = "Museums_Free_Entry_cost.csv"
# df_museum_free_entry_cost.to_csv(csv_file_museums_free_entry_cost, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums_free_entry_cost}")

# csv_file_museums_free_entry_region_count = "Museums_Free_Entry_region_count.csv"
# df_museum_free_entry_region_counts.to_csv(csv_file_museums_free_entry_region_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums_free_entry_region_count}")

# csv_file_museums_free_entry_city_count = "Museums_Free_Entry_city_count.csv"
# df_museum_free_entry_city_counts.to_csv(csv_file_museums_free_entry_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums_free_entry_city_count}")

Unnamed: 0,Museum_ID,Museum_Name,Museum_City,Museum_Region,Museum_Latitude,Museum_Longitude,Ticket_Cost
0,http://dati.beniculturali.it/mibact/luoghi/res...,Museo armi antiche Fosco Baboni,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.149033,10.64564,Free Entry
1,http://dati.beniculturali.it/mibact/luoghi/res...,Piccolo museo delle anime del Purgatorio,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.90413,12.46242,Free Entry
2,http://dati.beniculturali.it/mibact/luoghi/res...,Museo del Convento di Santa Chiara,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.139618,13.073348,Free Entry
3,http://dati.beniculturali.it/mibact/luoghi/res...,Castello normanno - Museo civico,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,37.555782,15.148047,Free Entry
4,http://dati.beniculturali.it/mibact/luoghi/res...,Castello di Spezzano e Museo della Ceramica,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.52156,10.846728,Free Entry
...,...,...,...,...,...,...,...
1365,http://dati.beniculturali.it/mibact/luoghi/res...,Museo del bracconaggio e delle trappole,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.630074,9.731209,Free Entry
1366,http://dati.beniculturali.it/mibact/luoghi/res...,"Casa museo ""Joe Petrosino"" Padula-New York-Pal...",http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,40.337982,15.655945,Free Entry
1367,http://dati.beniculturali.it/mibact/luoghi/res...,"Mostra archeologica permanente ""Divi & Dei""",http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.43615,13.663471,Free Entry
1368,http://dati.beniculturali.it/mibact/luoghi/res...,Museo della civiltà contadina di Sefro,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.1482,12.948257,Free Entry


Unnamed: 0,Museum_Region,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,27
1,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,7
2,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,26
3,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,161
4,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,347
5,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,104
6,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,177
7,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,107
8,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,157
9,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,54


Unnamed: 0,Museum_City,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,3
1,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
3,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,2
4,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
...,...,...,...
833,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
834,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
835,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
836,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1


In [9]:
#CHECKING FOR MUSEUMS WITH A TICKET COST

#Script for extracting IDs from Dataframe of Entites with a ticket cost entry

import re
def extract_id(entity_id):
    match = re.search(r'/(\d+)_', entity_id)
    if match:
        numeric_id = match.group(1)  
        price = df_ticket_cost.loc[df_ticket_cost['Entity_ID'] == entity_id, 'Entry_Price'].iloc[0]  
        return (numeric_id, price)  
    return None  
numeric_ids_with_prices= df_ticket_cost['Entity_ID'].apply(extract_id).tolist()


#Script for checking if any Museum Ids in Museums Dataframe match with Ids in numeric_ids_with_prices list made from ticket cost entities Dataframe
def check_id_in_museum_id(museum_id):
    for numeric_id, price in numeric_ids_with_prices:
        if numeric_id in museum_id:
            return price  
    return None  

#Dataframes of Museums with ticket entry cost and their number for each region and city with a total count
df_museums_complete['Ticket_Cost'] = df_museums_complete['Museum_ID'].apply(check_id_in_museum_id)
df_museum_ticket_cost = df_museums_complete[df_museums_complete['Ticket_Cost'].notnull()].reset_index(drop=True)
df_museum_ticket_cost['Ticket_Cost'] = df_museum_ticket_cost['Ticket_Cost'].str.replace(',', '.').astype(float).map('{:.2f}'.format)

df_museum_ticket_cost_region_counts = df_museum_ticket_cost.groupby(['Museum_Region', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_museum_ticket_cost_region_counts['Count'].sum()
total_row = pd.DataFrame({'Museum_Region': ['TOTAL'], 'Count': [total_count]})
df_museum_ticket_cost_region_counts = pd.concat([df_museum_ticket_cost_region_counts, total_row], ignore_index=True).fillna('')


df_museum_ticket_cost_city_counts = df_museum_ticket_cost.groupby(['Museum_City', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_museum_ticket_cost_city_counts['Count'].sum()
total_row = pd.DataFrame({'Museum_City': ['TOTAL'], 'Count': [total_count]})
df_museum_ticket_cost_city_counts = pd.concat([df_museum_ticket_cost_city_counts, total_row], ignore_index=True).fillna('')

#Displaying dataframes for Museum ticket cost entities, their city and region count and a total count
display(df_museum_ticket_cost)
display(df_museum_ticket_cost_region_counts)
display(df_museum_ticket_cost_city_counts)

#CSV of all above Dataframes
# csv_file_museums__ticket_cost_price = "Museums_Ticket_Cost_Price.csv"
# df_museum_ticket_cost.to_csv(csv_file_museums__ticket_cost_price, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums__ticket_cost_price}")

# csv_file_museums__ticket_cost_region_count = "Museums_Ticket_Cost_Region_count.csv"
# df_museum_ticket_cost_region_counts.to_csv(csv_file_museums__ticket_cost_region_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums__ticket_cost_region_count}")

# csv_file_museums__ticket_cost_city_count = "Museums_Ticket_Cost_City_count.csv"
# df_museum_ticket_cost_city_counts.to_csv(csv_file_museums__ticket_cost_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_museums__ticket_cost_city_count}")



Unnamed: 0,Museum_ID,Museum_Name,Museum_City,Museum_Region,Museum_Latitude,Museum_Longitude,Ticket_Cost
0,http://dati.beniculturali.it/mibact/luoghi/res...,Parco Poesia Pascoli - Museo Casa Pascoli,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.105186,12.415602,3.00
1,http://dati.beniculturali.it/mibact/luoghi/res...,Museo di arte contemporanea Giuseppe e Titina ...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.868443,9.266259,2.00
2,http://dati.beniculturali.it/mibact/luoghi/res...,Museo delle navi romane di Nemi,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.721714,12.701998,5.00
3,http://dati.beniculturali.it/mibact/luoghi/res...,Museo parrocchiale di Castroreale,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.100582,15.212629,2.00
4,http://dati.beniculturali.it/mibact/luoghi/res...,Parco archeologico e Museo all'aperto della Te...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.57048,10.905629,7.00
...,...,...,...,...,...,...,...
1181,http://dati.beniculturali.it/mibact/luoghi/res...,Museo del merletto di Rapallo,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.352203,9.230796,3.00
1182,http://dati.beniculturali.it/mibact/luoghi/res...,MAXXI - Museo nazionale delle arti del XXI secolo,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.928467,12.468314,12.00
1183,http://dati.beniculturali.it/mibact/luoghi/res...,Musei civici di Sarnano,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.03573,13.300746,5.00
1184,http://dati.beniculturali.it/mibact/luoghi/res...,Villa Croce,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.39816,8.936704,5.00


Unnamed: 0,Museum_Region,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,1.00,6
1,http://dati.beniculturali.it/mibact/luoghi/res...,1.50,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,1.70,1
3,http://dati.beniculturali.it/mibact/luoghi/res...,2.00,6
4,http://dati.beniculturali.it/mibact/luoghi/res...,2.50,1
...,...,...,...
278,http://dati.beniculturali.it/mibact/luoghi/res...,6.00,6
279,http://dati.beniculturali.it/mibact/luoghi/res...,7.00,3
280,http://dati.beniculturali.it/mibact/luoghi/res...,8.00,2
281,http://dati.beniculturali.it/mibact/luoghi/res...,9.00,1


Unnamed: 0,Museum_City,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,3.00,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,5.00,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,1.50,1
3,http://dati.beniculturali.it/mibact/luoghi/res...,4.00,2
4,http://dati.beniculturali.it/mibact/luoghi/res...,4.00,1
...,...,...,...
962,http://dati.beniculturali.it/mibact/luoghi/res...,2.00,1
963,http://dati.beniculturali.it/mibact/luoghi/res...,5.00,1
964,http://dati.beniculturali.it/mibact/luoghi/res...,5.00,1
965,http://dati.beniculturali.it/mibact/luoghi/res...,3.00,1


In [10]:
#CHECKING FOR LIBRARIES WITH FREE ENTRY

#Script for extracting IDs from Dataframe of Entites with a Free Entry cost

import re
def extract_id(entity_id):
    match = re.search(r'/(\d+)_', entity_id)
    if match:
        return match.group(1)  
    return None  

numeric_ids = df_free_entry['Entity_ID'].apply(extract_id).tolist()


#Script for checking if any Library Ids in Library Dataframe match with Ids in numeric_ids list made from free enty entities Dataframe
def check_id_in_library_id(library_id):
    for numeric_id in numeric_ids:
        if numeric_id in library_id:
            return 'Free Entry' 
    return None

#Dataframes of Libraries with free entry cost and their number for each regiont with a total count
library_df['Ticket_Cost'] = library_df['Library_ID'].apply(check_id_in_library_id)
df_library_free_entry_cost = library_df[library_df['Ticket_Cost'] == 'Free Entry'].reset_index(drop=True)

df_library_free_entry_region_counts = df_library_free_entry_cost.groupby(['Library_Region', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_library_free_entry_region_counts['Count'].sum()
total_row = pd.DataFrame({'Library_Region': ['TOTAL'], 'Count': [total_count]})
df_library_free_entry_region_counts = pd.concat([df_library_free_entry_region_counts, total_row], ignore_index=True).fillna('')

df_library_free_entry_city_counts = df_library_free_entry_cost.groupby(['Library_City', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_library_free_entry_city_counts['Count'].sum()
total_row = pd.DataFrame({'Library_City': ['TOTAL'], 'Count': [total_count]})
df_library_free_entry_city_counts = pd.concat([df_library_free_entry_city_counts, total_row], ignore_index=True).fillna('')

#Displaying dataframes for libraries entites with a free entry and their region/city count and a total count
display(df_library_free_entry_cost)
display(df_library_free_entry_region_counts)
display(df_library_free_entry_city_counts)

#CSV of all above DataFrame
# csv_file_library_free_ticket_cost_price = "library_free_entry_Cost_Price.csv"
# df_library_free_entry_cost.to_csv(csv_file_library_free_ticket_cost_price, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_free_ticket_cost_price}")

# csv_file_library_free_entry_region_count = "library_Free_Entry_region_count.csv"
# df_library_free_entry_region_counts.to_csv(csv_file_library_free_entry_region_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_free_entry_region_count}")

# csv_file_library_free_entry_city_count = "library_Free_Entry_city_count.csv"
# df_library_free_entry_city_counts.to_csv(csv_file_library_free_entry_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_free_entry_city_count}")



Unnamed: 0,Library_ID,Library_Name,Library_City,Library_Region,Library_Latitude,Library_Longitude,Ticket_Cost
0,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Casanatense,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.898148,12.479639,Free Entry
1,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Statale di Lucca,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.8449,10.501189,Free Entry
2,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Riccardiana,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.77558,11.255349,Free Entry
3,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Medicea Laurenziana,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.774815,11.254537,Free Entry
4,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Medica Statale di Roma,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,0.040255,-0.010042,Free Entry
5,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Universitaria di Padova,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.40711,11.880526,Free Entry
6,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca statale di Macerata,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.30105,13.448922,Free Entry
7,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Vallicelliana,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.898186,12.469432,Free Entry
8,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Statale del Monumento Nazionale di ...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.925537,13.104736,Free Entry
9,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca Universitaria di Cagliari,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,39.217236,9.115117,Free Entry


Unnamed: 0,Library_Region,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,3
2,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,2
3,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,2
4,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,12
5,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
6,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,3
7,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
8,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
9,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,2


Unnamed: 0,Library_City,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
3,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
4,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
5,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,4
6,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
7,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
8,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
9,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1


In [11]:
#Finding if there is any library with a TICKET COST for entry (only 1 found!)

import re
def extract_id(entity_id):
    match = re.search(r'/(\d+)_', entity_id)
    if match:
        numeric_id = match.group(1)  
        price = df_ticket_cost.loc[df_ticket_cost['Entity_ID'] == entity_id, 'Entry_Price'].iloc[0]  
        return (numeric_id, price)  
    return None  
numeric_ids_with_prices= df_ticket_cost['Entity_ID'].apply(extract_id).tolist()


#Cheking if any library ids of Libraries Dataframe in ticket cost dataframe entities
def check_id_in_library_id(library_id):
    for numeric_id, price in numeric_ids_with_prices:
        if numeric_id in library_id:
            return price  
    return None  
library_df['Ticket_Cost'] = library_df['Library_ID'].apply(check_id_in_library_id)
df_library_ticket_cost = library_df[library_df['Ticket_Cost'].notnull()].reset_index(drop=True)

#removing wrong matches from dataframes of library ticket cost
IDs_to_remove=["http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/117195",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/117246",
                "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/117148"
                ]
df_library_ticket_cost = df_library_ticket_cost[~df_library_ticket_cost['Library_ID'].isin(IDs_to_remove)]
df_library_ticket_cost['Ticket_Cost'] = df_library_ticket_cost['Ticket_Cost'].str.replace(',', '.').astype(float).map('{:.2f}'.format)

#Dataframes of libraries with a ticket cost, their region/city count and a total count
df_library_ticket_cost_region_counts = df_library_ticket_cost.groupby(['Library_Region', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_library_ticket_cost_region_counts ['Count'].sum()
total_row = pd.DataFrame({'Library_Region': ['TOTAL'], 'Count': [total_count]})
df_library_ticket_cost_region_counts   = pd.concat([df_library_ticket_cost_region_counts , total_row], ignore_index=True).fillna('')


df_library_ticket_cost_city_count = df_library_ticket_cost.groupby(['Library_City', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_library_ticket_cost_city_count['Count'].sum()
total_row = pd.DataFrame({'Library_City': ['TOTAL'], 'Count': [total_count]})
df_library_ticket_cost_city_count  = pd.concat([df_library_ticket_cost_city_count, total_row], ignore_index=True).fillna('')

#Displaying library ticket cost dataframe, their region/city count and a total count
display(df_library_ticket_cost)
display(df_library_ticket_cost_region_counts)
display(df_library_ticket_cost_city_count)

# CSV of above Dataframes
# csv_file_library_ticket_cost_price = "library_ticket_cost_Price.csv"
# df_library_ticket_cost.to_csv(csv_file_library_ticket_cost_price, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_ticket_cost_price}")

# csv_file_library_ticket_cost_region_count = "library_ticket_cost_region_count.csv"
# df_library_ticket_cost_region_counts.to_csv(csv_file_library_ticket_cost_region_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_ticket_cost_region_count}")

# csv_file_library_ticket_cost_city_count = "library_ticket_cost_city_count.csv"
# df_library_ticket_cost_city_count.to_csv(csv_file_library_ticket_cost_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_library_ticket_cost_city_count}")


Unnamed: 0,Library_ID,Library_Name,Library_City,Library_Region,Library_Latitude,Library_Longitude,Ticket_Cost
1,http://dati.beniculturali.it/mibact/luoghi/res...,Biblioteca nazionale Marciana - Sale monumenta...,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.434067,12.334472,25.0


Unnamed: 0,Library_Region,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,25.0,1
1,TOTAL,,1


Unnamed: 0,Library_City,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,25.0,1
1,TOTAL,,1


In [20]:
#CHEKING FOR ANY ARCHIVE WITH A FREE ENTRY COST

#Script for extracting IDs from Dataframe of Entites with a Free Entry cost

import re
def extract_id(entity_id):
    match = re.search(r'/(\d+)_', entity_id)
    if match:
        return match.group(1)  
    return None  

numeric_ids = df_free_entry['Entity_ID'].apply(extract_id).tolist()

#Script for checking if any Archive Ids in Library Dataframe match with Ids in numeric_ids list made from free enty entities Dataframe
def check_id_in_archive_id(archive_id):
    for numeric_id in numeric_ids:
        if numeric_id in archive_id:
            return 'Free Entry' 
    return None

#Dataframes of Archivies with free entry cost, their number for each region and a total count
df_archives['Ticket_Cost'] = df_archives['Archive_ID'].apply(check_id_in_archive_id)
df_archive_free_ticket_cost = df_archives[df_archives['Ticket_Cost'] == 'Free Entry'].reset_index(drop=True)

df_archive_free_entry_region_counts = df_archive_free_ticket_cost.groupby(['Archive_Region', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_archive_free_entry_region_counts ['Count'].sum()
total_row = pd.DataFrame({'Archive_Region': ['TOTAL'], 'Count': [total_count]})
df_archive_free_entry_region_counts  = pd.concat([df_archive_free_entry_region_counts , total_row], ignore_index=True).fillna('')

df_archive_free_entry_city_counts = df_archive_free_ticket_cost.groupby(['Archive_City', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_archive_free_entry_city_counts['Count'].sum()
total_row = pd.DataFrame({'Archive_City': ['TOTAL'], 'Count': [total_count]})
df_archive_free_entry_city_counts  = pd.concat([df_archive_free_entry_city_counts, total_row], ignore_index=True).fillna('')

#Displaying dataframes of archive free entry entities, their region/city count and a total count
display(df_archive_free_ticket_cost)
display(df_archive_free_entry_region_counts)
display(df_archive_free_entry_city_counts)

#CSV of above Dataframes
# csv_file_archive_free_entry_cost = "Archive__free_entry_cost.csv"
# df_archive_free_ticket_cost.to_csv(csv_file_archive_free_entry_cost, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archive_free_entry_cost }")

# csv_file_archive_free_entry_region = "Archive_Free_Entry_region_count.csv"
# df_archive_free_entry_region_counts.to_csv(csv_file_archive_free_entry_region, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archive_free_entry_region}")

# csv_file_archive_free_entry_city_count = "Archive_Free_Entry_city_count.csv"
# df_archive_free_entry_city_counts.to_csv(csv_file_archive_free_entry_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archive_free_entry_city_count}")



Unnamed: 0,Archive_ID,Archive_Name,Archive_City,Archive_Region,Archive_Latitude,Archive_Longitude,Ticket_Cost
0,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Trento,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,46.08832,11.109711,Free Entry
1,http://dati.beniculturali.it/mibact/luoghi/res...,"Archivio di Stato di Bologna, Sezione di Imola",http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.35559,11.711252,Free Entry
2,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio centrale dello Stato,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.82904,12.47538,Free Entry
3,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Ancona. Sede distaccata,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.595825,13.503158,Free Entry
4,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Catanzaro,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,38.90427,16.59281,Free Entry
...,...,...,...,...,...,...,...
101,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio di Stato di Firenze,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.76833,11.26954,Free Entry
102,http://dati.beniculturali.it/mibact/luoghi/res...,Domus mazziniana,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,43.711628,10.398145,Free Entry
103,http://dati.beniculturali.it/mibact/luoghi/res...,Archivio della fotografia,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,44.11465,9.810335,Free Entry
104,http://dati.beniculturali.it/mibact/luoghi/res...,Villa Vicentini Miniussi,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,41.86956,12.568359,Free Entry


Unnamed: 0,Archive_Region,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,7
1,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,2
2,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,7
3,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,4
4,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,10
5,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,4
6,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,7
7,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,4
8,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,7
9,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,9


Unnamed: 0,Archive_City,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
1,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
2,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,3
3,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
4,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
...,...,...,...
96,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
97,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
98,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1
99,http://dati.beniculturali.it/mibact/luoghi/res...,Free Entry,1


In [13]:
#CHEKING FOR ANY ARCHIVE WITH A TICKET COST

#Finding if there is any archive with a TICKET COST for entry (only 1 found!)

import re
def extract_id(entity_id):
    match = re.search(r'/(\d+)_', entity_id)
    if match:
        numeric_id = match.group(1)  
        price = df_ticket_cost.loc[df_ticket_cost['Entity_ID'] == entity_id, 'Entry_Price'].iloc[0]  
        return (numeric_id, price)  
    return None  
numeric_ids_with_prices= df_ticket_cost['Entity_ID'].apply(extract_id).tolist()


#Script for checking if any Archive Ids in Library Dataframe match with Ids in numeric_ids list made from TICKET COST entities Dataframe
def check_id_in_archive_id(archive_id):
    for numeric_id, price in numeric_ids_with_prices:
        if numeric_id in archive_id:
            return price  
    return None  



#removing wrong matches from dataframes of archive ticket cost 
df_archives['Ticket_Cost'] = df_archives['Archive_ID'].apply(check_id_in_archive_id)
df_archive_ticket_cost = df_archives[df_archives['Ticket_Cost'].notnull()].reset_index(drop=True)
IDs_to_remove=["http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/120075",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/117164",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/120161",
               "http://dati.beniculturali.it/mibact/luoghi/resource/CulturalInstituteOrSite/120074"
                ]
df_archive_ticket_cost = df_archive_ticket_cost[~df_archive_ticket_cost['Archive_ID'].isin(IDs_to_remove)]
df_archive_ticket_cost['Ticket_Cost'] = df_archive_ticket_cost['Ticket_Cost'].str.replace(',', '.').astype(float).map('{:.2f}'.format)

#Dataframe for archives with a ticket cost, their region/city count and a total count
df_archive_ticket_cost_region_counts = df_archive_ticket_cost.groupby(['Archive_Region', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_archive_ticket_cost_region_counts ['Count'].sum()
total_row = pd.DataFrame({'Archive_Region': ['TOTAL'], 'Count': [total_count]})
df_archive_ticket_cost_region_counts  = pd.concat([df_archive_ticket_cost_region_counts , total_row], ignore_index=True).fillna('')

df_archive_ticket_cost_city_counts = df_archive_ticket_cost.groupby(['Archive_City', 'Ticket_Cost']).size().reset_index(name='Count')
total_count = df_archive_ticket_cost_city_counts['Count'].sum()
total_row = pd.DataFrame({'Archive_City': ['TOTAL'], 'Count': [total_count]})
df_archive_ticket_cost_city_counts  = pd.concat([df_archive_ticket_cost_city_counts, total_row], ignore_index=True).fillna('')


#Displaying dataframes of archive ticket cost entites, their region/city count and a total count
display(df_archive_ticket_cost)
display(df_archive_ticket_cost_region_counts)
display(df_archive_ticket_cost_city_counts)

# CSV of above Dataframes
# csv_file_archive_ticket_cost = "Archive__ticket_cost.csv"
# df_archive_ticket_cost.to_csv(csv_file_archive_ticket_cost, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archive_ticket_cost }")

# csv_file_archive_ticket_cost_region = "Archive_ticket_cost_region_count.csv"
# df_archive_ticket_cost_region_counts.to_csv(csv_file_archive_ticket_cost_region , index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archive_ticket_cost_region }")

# csv_file_archive_ticket_cost_city_count = "Archive_ticket_cost_city_count.csv"
# df_archive_ticket_cost_city_counts.to_csv(csv_file_archive_ticket_cost_city_count, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_archive_ticket_cost_city_count}")



Unnamed: 0,Archive_ID,Archive_Name,Archive_City,Archive_Region,Archive_Latitude,Archive_Longitude,Ticket_Cost
4,http://dati.beniculturali.it/mibact/luoghi/res...,Casa Zegna,http://dati.beniculturali.it/mibact/luoghi/res...,http://dati.beniculturali.it/mibact/luoghi/res...,45.67137,8.157712,3.0


Unnamed: 0,Archive_Region,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,3.0,1
1,TOTAL,,1


Unnamed: 0,Archive_City,Ticket_Cost,Count
0,http://dati.beniculturali.it/mibact/luoghi/res...,3.0,1
1,TOTAL,,1


In [14]:
""" After identifiyng separately how many archives, museums, libraries have a ticket cost or a free entry also divided per region and city, 
we decided to compare Museum, Archives, Librarires numbers according to their ticket cost/free entry aspect and then compare their distribution for regions and cities"""


#Dataframes containing numbers for Museums, Libraries, Archives with a free entry cost
num_museums_free_entry = df_museum_free_entry_cost['Museum_ID'].nunique()
num_archives_free_entry = df_archive_free_ticket_cost['Archive_ID'].nunique()
num_libraries_free_entry = df_library_free_entry_cost['Library_ID'].nunique()

#Creating an unique dataframes with all numbers for Museums, Libraries, Archives with a free entry cost
df_all_numbers_MAL_free_entry=pd.DataFrame({
    'Museums': [num_museums_free_entry],
    'Archives': [num_archives_free_entry],
    'Libraries': [num_libraries_free_entry]
})

#Creatin an header for the main dataframe with all numbers for Museums, Libraries, Archives with a free entry cost
new_header = ['Numbers of Museums, Archives, Libraries with a free entry cost'] + [''] * (len(df_all_numbers_MAL_free_entry.columns)-1)
df_all_numbers_MAL_free_entry.columns = pd.MultiIndex.from_tuples(zip(new_header, df_all_numbers_MAL_free_entry))

# Remove the index
df_all_numbers_MAL_free_entry.index = ['']

# Center-align the numbers
df_all_numbers_free_entry_styled =df_all_numbers_MAL_free_entry.style.set_properties(**{'text-align': 'center'}).set_table_styles([dict(selector='th', props=[('text-align', 'center')])])


display(df_all_numbers_free_entry_styled)

# CSV of above Dataframe
# csv_file_all_numbers_free_entry = "Numbers_of_MALs_with_free_entry_cost.csv"
# df_all_numbers_MAL_free_entry.to_csv(csv_file_all_numbers_free_entry, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_all_numbers_free_entry}")



#Dataframes containing numbers for Museums, Libraries, Archives with a ticket cost
num_museums_ticket_cost = df_museum_ticket_cost['Museum_ID'].nunique()
num_archives_ticket_cost = df_archive_ticket_cost['Archive_ID'].nunique()
num_libraries_ticket_cost = df_library_ticket_cost['Library_ID'].nunique()

#Creating an unique dataframes with all numbers for Museums, Libraries, Archives with a ticket cost
df_all_numbers_MAL_ticket_cost=pd.DataFrame({
    'Museums': [num_museums_ticket_cost],
    'Archives': [num_archives_ticket_cost],
    'Libraries': [num_libraries_ticket_cost]
})

#Creatin an header for the main dataframe with all numbers for Museums, Libraries, Archives with a ticket cost
new_header = ['Numbers of Museums, Archives, Libraries with a ticket entry cost'] + [''] * (len(df_all_numbers_MAL_ticket_cost.columns)-1)
df_all_numbers_MAL_ticket_cost.columns = pd.MultiIndex.from_tuples(zip(new_header, df_all_numbers_MAL_ticket_cost))

# Remove the index
df_all_numbers_MAL_ticket_cost.index = ['']

# Center-align the numbers
df_all_numbers_ticket_cost_styled = df_all_numbers_MAL_ticket_cost.style.set_properties(**{'text-align': 'center'}).set_table_styles([dict(selector='th', props=[('text-align', 'center')])])


display(df_all_numbers_ticket_cost_styled)

# CSV of above Dataframe
# csv_file_all_numbers_ticket_cost = "Numbers_of_MALs_with_ticket_entry_cost.csv"
# df_all_numbers_MAL_ticket_cost.to_csv(csv_file_all_numbers_ticket_cost, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_all_numbers_ticket_cost}")



Unnamed: 0_level_0,"Numbers of Museums, Archives, Libraries with a free entry cost",Unnamed: 2_level_0,Unnamed: 3_level_0
Unnamed: 0_level_1,Museums,Archives,Libraries
,1370,106,37


Unnamed: 0_level_0,"Numbers of Museums, Archives, Libraries with a ticket entry cost",Unnamed: 2_level_0,Unnamed: 3_level_0
Unnamed: 0_level_1,Museums,Archives,Libraries
,1186,1,1


In [15]:
""" In the next step we want to create a dataframe with all museums, archives, libraries and the free-entry number distribution for each region. This will require Modification of each archives, museums and libraries dataframe with free entry and their region counts. 
This approach will be useful for the merge of all the single dataframes into one."""


#Creating dataframes for free entry museums, libraries and archives and their regions (only the names) and their count
df_museum_free_entry_region_counts_modified=pd.DataFrame()
df_museum_free_entry_region_counts_modified['Region'] = df_museum_free_entry_region_counts['Museum_Region'].str.split('/').str[-1]
df_museum_free_entry_region_counts_modified ['Museums_Number']= df_museum_free_entry_region_counts['Count']


df_archive_free_entry_region_counts_modified=pd.DataFrame()
df_archive_free_entry_region_counts_modified['Region'] = df_archive_free_entry_region_counts['Archive_Region'].str.split('/').str[-1]
df_archive_free_entry_region_counts_modified['Archives_Number']= df_archive_free_entry_region_counts['Count']


df_libraries_free_entry_region_counts_modified=pd.DataFrame()
df_libraries_free_entry_region_counts_modified['Region'] = df_library_free_entry_region_counts['Library_Region'].str.split('/').str[-1]
df_libraries_free_entry_region_counts_modified['Libraries_Number']= df_library_free_entry_region_counts['Count']


# display (df_museum_free_entry_region_counts_modified)
# display (df_archive_free_entry_region_counts_modified)
# display (df_libraries_free_entry_region_counts_modified)


#Merging of all libraries, archives and museums dataframes and region count into one dataframe
df_all_numbers_region_free_entry = df_museum_free_entry_region_counts_modified.merge(df_archive_free_entry_region_counts_modified, on='Region', how='outer').merge(df_libraries_free_entry_region_counts_modified, on='Region', how='outer')

#Changing Nan values into '0' and having all integers number in Archives_Number Column
df_all_numbers_region_free_entry['Archives_Number'] = df_all_numbers_region_free_entry['Archives_Number'].fillna(0).astype(int)

#Changing Nan values into '0' and having all integers number in Libraries_Number Column       
df_all_numbers_region_free_entry['Libraries_Number'] = df_all_numbers_region_free_entry['Libraries_Number'].fillna(0).astype(int)


#Creation of an header for the final dataframe
new_header = ['Numbers per region of Museums, Archives, Libraries with a free entry cost'] + [''] * (len(df_all_numbers_region_free_entry.columns) - 1)
df_all_numbers_region_free_entry.columns = pd.MultiIndex.from_tuples(zip(new_header, df_all_numbers_region_free_entry.columns))
display(df_all_numbers_region_free_entry)


# CSV of above Dataframe
# csv_file_all_numbers_region = "Numbers_of_MALs_Per_Region_Free_Entry.csv"
# df_all_numbers_region_free_entry.to_csv(csv_file_all_numbers_region , index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_all_numbers_region}")


Unnamed: 0_level_0,"Numbers per region of Museums, Archives, Libraries with a free entry cost",Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0
Unnamed: 0_level_1,Region,Museums_Number,Archives_Number,Libraries_Number
0,Abruzzo,27,7,0
1,Basilicata,7,2,1
2,Calabria,26,7,0
3,Campania,161,4,3
4,Emilia-Romagna,347,10,2
5,Friuli-Venezia_Giulia,104,4,2
6,Lazio,177,7,12
7,Liguria,107,4,1
8,Lombardia,157,7,3
9,Marche,54,9,1


In [16]:
""" In the next step we want to create a dataframe with all museums, archives libraries and the free-entry numbers distribution for each city. This will require Modification of each archives, museums and libraries dataframe with free entry and their city counts. 
This approach will be useful for the merge of all the single dataframes into one."""



#Creating modified dataframes for museums, libraries and archives with their city (only the name) and their count
df_museum_free_entry_city_counts_modified=pd.DataFrame()
df_museum_free_entry_city_counts_modified['City'] = df_museum_free_entry_city_counts['Museum_City'].str.split('/').str[-1]
df_museum_free_entry_city_counts_modified ['Museums_Number']= df_museum_free_entry_city_counts['Count']


df_archive_free_entry_city_counts_modified=pd.DataFrame()
df_archive_free_entry_city_counts_modified['City'] = df_archive_free_entry_city_counts['Archive_City'].str.split('/').str[-1]
df_archive_free_entry_city_counts_modified['Archives_Number']= df_archive_free_entry_city_counts['Count']


df_libraries_free_entry_city_counts_modified=pd.DataFrame()
df_libraries_free_entry_city_counts_modified['City'] = df_library_free_entry_city_counts['Library_City'].str.split('/').str[-1]
df_libraries_free_entry_city_counts_modified['Libraries_Number']= df_library_free_entry_city_counts['Count']


# display (df_museum_free_entry_city_counts_modified)
# display (df_archive_free_entry_city_counts_modified)
# display (df_libraries_free_entry_city_counts_modified)


#Mergin of all libraries, archives and museums dataframes and city count into one dataframe
df_all_numbers_city_free_entry = df_museum_free_entry_city_counts_modified.merge(df_archive_free_entry_city_counts_modified, on='City', how='outer').merge(df_libraries_free_entry_city_counts_modified, on='City', how='outer')

#Changing NaN values into '0' and having integers number in all columns
df_all_numbers_city_free_entry['Museums_Number'] = df_all_numbers_city_free_entry['Museums_Number'].fillna(0).astype(int)
df_all_numbers_city_free_entry['Archives_Number'] = df_all_numbers_city_free_entry['Archives_Number'].fillna(0).astype(int)
df_all_numbers_city_free_entry['Libraries_Number'] = df_all_numbers_city_free_entry['Libraries_Number'].fillna(0).astype(int)

total_index = df_all_numbers_city_free_entry[df_all_numbers_city_free_entry['City'] == 'TOTAL'].index[0]
total_row = df_all_numbers_city_free_entry.loc[total_index]
df_all_numbers_city_free_entry.drop(index=total_index, inplace=True)
df_all_numbers_city_free_entry = df_all_numbers_city_free_entry.sort_values(by='City')
df_all_numbers_city_free_entry= pd.concat ([df_all_numbers_city_free_entry, total_row.to_frame().T], ignore_index=True)




# #Creation of an header for the final dataframe
new_header = ['Numbers per city of Museums, Archives, Libraries with a free entry cost'] + [''] * (len(df_all_numbers_city_free_entry.columns) - 1)
df_all_numbers_city_free_entry.columns = pd.MultiIndex.from_tuples(zip(new_header, df_all_numbers_city_free_entry.columns))

display(df_all_numbers_city_free_entry)


# CSV of above Dataframe
# csv_file_all_numbers_city = "Numbers_of_MALs_Per_city_Free_Entry.csv"
# df_all_numbers_city_free_entry.to_csv(csv_file_all_numbers_city , index=False, encoding='utf-8')
# print(f"Results have been written to {csv_file_all_numbers_city}")

Unnamed: 0_level_0,"Numbers per city of Museums, Archives, Libraries with a free entry cost",Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0
Unnamed: 0_level_1,City,Museums_Number,Archives_Number,Libraries_Number
0,Acerra,3,0,0
1,Aci_Castello,1,0,0
2,Acqualagna,1,0,0
3,Acqui_Terme,2,0,0
4,Adro,1,0,0
...,...,...,...,...
873,Zibello,1,0,0
874,Zignago,1,0,0
875,Zocca,1,0,0
876,Zogno,1,0,0


In [17]:
""" In this step instead we want to create a dataframe with all museums, archives libraries and the ticket cost numbers distribution for each region. This will require Modification of each archives, museums and libraries dataframe with a ticket cost and their region counts. 
This approach will be useful for the merge of all the single dataframes into one."""


#Creating a modified dataframe for museums with a ticket cost, their region (only the names) and count
df_museum_ticket_cost_region_counts_modified=pd.DataFrame()
df_museum_ticket_cost_region_counts_modified['Region'] = df_museum_ticket_cost_region_counts['Museum_Region'].str.split('/').str[-1]
df_museum_ticket_cost_region_counts_modified ['Museums_Number']= df_museum_ticket_cost_region_counts['Count']
df_museum_ticket_cost_region_counts_modified = df_museum_ticket_cost_region_counts_modified.groupby('Region', as_index=False)['Museums_Number'].sum()
total_index = df_museum_ticket_cost_region_counts_modified[df_museum_ticket_cost_region_counts_modified['Region'] == 'TOTAL'].index[0]
total_row = df_museum_ticket_cost_region_counts_modified.loc[total_index]
df_museum_ticket_cost_region_counts_modified.drop(index=total_index, inplace=True)
df_museum_ticket_cost_region_counts_modified= pd.concat ([df_museum_ticket_cost_region_counts_modified, total_row.to_frame().T], ignore_index=True)


#Creating a modified dataframe for archives with a ticket cost, their region (only the names) and count
df_archive_ticket_cost_region_counts_modified=pd.DataFrame()
df_archive_ticket_cost_region_counts_modified['Region'] = df_archive_ticket_cost_region_counts['Archive_Region'].str.split('/').str[-1]
df_archive_ticket_cost_region_counts_modified['Archives_Number']= df_archive_ticket_cost_region_counts['Count']

#Creating a modified dataframe for libraries with a ticket cost, their region (only the names) and count
df_libraries_ticket_cost_region_counts_modified=pd.DataFrame()
df_libraries_ticket_cost_region_counts_modified['Region'] = df_library_ticket_cost_region_counts['Library_Region'].str.split('/').str[-1]
df_libraries_ticket_cost_region_counts_modified['Libraries_Number']= df_library_ticket_cost_region_counts['Count']


# display (df_museum_ticket_cost_region_counts_modified)
# display (df_archive_ticket_cost_region_counts_modified)
# display (df_libraries_ticket_cost_region_counts_modified)


# Merging of all libraries, archives and museums dataframes with a ticket cost and region count into one dataframe
df_all_numbers_region_ticket_cost = df_museum_ticket_cost_region_counts_modified.merge(df_archive_ticket_cost_region_counts_modified, on='Region', how='outer').merge(df_libraries_ticket_cost_region_counts_modified, on='Region', how='outer')


# #Changing Nan values into '0' and having all integers number in Archives_Number Column
df_all_numbers_region_ticket_cost['Archives_Number'] = df_all_numbers_region_ticket_cost['Archives_Number'].fillna(0).astype(int)

#Changing Nan values into '0'  and having all integers number in Libraries_Number Column       
df_all_numbers_region_ticket_cost['Libraries_Number'] = df_all_numbers_region_ticket_cost['Libraries_Number'].fillna(0).astype(int)


# #Creation of an header for the final dataframe
new_header = ['Numbers per region of Museums, Archives, Libraries with a ticket entry cost'] + [''] * (len(df_all_numbers_region_ticket_cost.columns) - 1)
df_all_numbers_region_ticket_cost.columns = pd.MultiIndex.from_tuples(zip(new_header, df_all_numbers_region_ticket_cost.columns))
display(df_all_numbers_region_ticket_cost)

# CSV of above Dataframe
# csv_all_numbers_region_ticket_cost = "Numbers_of_MALs_Per_region_Ticket_Cost.csv"
# df_all_numbers_region_ticket_cost.to_csv(csv_all_numbers_region_ticket_cost , index=False, encoding='utf-8')
# print(f"Results have been written to {csv_all_numbers_region_ticket_cost}")

Unnamed: 0_level_0,"Numbers per region of Museums, Archives, Libraries with a ticket entry cost",Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0
Unnamed: 0_level_1,Region,Museums_Number,Archives_Number,Libraries_Number
0,Abruzzo,40,0,0
1,Basilicata,14,0,0
2,Calabria,20,0,0
3,Campania,45,0,0
4,Emilia-Romagna,112,0,0
5,Friuli-Venezia_Giulia,39,0,0
6,Lazio,107,0,0
7,Liguria,74,0,0
8,Lombardia,181,0,0
9,Marche,96,0,0


In [18]:
""" In the next step  we want to create a dataframe with all museums, archives libraries and the ticket cost numbers distribution for each city. This will require Modification of each archives, museums and libraries dataframe with free entry and their city counts. 
This approach will be useful for the merge of all the single dataframes into one."""



#Creating a modified dataframe for museums with a ticket cost, their city(only the names) and count
df_museum_ticket_cost_city_counts_modified=pd.DataFrame()
df_museum_ticket_cost_city_counts_modified['City'] = df_museum_ticket_cost_city_counts['Museum_City'].str.split('/').str[-1]
df_museum_ticket_cost_city_counts_modified['Museums_Number']= df_museum_ticket_cost_city_counts['Count']
df_museum_ticket_cost_city_counts_modified = df_museum_ticket_cost_city_counts_modified.groupby('City', as_index=False)['Museums_Number'].sum()
total_index = df_museum_ticket_cost_city_counts_modified[df_museum_ticket_cost_city_counts_modified['City'] == 'TOTAL'].index[0]
total_row = df_museum_ticket_cost_city_counts_modified.loc[total_index]
df_museum_ticket_cost_city_counts_modified.drop(index=total_index, inplace=True)
df_museum_ticket_cost_city_counts_modified=pd.concat ([df_museum_ticket_cost_city_counts_modified, total_row.to_frame().T], ignore_index=True)

#Creating a modified dataframe for archives with a ticket cost, their city (only the names) and count
df_archive_ticket_cost_city_counts_modified=pd.DataFrame()
df_archive_ticket_cost_city_counts_modified['City'] = df_archive_ticket_cost_city_counts['Archive_City'].str.split('/').str[-1]
df_archive_ticket_cost_city_counts_modified['Archives_Number']= df_archive_ticket_cost_city_counts['Count']

#Creating a modified dataframe for libraries with a ticket cost, their city (only the names) and count
df_libraries_ticket_cost_city_counts_modified=pd.DataFrame()
df_libraries_ticket_cost_city_counts_modified['City'] = df_library_ticket_cost_city_count['Library_City'].str.split('/').str[-1]
df_libraries_ticket_cost_city_counts_modified['Libraries_Number']= df_library_ticket_cost_city_count['Count']


# display (df_museum_ticket_cost_city_counts_modified)
# display (df_archive_ticket_cost_city_counts_modified)
# display (df_libraries_ticket_cost_city_counts_modified)


# Merging of all libraries, archives and museums dataframes with a ticket cost for city count into one dataframe
df_all_numbers_city_ticket_cost = df_museum_ticket_cost_city_counts_modified.merge(df_archive_ticket_cost_city_counts_modified, on='City', how='outer').merge(df_libraries_ticket_cost_city_counts_modified, on='City', how='outer')


# #Changing Nan values into '0' and having all integers number in Archives_Number Column
df_all_numbers_city_ticket_cost['Archives_Number'] = df_all_numbers_city_ticket_cost['Archives_Number'].fillna(0).astype(int)

#Changing Nan values into '0'  and having all integers number in Libraries_Number Column       
df_all_numbers_city_ticket_cost['Libraries_Number'] = df_all_numbers_city_ticket_cost['Libraries_Number'].fillna(0).astype(int)
total_index = df_all_numbers_city_ticket_cost[df_all_numbers_city_ticket_cost['City'] == 'TOTAL'].index[0]
total_row = df_all_numbers_city_ticket_cost.loc[total_index]
df_all_numbers_city_ticket_cost.drop(index=total_index, inplace=True)
df_all_numbers_city_ticket_cost = df_all_numbers_city_ticket_cost.sort_values(by='City')
df_all_numbers_city_ticket_cost= pd.concat ([df_all_numbers_city_ticket_cost, total_row.to_frame().T], ignore_index=True)




# #Creation of an header for the final dataframe
new_header = ['Numbers per City of Museums, Archives, Libraries with a ticket entry cost'] + [''] * (len(df_all_numbers_city_ticket_cost.columns) - 1)
df_all_numbers_city_ticket_cost.columns = pd.MultiIndex.from_tuples(zip(new_header, df_all_numbers_city_ticket_cost.columns))
display(df_all_numbers_city_ticket_cost)

# CSV of above Dataframe
# csv_all_numbers_city_ticket_cost = "Numbers_of_MALs_Per_City_Ticket_Cost.csv"
# df_all_numbers_city_ticket_cost.to_csv(csv_all_numbers_city_ticket_cost, index=False, encoding='utf-8')
# print(f"Results have been written to {csv_all_numbers_city_ticket_cost}")

Unnamed: 0_level_0,"Numbers per City of Museums, Archives, Libraries with a ticket entry cost",Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0
Unnamed: 0_level_1,City,Museums_Number,Archives_Number,Libraries_Number
0,Abano_Terme,1,0,0
1,Abbadia_Lariana,1,0,0
2,Aci_Castello,1,0,0
3,Acquapendente,2,0,0
4,Acqui_Terme,1,0,0
...,...,...,...,...
705,Zavattarello,1,0,0
706,Zogno,1,0,0
707,Zola_Predosa,1,0,0
708,Zuglio,1,0,0
