In [6]:
import pandas as pd

# Location of the source workbook, relative to the notebook's working directory.
file_path = 'datasets/libraries.xlsx'

# Read the workbook into a DataFrame; with no further arguments pandas loads
# the first sheet and infers each column's dtype.
df = pd.read_excel(file_path)

# Capture the column labels as a plain list and show them under a heading.
fields = list(df.columns)
print("Fields in the dataframe:", fields, sep="\n")


Fields in the dataframe:
['codice_isil', 'codice_sbn', 'denominazione', 'indirizzo', 'cap', 'comune', 'codice_istat_comune', 'provincia', 'regione', 'codice_istat_provincia', 'latitudine', 'longitudine', 'telefono', 'fax', 'email', 'url']


In [7]:
# Columns that should be discarded from the dataframe.
fields_to_remove = ['codice_sbn', 'codice_istat_comune', 'codice_istat_provincia']

# Rebind `df` to a copy without those columns. errors='ignore' skips labels
# that are not present, so re-running this cell after the columns are already
# gone does not raise. The flip side is that a misspelled label would be
# skipped silently as well.
df = df.drop(fields_to_remove, axis='columns', errors='ignore')

# Show which columns are left.
print("Remaining fields in the dataframe:", df.columns.tolist(), sep="\n")


Remaining fields in the dataframe:
['codice_isil', 'denominazione', 'indirizzo', 'cap', 'comune', 'provincia', 'regione', 'latitudine', 'longitudine', 'telefono', 'fax', 'email', 'url']


In [8]:
# Row count before filtering (`df` as it stands when this cell runs).
print(f"Number of records in the original dataframe: {df.shape[0]}")

# Keep only fully populated rows: a row is dropped as soon as any one of its
# columns is missing. `df` itself is left untouched; the result is a new frame.
# NOTE(review): this is a strict filter - a record lacking a single value in
# any column is discarded. Confirm that is intended for every column.
df_filtered = df.dropna(axis='index', how='any')

# Row count after filtering.
print(f"Number of records in the filtered dataframe: {df_filtered.shape[0]}")


Number of records in the original dataframe: 13625
Number of records in the filtered dataframe: 5677


In [9]:
# Select the rows whose "comune" value is exactly 'Pisa'. The comparison is an
# exact string equality, so differently cased or padded spellings do not match.
df_pisa = df_filtered.loc[df_filtered['comune'].eq('Pisa')]

# Report how many rows were selected.
print(f"Number of records in the df_pisa dataframe: {df_pisa.shape[0]}")


Number of records in the df_pisa dataframe: 26


In [10]:
# Write df_pisa to disk as one JSON array with one object per row
# (orient='records'), indented by four spaces for readability.
# NOTE(review): to_json escapes non-ASCII characters by default
# (force_ascii=True); pass force_ascii=False if accented characters should
# appear literally in the file.
df_pisa.to_json(
    'branches_dataset.json',
    orient='records',
    indent=4,
)

print("The df_pisa dataframe has been saved as a pretty JSON file: 'branches_dataset.json'")


The df_pisa dataframe has been saved as a pretty JSON file: 'branches_dataset.json'
