Building a notebook that reformats data (csv) into JSON

Edited by: Uriel

In [16]:
#First step: import needed libraries

import json
import pandas as pd

In [17]:
# Second step: load the CSV file
# The source location lives in one named constant so it is easy to find and change.
CSV_PATH = r"C:\Users\urido\Downloads\mapping-data-main\sample_data\wine-ratings.csv"

data = pd.read_csv(CSV_PATH, sep=",")

# Preview the first five rows (head() defaults to 5)
data.head()

Unnamed: 0.1,Unnamed: 0,name,grape,region,variety,rating,notes
0,0,1000 Stories Bourbon Barrel Aged Batch Blue Ca...,,"Mendocino, California",Red Wine,91.0,"This is a very special, limited release of 100..."
1,1,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,89.0,The California Gold Rush was a period of coura...
2,2,1000 Stories Bourbon Barrel Aged Gold Rush Red...,,California,Red Wine,90.0,The California Gold Rush was a period of coura...
3,3,1000 Stories Bourbon Barrel Aged Zinfandel 2013,,"North Coast, California",Red Wine,91.0,"The wine has a deep, rich purple color. An int..."
4,4,1000 Stories Bourbon Barrel Aged Zinfandel 2014,,California,Red Wine,90.0,Batch #004 is the first release of the 2014 vi...


In [18]:
# Third step: keep only the wines whose region mentions Spain
# Case-insensitive substring match; rows with a missing region count as "no match".
is_spanish = data['region'].str.contains("spain", case=False, na=False)
spanish_wines = data.loc[is_spanish]

spanish_wines

Unnamed: 0.1,Unnamed: 0,name,grape,region,variety,rating,notes
7,7,12 Linajes Crianza 2014,,"Ribera del Duero, Spain",Red Wine,92.0,Red with violet hues. The aromas are very inte...
8,8,12 Linajes Reserva 2012,,"Ribera del Duero, Spain",Red Wine,94.0,"On the nose, a complex predominance of mineral..."
46,46,4 Monos Tinto 2012,,Spain,Red Wine,92.0,"Brilliant red. Scents of violets, plums, and h..."
54,54,A Coroa Godello 2009,,Spain,White Wine,91.0,Aromas of lemon peel on the nose. On the palat...
55,55,A Coroa Godello 2013,,"Valdeorras, Spain",White Wine,90.0,Aromas of lemon peel on the nose. On the palat...
...,...,...,...,...,...,...,...
32082,32082,Layer Cake Garnacha 2011,,Spain,Red Wine,90.0,"Our Layer Cake Garnacha from Calatayud, Spain ..."
32534,32534,Legaris Crianza 2005,,"Ribera del Duero, Spain",Red Wine,90.0,A wine made with 100% Tinta Fina variety. Its ...
32535,32535,Legaris Crianza 2009,,"Ribera del Duero, Spain",Red Wine,90.0,Deep garnet red with hues of the same color. H...
32536,32536,Legaris Crianza 2010,,"Ribera del Duero, Spain",Red Wine,90.0,A very intense picota cherry red with violet h...


In [19]:
# Fourth step: convert the DataFrame into a list of dictionaries (one per wine),
# which json.dump serialises as a JSON array of objects.

# Rename the 'Unnamed: 0' column to 'id' (rename ignores the key if the column is absent,
# so re-running this cell is harmless).
spanish_wines = spanish_wines.rename(columns={'Unnamed: 0': 'id'})

# orient='records' turns every row into its own {column: value} dictionary.
records = spanish_wines.to_dict(orient='records')

# Missing cells (such as 'grape' in the rows displayed above) are float NaN in these
# records, and json.dump would write them as the bare token NaN, which is not valid
# JSON and is rejected by strict parsers. Replace them with None so they become null.
data_to_json = [
    {column: (None if pd.isna(value) else value) for column, value in record.items()}
    for record in records
]

In [20]:
# Fifth step: write the records to disk as indented JSON
# Opening in 'w' mode creates the file or overwrites an existing one.
with open('spanish_wines.json', mode='w') as json_file:
    json.dump(data_to_json, fp=json_file, indent=6)

In [None]:
import pandas as pd
import json

# Load the CSV
# NOTE(review): this cell reads 'wines.csv', whereas the first cells of the notebook
# load 'wine-ratings.csv' from an absolute path -- confirm which file is intended.
df = pd.read_csv('wines.csv')

# Keep the rows whose region contains 'spain' (case-insensitive; missing regions are excluded)
df_spain = df[df['region'].str.contains('spain', case=False, na=False)]

# Rename the 'Unnamed: 0' column to 'id' (ignored if the column is absent)
df_spain = df_spain.rename(columns={'Unnamed: 0': 'id'})

# Group by region and build the nested structure: region -> list of wines
grouped = {}

for region, group_df in df_spain.groupby('region'):
    # 'region' is dropped from each record because it is already the dictionary key
    records = group_df.drop(columns=['region']).to_dict(orient='records')

    # Missing cells are float NaN, which json.dump would write as the bare token NaN
    # (not valid JSON). Replace them with None so they are written as null.
    grouped[region] = [
        {column: (None if pd.isna(value) else value) for column, value in record.items()}
        for record in records
    ]

# Save as JSON (UTF-8, non-ASCII characters written as-is)
with open('spanish_wines_grouped_by_region.json', 'w', encoding='utf-8') as f:
    json.dump(grouped, f, ensure_ascii=False, indent=4)

print("Archivo JSON agrupado por región guardado correctamente.")


In [None]:
import pandas as pd
import json

# Load the CSV
# NOTE(review): this cell reads 'wines.csv', whereas the first cells of the notebook
# load 'wine-ratings.csv' from an absolute path -- confirm which file is intended.
df = pd.read_csv('wines.csv')

# Keep the rows whose region contains 'spain' (case-insensitive; missing regions are excluded)
df_spain = df[df['region'].str.contains('spain', case=False, na=False)]

# Rename 'Unnamed: 0' to 'id' (ignored if the column is absent)
df_spain = df_spain.rename(columns={'Unnamed: 0': 'id'})

# Nested dictionary: region -> variety -> list of wines
grouped_nested = {}

# Group by region first
for region, region_group in df_spain.groupby('region'):
    grouped_nested[region] = {}

    # Within each region, group by variety
    # NOTE(review): groupby drops rows whose key is missing by default, so wines with
    # no 'variety' value would not appear in this output -- confirm that is intended.
    for variety, variety_group in region_group.groupby('variety'):
        # 'region' and 'variety' are dropped because they are already the dictionary keys
        records = variety_group.drop(columns=['region', 'variety']).to_dict(orient='records')

        # Missing cells are float NaN, which json.dump would write as the bare token NaN
        # (not valid JSON). Replace them with None so they are written as null.
        wine_list = [
            {column: (None if pd.isna(value) else value) for column, value in record.items()}
            for record in records
        ]

        grouped_nested[region][variety] = wine_list

# Save the nested JSON (UTF-8, non-ASCII characters written as-is)
with open('spanish_wines_grouped_by_region_and_variety.json', 'w', encoding='utf-8') as f:
    json.dump(grouped_nested, f, ensure_ascii=False, indent=4)

print("Archivo JSON agrupado por región y variedad guardado correctamente.")
