In [87]:
import json
from pathlib import Path
import pandas as pd

# Storing Metadata in Machine-Readable Text Files: JSON as a Metadata format

## Parsing JSON Data
  - Understanding the JSON Format
  - How to read (`loads()`)

**Exercises**

**Example**: Use the JSON-formatted text below to extract the subject name from the data:

In [5]:
text = '{"Session": 35, "Subject": "Samuel", "date": "2022-02-05"}'

In [7]:
data = json.loads(text)
data['Subject']

'Samuel'

Use the JSON-formatted text below to extract the date from the data:

In [9]:
text = '{"Session": 35, "Subject": "Samuel", "Date": "2022-02-05"}'

In [11]:
data = json.loads(text)
data['Date']

'2022-02-05'

Use the JSON-formatted text below to extract the image height from the data:

In [22]:
text = """
{
  "Session": 32366,
  "Date": "2023-09-12",
  "TestSession": false,
  "Image": {
    "Width": 720,
    "Height": 1080,
    "Format": "RGB"
  }
}
"""

In [23]:
data = json.loads(text)
data['Image']['Height']

1080

Use the JSON-formatted text below to count how many images are in the collected sequence (tip: use the `len()` function to count items in a collection):

In [17]:
text = """
{
  "Date": "2023-08-30",
  "Images": ["im001.tif", "im002.tif", "im003.tif", "im004.tif"]
}
"""

In [21]:
data = json.loads(text)
len(data['Images'])

4

---

## Writing and Reading JSON Data from a File

| Code | Description |
| :-- | :-- |
|    |   |

**Exercises**

Write the following data into a JSON-formatted text file called `session1.json`:

In [36]:
data = {"Subject": "Jenny", "DOB": "1971-06-14"}

In [40]:
text = json.dumps(data)
Path("session1.json").write_text(text)


41

Read `session1.json` back into Python and parse the text back into a dictionary to make sure it was written correctly:

In [41]:
text = Path("session1.json").read_text()
data = json.loads(text)
data

{'Subject': 'Jenny', 'DOB': '1971-06-14'}

Write the following data into a JSON-formatted text file called `session2.json`.  To make the file easier to read in a text editor, put each variable on its own indented line:

In [48]:
data = {"session_id": 12314, 'brain_region': "V1", "subject": "mousey112", 'height': 1920, 'width': 1080, 'format': 'RGB', 'order': 'F', "inverted": True}
data

{'session_id': 12314,
 'brain_region': 'V1',
 'subject': 'mousey112',
 'height': 1920,
 'width': 1080,
 'format': 'RGB',
 'order': 'F',
 'inverted': True}

In [49]:
text = json.dumps(data, indent=3)
Path("session2.json").write_text(text)


173

Read `session2.json` back into Python and parse the text back into a dictionary to make sure it was written correctly:

In [1]:
text = Path("session2.json").read_text()
data = json.loads(text)
data

NameError: name 'Path' is not defined

Run the following code to generate the `image_data` folder, which contains a session's image acquisition data parameters:

In [124]:
import json, random
from pathlib import Path

random.seed(42)

for _ in range(10):

    # Generate random parameters
    params = {
        "exposure_time": random.choice([100, 200, 300]),  # milliseconds
        "laser_power": random.choice([5, 10, 15]),  # milliwatts
        "num_frames": random.randint(200, 400),
        "frame_rate": random.choice([10, 20, 30]),  # Hz
        "region_of_interest": random.choice(["ROI1", "ROI2", "ROI3"]),
    }
    if random.random() > 0.5:
        params['start_time'] = random.randint(1, 5000)  # seconds

    # Write the data to a json file
    session_num = random.randint(1, 300)
    experimenter = random.choice(["Sophie", "Florian"])
    path = Path(f"image_data/{experimenter}_{session_num}/session.json")
    path.parent.mkdir(parents=True, exist_ok=True)
    json_text = json.dumps(params, indent=3)
    path.write_text(json_text)


Read and Parse the JSON-formatted data in session 72, to get the exposure time.

In [119]:
data = json.loads(Path("image_data/Sophie_72/session.json").read_text())
data['exposure_time']

300

Read and Parse the JSON-formatted data in session 177, to get the frame rate.

In [118]:
data = json.loads(Path("image_data/Florian_177/session.json").read_text())
data['frame_rate']

10

Use `list(Path().glob(pattern))` to list all the the JSON session files in the `image_data` folder (tip: use the wildcard "*" whereever there are variable parts in the filename)

In [112]:
list(Path().glob("image_data/session_*/session.json"))

[]

Read and parse all the `session.json` files and put them into a Pandas DataFrame. Here is a code template to help you get started:

```python
sessions = []
for path in Path().glob("image_data/Sophie_16/session.json"):
    text = path.read_text()
    session = {"A": 3}
    sessions.append(session)

df = pd.DataFrame(sessions)
df
```

In [120]:
sessions = []
for path in Path().glob("image_data/*/session.json"):
    session = json.loads(path.read_text())
    sessions.append(session)

df = pd.DataFrame(sessions)
df

Unnamed: 0,exposure_time,laser_power,num_frames,frame_rate,region_of_interest,start_time
0,300,15,292,30,ROI1,376.0
1,100,10,226,10,ROI2,
2,100,15,317,30,ROI1,3101.0
3,200,15,271,10,ROI1,2788.0
4,300,15,339,10,ROI3,
5,100,10,297,20,ROI3,1800.0
6,100,5,225,20,ROI2,
7,100,5,329,30,ROI1,4465.0
8,300,5,206,30,ROI2,
9,200,10,253,30,ROI2,585.0


Read and parse all the `session.json` files and put them into a Pandas DataFrame. Include the experimenter name and the session ID from the parent folder's name (tip: `Path().parent.name`). 

In [123]:
sessions = []
for path in Path().glob("image_data/*/session.json"):
    session = {}
    experimenter, session_id = path.parent.name.split('_')
    session['session_id'] = session_id
    session['experimenter'] = experimenter
    session |= json.loads(path.read_text())
    sessions.append(session)

df = pd.DataFrame(sessions)
df

Unnamed: 0,session_id,experimenter,exposure_time,laser_power,num_frames,frame_rate,region_of_interest,start_time
0,117,Florian,300,15,292,30,ROI1,376.0
1,177,Florian,100,10,226,10,ROI2,
2,41,Florian,100,15,317,30,ROI1,3101.0
3,143,Sophie,200,15,271,10,ROI1,2788.0
4,16,Sophie,300,15,339,10,ROI3,
5,167,Sophie,100,10,297,20,ROI3,1800.0
6,187,Sophie,100,5,225,20,ROI2,
7,215,Sophie,100,5,329,30,ROI1,4465.0
8,72,Sophie,300,5,206,30,ROI2,
9,88,Sophie,200,10,253,30,ROI2,585.0
