# Exploring the data
In this notebook, we show you how to navigate the data. 

In [1]:
import numpy as np
import pandas as pd
import os
import datetime
from STGNNBrain.config import Config

In [2]:
# Get the data path
config = Config()
user_ID = config.current_user_ID
data_pth = config.data_pth[user_ID]

In [7]:
# time series data for one resting state scan
scan_ID = "sub014.txt"
scan_data = pd.read_csv(os.path.join(data_pth, "connectivity_aa116", scan_ID), sep=",", header=None).to_numpy()

In [9]:
scan_data[1:, 1:]

array([[ 1.        ,  0.8029669 ,  0.7442662 , ...,  0.22868134,
         0.35761756, -0.21914223],
       [ 0.80296695,  1.        ,  0.7940493 , ...,  0.02752594,
         0.41501802, -0.41324878],
       [ 0.7442662 ,  0.7940494 ,  1.        , ..., -0.01177641,
         0.3736553 , -0.3186635 ],
       ...,
       [ 0.22868134,  0.02752594, -0.01177641, ...,  1.        ,
         0.30737093,  0.07695326],
       [ 0.35761756,  0.41501802,  0.3736553 , ...,  0.3073709 ,
         1.        , -0.14759922],
       [-0.21914221, -0.41324878, -0.3186635 , ...,  0.07695326,
        -0.14759922,  1.        ]])

In [4]:
print(f"Number of time points = {scan_data.shape[0]}")
print(f"Number of parcels = {scan_data.shape[1]}")

Number of time points = 518
Number of parcels = 630


In [5]:
# parcel metadata
parcel_metadata = pd.read_csv(os.path.join(data_pth, "parcel_data.txt"), sep = "\t", header=None)
parcel_metadata.columns = ["ID", "Hemisphere", "X_Coordinate", "Y_Coordinate", "Z_Coordinate", 
                            "Lobe", "Region", "Network", "7Network_Label", "17Network_Label"]
parcel_metadata.head()

Unnamed: 0,ID,Hemisphere,X_Coordinate,Y_Coordinate,Z_Coordinate,Lobe,Region,Network,7Network_Label,17Network_Label
0,1,L,-8.05,-45.62,38.36,Parietal,isthmuscingulate,DMN,7Networks_7,17Networks_16
1,2,L,-17.27,-33.34,70.16,Frontal,superiorparietal,Somatomotor,7Networks_2,17Networks_3
2,3,L,-53.26,-4.77,37.48,Frontal,precentral,Somatomotor,7Networks_2,17Networks_4
3,4,L,-6.78,10.31,54.54,Frontal,superiorfrontal,Ventral_Attention,7Networks_4,17Networks_7
4,5,L,-14.99,-95.83,17.0,Occipital,superiorparietal,Visual_2,7Networks_1,17Networks_1


In [6]:
# labeling the data fasted vs non-fasted
metadata = pd.read_csv(os.path.join(data_pth, 'trackingdata_goodscans.txt'), sep = "\t")
dayofweek = []
for i in range(metadata.shape[0]):
    dt = metadata["date"][i]
    year, month, day = (int(x) for x in dt.split('-'))    
    ans = datetime.date(year, month, day)
    dayofweek.append(ans.strftime('%A'))
metadata["dayofweek"] = dayofweek

* Mondays: Afternoon scan (non-fasted)
* Tuesdays: Morning scan (fasted)
* Thursdays: Morning scan (unfasted and caffeinated)

In [7]:
metadata[["subcode", "dayofweek"]].head()

Unnamed: 0,subcode,dayofweek
0,sub014,Thursday
1,sub015,Monday
2,sub016,Tuesday
3,sub017,Thursday
4,sub018,Monday


In [84]:
print(f"Number of Tuesday scans = {metadata[metadata['dayofweek'] == 'Tuesday'].shape[0]}")
print(f"Number of Thursday scans = {metadata[metadata['dayofweek'] == 'Thursday'].shape[0]}")

Number of Tuesday scans = 40
Number of Thursday scans = 32


In [8]:
labels = metadata.query("dayofweek == 'Tuesday' or dayofweek == 'Thursday'")

In [9]:
# if dayofweek Tuesday, caffeinated = 0, else caffeinated = 0
labels["caffeinated"] = 1
labels.loc[labels["dayofweek"] == "Tuesday", "caffeinated"] = 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  labels["caffeinated"] = 1


In [11]:
labels = labels[["subcode", "dayofweek", "caffeinated"]]

In [12]:
labels.to_csv(os.path.join(data_pth, "labels.csv"), index=False)

In [22]:
for file in os.listdir(os.path.join(data_pth, "timeseries_aa116")):
    s = file.split("-")
    new_name = f"sub{s[-1]}"
    os.rename(os.path.join(data_pth, "timeseries_aa116", file), os.path.join(data_pth, "timeseries_aa116", new_name))