# Coding Club Python Pandas Example

Processing the data presented by Lois at Coding Club on November 21st, 2022.

In [None]:
from pathlib import Path
import pandas as pd
import seaborn as sns

# Specify directory containing csv files with metrics exported from MIM
data_directory = Path("./data/CNS/CNS_CT_MRI_MIM/")

# Load each csv into its own DataFrame. Every file is transposed so that its
# original column headers become the row index, and each row is tagged with
# the patient id (the part of the file name before the first "^").
# Files are visited in sorted order because glob() makes no ordering
# guarantee and we want the resulting row order to be reproducible.
patient_frames = []
for p in sorted(data_directory.glob("*.csv")):
    pat_id = p.name.split("^")[0]
    df_pat = pd.read_csv(p, index_col=0).transpose()
    df_pat["pat_id"] = pat_id
    patient_frames.append(df_pat)

# Fail early with a clear message rather than with an unrelated KeyError
# further down when the directory is missing or contains no csv files.
if not patient_frames:
    raise FileNotFoundError(f"No csv files found in {data_directory}")

# Concatenate once at the end: calling pd.concat inside the loop re-copies
# all previously loaded rows on every iteration.
df = pd.concat(patient_frames)

# After the transpose the row labels live in the index. reset_index() moves
# them into the first column and replaces the index with a fresh 0..N-1
# range, so rows coming from different files no longer share index labels.
df = df.reset_index()

# The first column now holds the former index labels; name it "Contour".
# Assigning a plain list also clears any name carried by the columns axis.
cols = list(df.columns)
cols[0] = "Contour"
df.columns = cols

# Data massaging to bring this into a form suitable for seaborn: melt to long
# format, i.e. one row per (Contour, metric) pair with the metric name in
# "variable" and its reading in "value". The output column names are given
# explicitly so they do not depend on any name attached to the columns axis.
df_filtered = df.melt(
    id_vars=["Contour", "Series Date", "pat_id"],
    var_name="variable",
    value_name="value",
)

# Split each contour label on its LAST space: the text before it is the
# contour name and the trailing token is the image label. A label without
# any space yields an empty contour name and the whole label as the image.
contour_parts = df_filtered["Contour"].str.rpartition(" ")
df_filtered["Contour Name"] = contour_parts[0]
df_filtered["Image"] = contour_parts[2]

# Convert readings to float. Anything that cannot be parsed as a number
# (for example a "-" placeholder) becomes NaN instead of raising. Unlike a
# substring test for "-", this keeps legitimate negative readings and also
# copes with cells that are already numeric or missing.
# NOTE(review): other unparseable text is also silently turned into NaN.
df_filtered["value"] = pd.to_numeric(df_filtered["value"], errors="coerce").astype(float)

# Drop the rows whose contour label starts with "Line ".
df_filtered = df_filtered[~df_filtered["Contour"].str.startswith("Line ")]

# Produce some plots using seaborn.
# Figure size (width, height) is applied globally through seaborn's rc settings.
figure_size = (11.7, 8.27)
sns.set(rc={"figure.figsize": figure_size})

# Keep only the rows whose metric name starts with "Volume".
is_volume_metric = df_filtered["variable"].str.startswith("Volume")
volume_rows = df_filtered[is_volume_metric]

# One box per contour name, split by image label.
ax = sns.boxplot(data=volume_rows, x="Contour Name", y="value", hue="Image")

# Rotate the contour names so the x tick labels are drawn vertically.
ax.tick_params(axis="x", rotation=90)
ax.set(xlabel="Contour", ylabel="Volume")
