# Initial Converter

In this notebook we are converting the classification of the two measurement series

- 11th_tip3_pure3_500mV_12000pm_2000pA_5000ms_114
- 0_max_middle_500mV_8000pm_2000pA_5000ms_-5

into a classification file that obeys our specification.

Members of the freiheit data science faction can get the data by running the `getData.sh` script. It will download and extract the example data to the `gitignored/` folder.

In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Folder holding the example data; later cells also write their CSV output here.
input_path = "gitignored/"


def _load_series(series_name, results_file_suffix):
    """Read the classification results of one measurement series.

    The results file is located at ``input_path + series_name + "_" +
    results_file_suffix``. It is parsed as a tab-separated file without a
    header row, and its three columns are named ``file_name``, ``label`` and
    ``parameter``. All values are read as strings.

    The series name (plus an underscore) is prepended to every ``file_name``
    entry before the frame is returned.
    """
    series_df = pd.read_csv(
        input_path + series_name + "_" + results_file_suffix,
        names=["file_name", "label", "parameter"],
        delimiter="\t",
        header=None,
        dtype=str,
    )
    series_df["file_name"] = series_name + "_" + series_df["file_name"]
    return series_df


tip3_df = _load_series("11th_tip3_pure3_500mV_12000pm_2000pA_5000ms_114", "results.txt")
max_df = _load_series("0_max_middle_500mV_8000pm_2000pA_5000ms_-5", "all.txt")

# Stack both series into a single frame that keeps only file name and label.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat(
    [tip3_df[["file_name", "label"]], max_df[["file_name", "label"]]],
    ignore_index=True,
)

# The labels were read as strings; convert them to integers.
df["label"] = df["label"].astype(int)

print("number of images: {}".format(len(df)))

df.head(20)

In [None]:
# Count the rows of df for each label value in 0..6.
plot_bins = range(7)
hist_data = [len(df[df["label"] == label]) for label in plot_bins]

# Draw the counts as a bar chart, one bar per label value.
_ = plt.bar(plot_bins, hist_data)
plt.grid()
plt.ylabel("number of images")
plt.xlabel("classes")
plt.title("class distribution in data set")

In [None]:
# Print one line per histogram bin: the label value and its image count.
for label, count in zip(plot_bins, hist_data):
    print("label: {}; number {}".format(label, count))

In [None]:
# Remap the labels so that they equal the class numbers used in the
# presentation paper minus one, i.e. our label 0 corresponds to class 1 in
# the paper.
#
# The new names are applied by position to the existing categories, so the
# list must contain exactly as many entries as there are distinct labels in
# df; otherwise pandas raises an error.
# NOTE(review): the per-entry comments assume the labels present are exactly
# 0..5 in sorted order -- confirm against the data.
#
# rename_categories replaces the direct assignment to `.cat.categories`,
# which was removed in pandas 2.0.
df['label'] = df['label'].astype("category")
df['label'] = df['label'].cat.rename_categories([
    1, # Right
    0, # Left
    3, # Bottom
    2, # Top
    4, # Top <-> Left|Right
    5, # Bottom <-> Left|Right
])
df.to_csv(input_path + "fresh_classification.csv", sep="\t")

In [None]:
# Count the rows of df for each label value in 0..5.
plot_bins = range(6)
hist_data = [len(df[df["label"] == label]) for label in plot_bins]

# Draw the counts as a bar chart, one bar per label value.
_ = plt.bar(plot_bins, hist_data)
plt.grid()
plt.ylabel("number of images")
plt.xlabel("classes")
plt.title("class distribution in data set")

In [None]:
# Print one line per histogram bin: the label value and its image count.
for label, count in zip(plot_bins, hist_data):
    print("label: {}; number {}".format(label, count))

In [None]:
# Add the ".png" extension to the file names and write the final
# classification file. The extension is appended only where it is missing,
# so re-running this cell does not produce names ending in ".png.png".
has_png_suffix = df["file_name"].str.endswith(".png", na=False)
df["file_name"] = df["file_name"].where(has_png_suffix, df["file_name"] + ".png")
df.to_csv(input_path + "classification.csv", sep="\t")