# 데이터 라벨링 산출물 분석
- 라벨링된 클래스별로 CSV 파일로 분류하기

### 데이터셋 로드
- data/Dataset_221127.csv 파일 로드 

In [6]:
import numpy as np
import pandas as pd

# Load the labeling output, using the 'id' column as the DataFrame index.
dataset = pd.read_csv('../data/Dataset_221127.csv', index_col = 'id')

# So far only positional rows 0~12500 and 14001~ have been labeled.
valid_class = ['0', '1', '2', '3', '4', '5']
# Stack the two labeled slices row-wise. pd.concat keeps the 'id' index and the
# original row order; an outer pd.merge would ignore the index (dropping the
# ids), re-sort the rows by the join keys and collapse rows that are identical
# in both slices.
df_labeled = pd.concat([dataset.iloc[0:12501], dataset.iloc[14001:]])
# Positive rows: label == 1 with the secondary label set to the string '-1'.
df_positive = df_labeled[(df_labeled["label"] == 1) & (df_labeled["label2"] == '-1')]
# Negative rows: label == 0 with a secondary label that is one of valid_class.
df_negative = df_labeled[(df_labeled["label"] == 0) & (df_labeled["label2"].isin(valid_class))]

In [77]:
import numpy as np
import pandas as pd

class LabeledClassToCsv:
    """Split the negative rows of a labeled dataset into one CSV file per class.

    A row is "negative" when its ``label`` column equals 0 and its ``label2``
    column holds one of the values in ``valid_class``.

    Attributes:
        dataset: The full DataFrame read from ``filePath``.
        df_negative: The negative rows selected from ``dataset``.
    """

    # Values of the "label2" column that count as valid classes (kept as strings).
    valid_class = ['0', '1', '2', '3', '4', '5']

    def __init__(self, filePath, index_col = 'id', isLabelDone = False):
        """Load the dataset and select its negative rows.

        Args:
            filePath: Path of the CSV file to load.
            index_col: Column to use as the DataFrame index.
            isLabelDone: When False, only positional rows [0, 12501) and
                [14001, end) are searched; when True, every row is searched.
        """
        self.dataset = pd.read_csv(filePath, index_col=index_col)
        if not isLabelDone:
            self.df_negative = self.getOutterNegatives(0, 12501, 14001)
        else:
            self.df_negative = self.getAllNegatives()

    def getOutterNegatives(self, leftFrom, leftTo, rightFrom):
        """Return the negative rows found outside the positional gap [leftTo, rightFrom).

        Args:
            leftFrom: First position (inclusive) of the left slice.
            leftTo: End position (exclusive) of the left slice.
            rightFrom: First position (inclusive) of the right slice, which
                runs to the end of the dataset.

        Returns:
            DataFrame of negative rows that keeps the dataset's index and
            original row order.
        """
        # Stack the two slices row-wise. pd.concat keeps the index, whereas an
        # outer pd.merge ignores it (dropping the ids) and re-sorts the rows.
        df_labeled = pd.concat(
            [self.dataset.iloc[leftFrom:leftTo], self.dataset.iloc[rightFrom:]]
        )
        # valid_class is a class attribute, so it must be reached through self;
        # a bare name inside a method would resolve to a module-level global.
        is_negative = (df_labeled["label"] == 0) & (df_labeled["label2"].isin(self.valid_class))
        return df_labeled[is_negative]

    def getAllNegatives(self):
        """Return every negative row of the whole dataset."""
        is_negative = (self.dataset["label"] == 0) & (self.dataset["label2"].isin(self.valid_class))
        return self.dataset[is_negative]

    def saveAsCsv(self, class_no:str, filePath, encoding="utf-8-sig"):
        """Write the negative rows whose ``label2`` equals ``class_no`` to a CSV file.

        Saving is best-effort: a failure is reported on stdout and not re-raised.

        Args:
            class_no: Class value to export, compared against ``label2`` as a string.
            filePath: Destination path of the CSV file.
            encoding: Text encoding passed to ``DataFrame.to_csv``.
        """
        df_target = self.df_negative[(self.df_negative["label"] == 0) & (self.df_negative["label2"] == class_no)]
        try:
            df_target.to_csv(filePath, encoding=encoding)
        except Exception as e:
            print("[DEBUG] : Failed saving Pandas DataFrames as CSV file", e)

    def saveAllAsCsv(self, filePath):
        """Write one CSV per valid class, named ``<filePath><class_no>.csv``.

        Args:
            filePath: Path prefix to which the class value and ".csv" are appended.
        """
        fileType = ".csv"
        for class_no in self.valid_class:
            self.saveAsCsv(class_no, filePath + class_no + fileType)
        print("[INFO] : Saved All valid cass as csv files")

In [78]:
# Build the exporter from the labeled dataset, then write one
# "labeled<class>.csv" file per valid class.
filePath = '../data/Dataset_221127.csv'
stat = LabeledClassToCsv(filePath=filePath)
stat.saveAllAsCsv(filePath="labeled")

[INFO] : Saved All valid cass as csv files
