### 2024-03-29 齊藤先生にアノテーションしてもらったストローク画像のファイル名を取得し、表形式で記録する

In [45]:
import pandas as pd
import glob
import os
import requests
import io

In [39]:
def get_ids(folder_path):
    """Collect drawing/stroke ids from the PNG file names in a folder.

    Every ``*.png`` file directly inside ``folder_path`` must be named
    ``<drawing_id>_<stroke_id>.png`` with both ids written as integers.

    Args:
        folder_path: Directory to scan (sub-directories are not visited).

    Returns:
        pandas.DataFrame with int64 columns ``drawing_id`` and ``stroke_id``,
        one row per PNG file, sorted by (drawing_id, stroke_id). When the
        folder holds no PNG files the frame is empty but still has both
        columns.

    Raises:
        ValueError: If a PNG file name does not follow the naming pattern.
    """
    id_pairs = []
    for path in glob.glob(os.path.join(folder_path, '*.png')):
        file_name = os.path.basename(path)
        # splitext drops the extension whatever its letter case is.
        stem = os.path.splitext(file_name)[0]
        try:
            did, sid = stem.split('_')
            id_pairs.append((int(did), int(sid)))
        except ValueError as err:
            raise ValueError(
                'unexpected PNG file name (want <drawing_id>_<stroke_id>.png): '
                + repr(file_name)
            ) from err

    # glob returns files in arbitrary order; sort so the result is reproducible.
    id_pairs.sort()

    # Passing the column names to the constructor keeps the empty case valid.
    return pd.DataFrame(id_pairs, columns=['drawing_id', 'stroke_id'], dtype='int64')

In [59]:
# Read the (drawing_id, stroke_id) pairs of the annotated stroke images
# and show how many images were found.
folder_path = '../temp/test'
id_df = get_ids(folder_path)
print(id_df.shape[0])

110


In [46]:
def get_drawing_data(url, drawing_id, rotate_type, times):
    """Download one drawing's stroke table and return it with mm coordinates.

    The tab-separated text at ``url`` must have no header row and exactly
    eleven columns (listed in ``columns`` below), one row per stroke.

    Args:
        url: Location of the stroke table; fetched with HTTP basic auth.
        drawing_id: Value written to the ``drawing_id`` column of every row.
        rotate_type: Accepted for interface compatibility; not used here.
        times: Value written to the ``times`` column of every row.

    Returns:
        pandas.DataFrame whose columns are, in order: ``stroke_id`` (0-based
        row number within this table), the eleven source columns, ``len_mm``,
        ``drawing_id``, ``times``, ``ptx_mm`` and ``pty_mm``. The last two
        hold one list of floats per stroke.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the downloaded table does not have eleven columns.
    """
    # Factor applied to the Anoto-unit values to fill the *_mm columns.
    anoto_to_mm = 0.3
    columns = ['time', 'alpha', 'max_points', 'ptx_anoto', 'pty_anoto', 'shape_str',
               'pressure_style', 'len_mm_acc', 'len_anoto', 'pressure_avg', 'shape_int']

    def to_mm(points):
        # "12.3,45.6,..." -> [3.69, 13.68, ...]
        return [float(value) * anoto_to_mm for value in points.split(',')]

    # NOTE(review): SECURITY - the credentials are hard-coded in source; move
    # them to an environment variable or a secrets store and rotate them.
    response = requests.get(url, auth=('19t2003a', 'ireneRED77'), timeout=60)
    # Without this an error page (401/404/...) would be parsed as stroke data.
    response.raise_for_status()

    df = pd.read_table(io.StringIO(response.content.decode('utf-8')), header=None)
    df.columns = columns
    df['len_mm'] = df['len_anoto'] * anoto_to_mm

    # Turn the default row index into a per-drawing stroke id column.
    df = df.reset_index().rename(columns={'index': 'stroke_id'})
    df['drawing_id'] = [drawing_id] * len(df)
    df['times'] = [times] * len(df)

    # Parse the comma-separated coordinate strings into lists of floats.
    df['ptx_mm'] = [to_mm(points) for points in df['ptx_anoto']]
    df['pty_mm'] = [to_mm(points) for points in df['pty_anoto']]

    return df


In [49]:
# Build the table that links drawing ids / stroke ids to coordinate data.
drawing_info = pd.read_excel('../data/all_drawing_info.xlsx', index_col=0)
box_info = drawing_info[drawing_info['is_preanalysis_box'] == 1]
bag_info = drawing_info[drawing_info['is_preanalysis_bag'] == 1]

# Keep only the preliminary-analysis drawings (box rows first, then bag rows)
# and download the stroke table of each one.
preana_info = pd.concat([box_info, bag_info])
preana_dfs = []
for url, did, rtype, times in preana_info[
        ['url', 'drawing_id', 'rotate_type', 'times']].itertuples(index=False, name=None):
    preana_dfs.append(get_drawing_data(url, did, rtype, times))

In [51]:
# Stack the per-drawing tables row-wise; each table keeps its own row index
# because ignore_index is not set.
preana_df = pd.concat(preana_dfs)

In [58]:
# For each annotated (drawing_id, stroke_id) pair, pick the matching stroke
# rows from preana_df, in the order the pairs appear in id_df.
annotation_rows = []
for did, sid in zip(id_df['drawing_id'], id_df['stroke_id']):
    row = preana_df[(preana_df['drawing_id'] == did) & (preana_df['stroke_id'] == sid)]
    annotation_rows.append(row)

annotation_df = pd.concat(annotation_rows)

In [60]:
# Show how many stroke rows were matched to the annotated images.
print(annotation_df.shape[0])

110
