# Log2Csv Converter
ATRで取得したデータをADLtaggerで扱えるように形式変換する. 処理できるのは, ATRのセンサで取得した1日分のデータのみ(timestampだけでは日付を確定できないため).

### [次でバックして!!!] timestampの小数点以下の0が桁落ち ==> .astype(str).str.zfill(3) を使え!!!

In [2]:
import os
import shutil # shutil.rmtree(path)でdirectory tree全体を削除(空でなくても)
import pandas as pd
import datetime as dt
import csv


class Log2ADL(object):
    """
    Convert log files generated by ATR acceleration sensors into the
    ADL tagger CSV layout.

    Typical usage is three calls in order: ``read_logs()`` ->
    ``add_timestamps()`` -> ``to_csvs()``.
    """

    # ATR timestamps are treated as milliseconds elapsed since the start of
    # the recording day, so a valid value is smaller than one day.
    _MS_PER_DAY = 24 * 60 * 60 * 1000

    def __init__(self, date_str, t_shift, path_to_log_dir='./log/', path_to_output_dir='./ADL/'):
        """
        Argument
        --------
        date_str: str, the date on which the logs were recorded. (ex. '2017-09-21')
        t_shift: int or float, offset used to align the ATR sensor clock with
            the PC clock. [ms]
        path_to_log_dir/path_to_output_dir: path to the log/output directory
        """
        self.path_to_log_dir = path_to_log_dir
        self.path_to_output_dir = path_to_output_dir
        # Reference time that every sensor timestamp is added to.
        self.base_timestamp = dt.datetime.strptime(date_str, '%Y-%m-%d') + dt.timedelta(milliseconds=t_shift)
        print("Class was successfully generated [base_timestump=",self.base_timestamp,"]")

    # ------------------------------------------------------------------
    # Read log files
    # ------------------------------------------------------------------
    def read_logs(self):
        """
        Load every ``*.log`` file in the log directory into ``self.df``.

        Only CSV rows containing an ``"ags"`` cell are kept.

        Return
        ------
        pd.DataFrame with columns sensor, time_ATR, accX..accZ, gyroX..gyroZ
        (all values are still strings as read from the file).
        """
        # Step.1: list the log files to process (sorted for a stable order).
        print("Step1: Get log files.")
        log_list = sorted(os.listdir(self.path_to_log_dir))
        print(">> Success: ",log_list, "\n")

        # Step.2: convert the log files into one DataFrame.
        print("Step2: Convert log files to pd.DataFrame.")
        rows = []
        for file_name in log_list:
            # Suffix test: `find('.log') > 0` also matched e.g. 'x.log.bak'.
            if not file_name.endswith('.log'):
                continue
            path_to_file = os.path.join(self.path_to_log_dir, file_name)
            with open(path_to_file, 'r', newline='') as f:
                x = [row for row in csv.reader(f) if "ags" in row]
            print(">> Read CSV ["+ path_to_file + ']  ==> Sucess(', len(x), "rows)")
            rows.extend(x)
        self.df = pd.DataFrame(rows, columns=["sensor", "time_ATR", "accX", "accY", "accZ", "gyroX", "gyroY", "gyroZ"])
        print(">> Success: df.shape=", self.df.shape, "\n")
        return self.df

    # ------------------------------------------------------------------
    # Add timestamps
    # ------------------------------------------------------------------
    def generate_timestamp(self, time):
        """
        Convert an ATR timestamp into a datetime object.

        Argument
        --------
        time: int or str, ATR timestamp in milliseconds.

        Return
        ------
        datetime.datetime: ``base_timestamp`` advanced by ``time`` milliseconds.
        """
        total_ms = int(time)
        # The sensor timestamp cannot encode the date, so anything outside a
        # single day is suspicious. Report it but still convert (best effort).
        if not 0 <= total_ms < self._MS_PER_DAY:
            print(">> Error: timestamp of ATR sensor is invaild format.")
        # Add the whole offset at once; the previous manual split reduced the
        # hours modulo 60 and silently dropped the remainder.
        return self.base_timestamp + dt.timedelta(milliseconds=total_ms)

    def add_timestamps(self):
        """
        Sort ``self.df`` chronologically and add the derived time columns.

        Added columns: ``timestamp`` (datetime), ``time``
        ('YYYYmmdd_HH:MM:SS.mmm'), ``time_milli`` (int) and ``group``
        ('YYYYmmdd_HHMM', one value per minute).

        Return
        ------
        pd.DataFrame: the updated frame (also stored in ``self.df``).
        """
        print("Step3: Add Timestamps.")
        # time_ATR holds strings read from CSV; sort by numeric value, since a
        # string sort would place "10000" before "9999". A stable sort keeps
        # the file order for equal timestamps.
        order = pd.to_numeric(self.df["time_ATR"]).values.argsort(kind="mergesort")
        df = self.df.iloc[order].reset_index(drop=True)
        # to_datetime guarantees a datetime dtype even when the frame is empty.
        df["timestamp"] = pd.to_datetime(df["time_ATR"].apply(self.generate_timestamp))
        df["time"] = df["timestamp"].dt.strftime('%Y%m%d_%H:%M:%S.')
        df["time_milli"] = df["timestamp"].dt.microsecond // 1000
        # Zero-pad the milliseconds so that e.g. 5 ms is written as '.005'.
        df["time"] = df["time"].astype(str) + df["time_milli"].astype(str).str.zfill(3)
        df["group"] = df["timestamp"].dt.strftime('%Y%m%d_%H%M')
        self.df = df
        print(">> Success: df.shape=", self.df.shape, "\n")
        return df

    # ------------------------------------------------------------------
    # Write output
    # ------------------------------------------------------------------
    def activate_dir(self, target_path, dir_name):
        """
        Make sure ``dir_name`` exists inside ``target_path``.

        Return
        ------
        str: path of the directory, with a trailing '/'.
        """
        dir_path = os.path.join(target_path, dir_name)
        if not os.path.isdir(dir_path):
            # Create the directory when it does not exist yet.
            os.makedirs(dir_path)
            if os.path.isdir(dir_path):
                print(">> Directory was created ["+ dir_path +"]")
        return dir_path + '/'

    def _write_sensor_csv(self, df_selected, group, top_dir, sub_dir, suffix, columns):
        """Write one group's `time` + `columns` as <output>/<top_dir>/<sub_dir>/<group>00_<suffix>.csv."""
        target_path = self.activate_dir(self.path_to_output_dir, top_dir)
        target_path = self.activate_dir(target_path, sub_dir)
        target_file = os.path.join(target_path, group + "00_" + suffix + ".csv")
        df_selected[["time"] + columns].to_csv(target_file, index=False, header=["time", "x", "y", "z"])
        print(">> write", target_file)

    def to_csvs(self):
        """
        Write one accelerometer CSV and one gyroscope CSV per minute group.

        The output directory is deleted and recreated first, so anything
        already inside it is lost.
        """
        print("Step4: Write CSVs.")
        df = self.df
        # Clear the output directory.
        if os.path.isdir(self.path_to_output_dir):
            shutil.rmtree(self.path_to_output_dir)
        os.makedirs(self.path_to_output_dir)
        print(">> Clean output directory.")
        n_groups = 0
        # sort=False keeps the groups in order of first appearance.
        for group, df_group in df.groupby("group", sort=False):
            df_selected = df_group.sort_values(by=["timestamp"])
            # Accelerometer
            self._write_sensor_csv(df_selected, group, "acc2", "acc2_R", "acc2", ["accX", "accY", "accZ"])
            # Gyroscope (previously the acc columns were written here by mistake)
            self._write_sensor_csv(df_selected, group, "Gyro", "gyro", "gyro", ["gyroX", "gyroY", "gyroZ"])
            n_groups += 1
        print(">> Success:", n_groups, "files were created.\n")
        
        
# converter = Log2ADL('2017-09-21', 100)
# df_foo = converter.read_logs()
# df_foo = converter.add_timestamps()
# df_foo = converter.to_csvs()

In [26]:
"""
Main routine: convert the logs under ./data/log/ into ADL tagger CSVs
under ./data/ADL/, shifting every sensor timestamp by ts_ms milliseconds.
"""
# Offset between the sensor clock and the PC clock, built up in seconds.
# NOTE(review): the terms 88, -5 and 0.45 look like hand-measured
# corrections -- their origin is not recorded here; TODO confirm.
ts_minutes = 10
ts_seconds = ts_minutes*60 + 88 - 5 + 0.45
ts_ms      = ts_seconds*1000  # seconds -> milliseconds (passed as t_shift)
print("ts_ms:",  ts_ms)

converter = Log2ADL('2018-01-31', ts_ms,  path_to_log_dir='./data/log/', path_to_output_dir='./data/ADL/')
df_foo = converter.read_logs()

# Uncomment to process only the first 50000 rows (quick trial run).
#converter.df = converter.df[:50000]

df_foo = converter.add_timestamps()
# Writes the per-minute CSV files; the output directory is wiped first.
converter.to_csvs()
# `display` is provided by the notebook environment, not imported here.
display(df_foo.head())

ts_ms: 683450.0
Class was successfully generated [base_timestump= 2018-01-31 00:11:23.450000 ]
Step1: Get log files.
>> Success:  ['32_torisuke.log'] 

Step2: Convert log files to pd.DataFrame.
>> Read CSV [./data/log/32_torisuke.log]  ==> Sucess( 595970 rows)
>> Success: df.shape= (595970, 8) 

Step3: Add Timestamps.
>> Success: df.shape= (595970, 12) 

Step4: Write CSVs.
>> Clean output directory.
>> Directory was created [./data/ADL/acc2]
>> Directory was created [./data/ADL/acc2/acc2_R]
>> write ./data/ADL/acc2/acc2_R/20180131_212800_acc2.csv
>> Directory was created [./data/ADL/Gyro]
>> Directory was created [./data/ADL/Gyro/gyro]
>> write ./data/ADL/Gyro/gyro/20180131_212800_gyro.csv
>> write ./data/ADL/acc2/acc2_R/20180131_212900_acc2.csv
>> write ./data/ADL/Gyro/gyro/20180131_212900_gyro.csv
>> write ./data/ADL/acc2/acc2_R/20180131_213000_acc2.csv
>> write ./data/ADL/Gyro/gyro/20180131_213000_gyro.csv
>> write ./data/ADL/acc2/acc2_R/20180131_213100_acc2.csv
>> write ./data/ADL/

Unnamed: 0,sensor,time_ATR,accX,accY,accZ,gyroX,gyroY,gyroZ,timestamp,time,time_milli,group
0,ags,76649091,658,1954,10022,213,-720,87,2018-01-31 21:28:52.541,20180131_21:28:52.541,541,20180131_2128
1,ags,76649092,665,1945,9996,220,-704,78,2018-01-31 21:28:52.542,20180131_21:28:52.542,542,20180131_2128
2,ags,76649093,675,2001,10018,214,-682,78,2018-01-31 21:28:52.543,20180131_21:28:52.543,543,20180131_2128
3,ags,76649094,680,2025,9991,214,-686,64,2018-01-31 21:28:52.544,20180131_21:28:52.544,544,20180131_2128
4,ags,76649095,665,2006,9961,199,-701,75,2018-01-31 21:28:52.545,20180131_21:28:52.545,545,20180131_2128


In [8]:
"""
Main routine: 2018.02.01

Convert the logs of one subject (sub_name) into ADL tagger CSVs, with no
clock offset applied (ts_ms == 0).
"""
# NOTE(review): the title says 2018.02.01, the data path says 2018_01_16 and
# the date passed to Log2ADL is '2018-02-04' -- confirm which recording date
# is intended.
ts_minutes = 0
ts_seconds = ts_minutes*60
ts_ms      = ts_seconds*1000  # seconds -> milliseconds (passed as t_shift)
print("ts_ms:",  ts_ms)

# Input and output directories are derived from the subject name.
sub_name = "45_maekawa"
path_to_log_dir = "/root/upconversion/data/2018_01_16/{}/log/".format(sub_name)
path_to_output_dir = "/root/upconversion/data/2018_01_16/{}/data/".format(sub_name)

converter = Log2ADL('2018-02-04', ts_ms,  path_to_log_dir=path_to_log_dir, path_to_output_dir=path_to_output_dir)
df_foo = converter.read_logs()

# Uncomment to process only the first 50000 rows (quick trial run).
#converter.df = converter.df[:50000]

df_foo = converter.add_timestamps()
# Writes the per-minute CSV files; the output directory is wiped first.
converter.to_csvs()
# `display` is provided by the notebook environment, not imported here.
display(df_foo.head())

ts_ms: 0
Class was successfully generated [base_timestump= 2018-02-04 00:00:00 ]
Step1: Get log files.
>> Success:  ['45_maekawa.log'] 

Step2: Convert log files to pd.DataFrame.
>> Read CSV [/root/upconversion/data/2018_01_16/45_maekawa/log/45_maekawa.log]  ==> Sucess( 714080 rows)
>> Success: df.shape= (714080, 8) 

Step3: Add Timestamps.
>> Success: df.shape= (714080, 12) 

Step4: Write CSVs.
>> Clean output directory.
>> Directory was created [/root/upconversion/data/2018_01_16/45_maekawa/data/acc2]
>> Directory was created [/root/upconversion/data/2018_01_16/45_maekawa/data/acc2/acc2_R]
>> write /root/upconversion/data/2018_01_16/45_maekawa/data/acc2/acc2_R/20180204_121300_acc2.csv
>> Directory was created [/root/upconversion/data/2018_01_16/45_maekawa/data/Gyro]
>> Directory was created [/root/upconversion/data/2018_01_16/45_maekawa/data/Gyro/gyro]
>> write /root/upconversion/data/2018_01_16/45_maekawa/data/Gyro/gyro/20180204_121300_gyro.csv
>> write /root/upconversion/data/2018_

Unnamed: 0,sensor,time_ATR,accX,accY,accZ,gyroX,gyroY,gyroZ,timestamp,time,time_milli,group
0,ags,43992341,41,121,9971,-22,-146,182,2018-02-04 12:13:12.341,20180204_12:13:12.341,341,20180204_1213
1,ags,43992342,70,128,9944,-52,-150,168,2018-02-04 12:13:12.342,20180204_12:13:12.342,342,20180204_1213
2,ags,43992343,75,121,9986,-72,-129,168,2018-02-04 12:13:12.343,20180204_12:13:12.343,343,20180204_1213
3,ags,43992344,70,123,9983,-60,-123,174,2018-02-04 12:13:12.344,20180204_12:13:12.344,344,20180204_1213
4,ags,43992345,58,153,9915,-46,-143,164,2018-02-04 12:13:12.345,20180204_12:13:12.345,345,20180204_1213
