In [158]:
from glob import glob
import os

# Directory prefix for the raw inputs: it is globbed for '*.csv' files below and
# prepended to the hardware-log, hardware-spec and metrics filenames when they are read.
root = '../data/raw/hardware-data/'

def getFilenams(csv_filename):
    """Derive the three per-test filenames associated with one CSV path.

    The test identifier is built from the 3rd and 4th underscore-separated
    fields of the file's base name, truncated at the first '.'.

    Args:
        csv_filename: Path (or bare name) of a CSV file.

    Returns:
        Tuple ``(hardware_log, hardware_specs, metrics)`` of bare filenames
        (no directory component).
    """
    fields = os.path.basename(csv_filename).split('_')
    test_id = '_'.join(fields[2:4]).split('.')[0]
    # (prefix, suffix) pairs, in the order the caller unpacks them.
    naming = (
        ('hardware_log_', '.csv'),
        ('hardware_', '.json'),
        ('metrics_', '.json'),
    )
    return tuple(prefix + test_id + suffix for prefix, suffix in naming)

# Build one (hardware_log, hardware_specs, metrics) filename triple per raw CSV.
# glob() yields paths in arbitrary, filesystem-dependent order; sorting keeps
# the order of `tests` -- and therefore the config_id assigned to each entry
# further down -- stable between runs and machines.
tests = []
for test_group in sorted(glob(root + '*.csv')):
    (hwlog, hwspecs, metrics) = getFilenams(test_group)
    tests.append((hwlog, hwspecs, metrics))

In [159]:
def cpuInfoToModel(info):
    """Translate a raw CPU identification string into a short model label.

    Known family/model/stepping signatures are matched by substring, in the
    order listed; any unrecognised value is returned unchanged.
    """
    known_signatures = (
        ('Intel64 Family 6 Model 94 Stepping 3', 'i5-6500 @ 3.20GHz'),
        ('Intel64 Family 6 Model 158 Stepping 9', 'i5-7600 @ 3.5GHz'),
    )
    for signature, model in known_signatures:
        if signature in info:
            return model
    return info
def fixGPUName(gpu):
    """Shorten a reported GPU name to a compact label.

    The first known name found as a substring of ``gpu`` decides the label;
    names that match nothing are returned as they came in.
    """
    short_labels = (
        ('GeForce GTX 960', 'GTX 960'),
        ('AMD Radeon HD 6800 Series', 'Radeon HD 6800'),
    )
    return next(
        (label for reported, label in short_labels if reported in gpu),
        gpu,
    )
    
def storageRename(storage):
    """Map an exact drive model string to its storage kind ('SSD' or 'HDD').

    Only exact matches are translated; every other value is passed through
    untouched.
    """
    drive_kinds = (
        ('CT500MX500SSD1', 'SSD'),
        ('ST2000DM001-9YN164', 'HDD'),
    )
    for model, kind in drive_kinds:
        if storage == model:
            return kind
    return storage
    

In [160]:
from datetime import datetime, timedelta

def getHardwareLog(start_date, end_date, hwlogpath):
    """Load a hardware log CSV and return the rows inside a time window.

    The CSV is read with its second line as the header row. The 'Time'
    column is parsed to datetimes and rows with
    ``start_date <= Time <= end_date`` (both ends inclusive) are kept.

    Returns:
        A new DataFrame holding only the 'Time', 'CPU Total' and 'GPU Core'
        columns of the selected rows.
    """
    log = pd.read_csv(hwlogpath, header=[1])
    timestamps = pd.to_datetime(log['Time'])
    log['Time'] = timestamps

    not_too_early = timestamps >= start_date
    not_too_late = timestamps <= end_date
    window = log.loc[not_too_early & not_too_late]

    # Copy across just the columns of interest, one at a time.
    minimized = pd.DataFrame()
    for column in ('Time', 'CPU Total', 'GPU Core'):
        minimized[column] = window[column]
    return minimized
    
def getBostonTimes(init_datetime, total_time, utc_offset_hours=-5):
    """Convert a UTC start time plus a duration into a local-time window.

    Args:
        init_datetime: Start of the interval as a datetime-like object
            (anything supporting ``+ timedelta`` and ``strftime``),
            expressed in UTC.
        total_time: Length of the interval in seconds.
        utc_offset_hours: Hours to add to UTC to obtain local time. Defaults
            to -5, the Boston-vs-UTC adjustment this function has always
            applied. This is a fixed offset: it does not follow daylight
            saving time, so pass a different value (e.g. -4) if the log was
            recorded under one.

    Returns:
        Tuple ``(start, end)`` of strings formatted as
        ``'%Y/%m/%d %H:%M:%S'`` in local time.
    """
    time_format = '%Y/%m/%d %H:%M:%S'
    local_start = init_datetime + timedelta(hours=utc_offset_hours)
    local_end = local_start + timedelta(seconds=total_time)
    return (local_start.strftime(time_format), local_end.strftime(time_format))

def computeHWLogMetrics(metrics_df, hwlogpath, output_dir='../data/interim/hardware-data/main/'):
    """Cut the hardware log into per-iteration windows and save one CSV per set.

    For every row of ``metrics_df`` the matching time window of the hardware
    log is extracted. Windows are accumulated until a row with ``it == 9`` is
    reached; at that point the accumulated rows are concatenated, given a
    running ``idx`` column and written to
    ``<output_dir>/<game_name>_<CPU>_<GPU>_<storage>.csv`` (names taken from
    that row), and accumulation starts over.

    Args:
        metrics_df: DataFrame with the columns 'game_name', 'init_time',
            'total_time', 'it', 'CPU', 'GPU' and 'storage'.
        hwlogpath: Path of the hardware log CSV to slice.
        output_dir: Directory the per-set CSV files are written to.

    Note:
        Windows collected after the last ``it == 9`` row are never written;
        a trailing, incomplete set is discarded when the function returns.
    """
    # Collect the per-iteration frames in a list and concatenate once per set:
    # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    pending = []
    for _, iteration in metrics_df.iterrows():
        start, end = getBostonTimes(iteration['init_time'], iteration['total_time'])
        pending.append(getHardwareLog(start, end, hwlogpath))
        if iteration['it'] == 9:
            # Row with it == 9 closes the set: flush everything gathered so far.
            aggregate = pd.concat(pending, ignore_index=True)
            aggregate['idx'] = np.arange(len(aggregate))
            filename = '_'.join([
                iteration['game_name'],
                iteration['CPU'],
                iteration['GPU'],
                iteration['storage'],
            ]) + '.csv'
            aggregate.to_csv(os.path.join(output_dir, filename))
            pending = []

In [161]:
import pandas as pd
import json
import numpy as np

# Build one metrics frame per hardware configuration, tag it with the
# configuration's CPU / GPU / storage labels and a sequential config_id, and
# slice the matching hardware log for each of them.
config_id = 0
config_frames = []
for (hwlogpath, hwspecspath, metricspath) in tests:
    # Context manager so the spec file is closed as soon as it has been parsed.
    with open(root + hwspecspath) as f:
        hw_specs = json.load(f)
    cpu = hw_specs['CPU']['cpu']
    gpu = hw_specs['GPUs'][0]['name']
    drives = hw_specs['drives']
    # First drive whose interface is reported as 'IDE'; raises IndexError if none is.
    storage_drive = [drive['model'] for drive in drives if drive['interface'] == 'IDE'][0]
    df = pd.read_json(root + metricspath)
    # Filter out bad points which are recomputed as extra iterations.
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not write into a slice of the original (SettingWithCopyWarning).
    df = df[df['total_time'] < 200].copy()
    # Iteration number within each consecutive run of 10 rows.
    df['it'] = np.arange(len(df)) % 10
    df['GPU'] = fixGPUName(gpu)
    df['CPU'] = cpuInfoToModel(cpu)
    df['storage'] = storageRename(storage_drive)
    df['config_id'] = config_id
    config_id += 1

    config_frames.append(df)

    computeHWLogMetrics(df, root + hwlogpath)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0.
aggregate_df = pd.concat(config_frames, ignore_index=True) if config_frames else pd.DataFrame()

In [162]:
# Persist the combined metrics of all configurations as JSON.
aggregate_json_path = '../data/interim/hardware-data/main/aggregate_hw_metrics.json'
aggregate_df.to_json(aggregate_json_path)

In [163]:
# Persist the combined metrics of all configurations as CSV.
aggregate_csv_path = '../data/interim/hardware-data/main/aggregate_hw_metrics.csv'
aggregate_df.to_csv(aggregate_csv_path)