In [52]:
import json
import numpy as np 
import pandas as pd
from glob import glob   
import shutil
import os
import matplotlib.pyplot as plt
import PIL.Image as pilimg
import argparse
import sys
from tqdm import tqdm
from decimal import Decimal
import pickle

## Feature-Extractor-JSON

In [54]:
def feature_extractor_json(json_data, assembly_name):
    
    df_features=pd.DataFrame()
    df_features.loc[0, 'assembly_name']=assembly_name

    """
    Functional variable - "surface_type" and "xyz_moments_of_inertia"
    """
    
    surface_type_list=['ConeSurfaceType', 'CylinderSurfaceType','EllipticalConeSurfaceType', 'EllipticalCylinderSurfaceType','NurbsSurfaceType', 'PlaneSurfaceType', 'SphereSurfaceType','TorusSurfaceType']
    xyz_moments_list=['xx','yy','zz','xy','yz','xz']

    df_features[surface_type_list+xyz_moments_list]=0
    for types in json_data['properties']['surface_types']:
        df_features.loc[0, types['surface_type']]=types['face_count']

    df_features.loc[0, xyz_moments_list]=Decimal(json_data['properties']['xyz_moments_of_inertia']['xx']), Decimal(json_data['properties']['xyz_moments_of_inertia']['yy']), Decimal(json_data['properties']['xyz_moments_of_inertia']['zz']), Decimal(json_data['properties']['xyz_moments_of_inertia']['xy']), Decimal(json_data['properties']['xyz_moments_of_inertia']['yz']), Decimal(json_data['properties']['xyz_moments_of_inertia']['xz'])

    """
    Physical variable - 'vertex_count', 'edge_count', 'face_count', 'loop_count', 'shell_count', 'body_count'
    """
    
    cnt_list=['vertex_count', 'edge_count', 'face_count', 'loop_count', 'shell_count', 'body_count']
    df_features.loc[0, cnt_list] = json_data['properties']['vertex_count'], json_data['properties']['edge_count'], json_data['properties']['face_count'], json_data['properties']['loop_count'], json_data['properties']['shell_count'], json_data['properties']['body_count']
    
    return df_features

##### Generate feature dataset using assembly.json files

In [12]:
path=r'C:/Users/IDEA/Dropbox/IDEA_tmp/행정/개인폴더/최지아/2022 ASME Hackathon/problem3/assembly-jsons/'
json_files=os.listdir(path)

In [17]:
df=pd.DataFrame()
for file in json_files:
    with open(path+file, 'r') as f:
        json_data = json.load(f)
        assembly_name=file.replace('.json',"")
    df=pd.concat([df, feature_extractor_json(json_data, assembly_name)], axis=0)

In [19]:
df.head()

Unnamed: 0,assembly_name,ConeSurfaceType,CylinderSurfaceType,EllipticalConeSurfaceType,EllipticalCylinderSurfaceType,NurbsSurfaceType,PlaneSurfaceType,SphereSurfaceType,TorusSurfaceType,xx,...,zz,xy,yz,xz,vertex_count,edge_count,face_count,loop_count,shell_count,body_count
0,112724_d31b37f0,4,132,0,10,80,110,8,36,3.02426138226721974433530704118311405181884765625,...,5.31636172675093643391619480098597705364227294...,-1.0505979070873032732436058722669258713722229...,4.61230762471842581362579949200153350830078125E-8,1.56421495645098218574275961145758628845214843...,640.0,1006.0,380.0,414.0,4.0,4.0
0,112724_fc7e904f,18,68,0,0,110,97,0,32,0.80268656198418231184632531949318945407867431...,...,0.80267713170740095929289736886858008801937103...,-6.4456043018595493388709629853372540253531042...,0.00000948359470325562361927695248509095904410...,1.13935833613156484845399304809722418330153459...,508.0,776.0,325.0,392.0,4.0,4.0
0,112738_1107ad1c,0,27,0,0,0,37,0,0,1.03066617601382648494734439736930653452873229...,...,3.26676589504266878805083251791074872016906738...,0.08663077480225056525853233324596658349037170...,5.33752193574548786905893671406175826632534153...,1.36838510173650927231910051770288894168459137...,95.0,127.0,64.0,100.0,4.0,4.0
0,112743_4379a6b3,0,0,0,0,0,17,0,0,31421375.1505596339702606201171875,...,8327428.111097807995975017547607421875,-540113.232500507496297359466552734375,-1995.467897473503171568154357373714447021484375,-2964.1821118453617600607685744762420654296875,26.0,39.0,17.0,17.0,2.0,2.0
0,112751_bc27f8f7,6,50,0,0,29,114,0,4,175.8515497820253585814498364925384521484375,...,138.5631503476178068012814037501811981201171875,0.13698189282414252598840676000691018998622894...,0.00023297693035001509898052063363138586282730...,0.00001892999940642550084509386498776706275748...,194.0,247.0,203.0,257.0,49.0,49.0


In [42]:
# df.to_excel('../problem3/feature-extractor-output/train_json.xlsx',index=False)

###### Genearte validation feature dataset 

In [25]:
path=r'C:/Users/IDEA/Dropbox/IDEA_tmp/행정/개인폴더/최지아/2022 ASME Hackathon/problem3/val-jasons/'
val_files=os.listdir(path)

In [27]:
df_val=pd.DataFrame()
for file in val_files:
    with open(path+file, 'r') as f:
        json_data = json.load(f)
        assembly_name=file.replace('.json',"")
    df_val=pd.concat([df_val, feature_extractor_json(json_data, assembly_name)], axis=0)

In [29]:
# df_val.to_excel('../problem3/feature-extractor-output/val_json.xlsx',index=False)

##### Generate test feature dataset

In [55]:
path=r'C:/Users/IDEA/Dropbox/IDEA_tmp/행정/개인폴더/최지아/2022 ASME Hackathon/problem3/assembly-jsons-test/'
test_files=os.listdir(path)

In [56]:
df_test=pd.DataFrame()
for file in test_files:
    with open(path+file, 'r') as f:
        json_data = json.load(f)
        assembly_name=file.replace('.json',"")
    df_test=pd.concat([df_test, feature_extractor_json(json_data, assembly_name)], axis=0)

In [57]:
df_test

Unnamed: 0,assembly_name,ConeSurfaceType,CylinderSurfaceType,EllipticalConeSurfaceType,EllipticalCylinderSurfaceType,NurbsSurfaceType,PlaneSurfaceType,SphereSurfaceType,TorusSurfaceType,xx,...,zz,xy,yz,xz,vertex_count,edge_count,face_count,loop_count,shell_count,body_count
0,38929_e41edd3a,12,16,0,0,0,30,0,4,2074.5027944543062403681688010692596435546875,...,1817.870079917328666851972229778766632080078125,-664.21630620820224066847003996372222900390625,764.85514048217237359494902193546295166015625,559.5610995292444158621947281062602996826171875,56.0,72.0,62.0,82.0,13.0,13.0
0,41685_df8ac866,8,77,0,0,0,151,4,2,13576.047666118274719337932765483856201171875,...,30364.08600736437438172288239002227783203125,11490.530164496625729952938854694366455078125,-3335.79021382667133366339839994907379150390625,6119.339354595003896974958479404449462890625,344.0,475.0,242.0,367.0,22.0,22.0
0,43337_e436b759,0,103,0,2,232,152,34,4,2752149.4768525636754930019378662109375,...,2638079.8370785634033381938934326171875,17.93078039696047198958694934844970703125,601206.112197213689796626567840576171875,-2.84624367972395475590019486844539642333984375,715.0,1130.0,527.0,627.0,17.0,17.0
0,48116_e48b3d1e,3,62,0,0,128,47,0,51,0.02129370478377620723442475991760147735476493...,...,1.50842871938103351148185993224615231156349182...,-1.8851403504106272563790820456439134567006021...,6.88134260318218588309898965356750655253392778...,-1.3747708222956870640565506160729603024606149...,412.0,692.0,291.0,300.0,2.0,2.0
0,49328_ec54acac,39,248,0,7,32,249,8,52,6654.164426009467206313274800777435302734375,...,2450.5456384980434449971653521060943603515625,12.4432701082473311515741443145088851451873779...,1171.16935866130734211765229701995849609375,-8.7740379461780175773810697137378156185150146...,953.0,1341.0,635.0,908.0,33.0,33.0
0,52886_5743bcf0,84,506,0,6,95,471,27,113,330.6969347058985704279621131718158721923828125,...,335.7829387343690541456453502178192138671875,-29.391946704272644552702331566251814365386962...,-0.0170337064311080234324524695921354577876627...,-0.0024296677249688904132174371852670446969568...,2070.0,3245.0,1302.0,1643.0,37.0,37.0
0,78600_3f295e84,1,13,0,0,0,16,2,2,167.761640619922246742135030217468738555908203125,...,186.855009936893708299976424314081668853759765625,-20.331525047524724669756324146874248981475830...,-22.050440384952775474403097177855670452117919...,2.08167900008098216702023819379974156618118286...,34.0,38.0,34.0,55.0,6.0,6.0


## Load embedded Image files 

In [30]:
img_tr=pd.read_csv('feature-extractor-output/ass_fingerprints.csv')
img_val=pd.read_csv('feature-extractor-output/val_fingerprints.csv')

In [39]:
img_tr.rename(columns={'ID':'assembly_name'}, inplace=True)
img_val.rename(columns={'ID':'assembly_name'}, inplace=True)

img_tr.assembly_name=img_tr.assembly_name.apply(lambda x: x.replace(".jpg",""))
img_val.assembly_name=img_val.assembly_name.apply(lambda x: x.replace(".jpg",""))

In [44]:
#img_tr.to_excel('feature-extractor-output/train_img.xlsx',index=False)
#img_val.to_excel('feature-extractor-output/val_img.xlsx',index=False)

In [49]:
train=pd.merge(df, img_tr, on='assembly_name')
validation=pd.merge(df_val, img_val, on='assembly_name')

In [51]:
train.to_pickle('feature-extractor-output/train_dataset.pickle')
validation.to_pickle('feature-extractor-output/validation_dataset.pickle')

In [59]:
img_test=pd.read_csv('feature-extractor-output/test-fingerprints.csv')
img_test.rename(columns={'ID':'assembly_name'}, inplace=True)
img_test.assembly_name=img_test.assembly_name.apply(lambda x: x.replace(".jpg",""))

In [63]:
test=pd.merge(df_test,img_test,on='assembly_name')
test.to_pickle('feature-extractor-output/test_dataset.pickle')