In [24]:
import pandas as pd
import glob
import numpy as np
import os

In [25]:
def read_csv(ps):
    """Load one CSV and rename its 'hierachical' column after the file.

    Parameters
    ----------
    ps : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file's contents, with the 'filename' column read as strings and
        the 'hierachical' column renamed to the part of the file's base name
        that precedes its first dot.
    """
    # Reading 'filename' as str keeps the values exactly as written in the
    # file instead of letting pandas infer a numeric dtype.
    frame = pd.read_csv(ps, dtype={'filename': str})
    # Text before the first '.' of the base name, e.g. 'a/b/radius.csv' -> 'radius'.
    column_name = os.path.basename(ps).partition('.')[0]
    return frame.rename(columns={'hierachical': column_name})

In [26]:
def sum_from_male(x):
    """Evaluate the male degree-7 polynomial at ``x``.

    Parameters
    ----------
    x : number or array-like supporting ``**``, ``*`` and ``+``
        Point(s) at which to evaluate the polynomial.

    Returns
    -------
    Same kind as ``x``
        ``c7*x**7 + c6*x**6 + ... + c1*x + c0`` for the coefficients below.
    """
    # Coefficients from the x**7 term down to the constant term. The pair in
    # parentheses is the interval recorded next to each coefficient
    # (presumably the bounds reported by the curve fit -- TODO confirm).
    coefficients = (
        4.151e-18,   # x**7  (7.063e-19, 7.595e-18)
        -1.509e-14,  # x**6  (-2.8e-14, -2.174e-15)
        2.228e-11,   # x**5  (2.803e-12, 4.175e-11)
        -1.723e-08,  # x**4  (-3.236e-08, -2.109e-09)
        7.53e-06,    # x**3  (1.104e-06, 1.396e-05)
        -0.001881,   # x**2  (-0.003345, -0.0004164)
        0.2673,      # x**1  (0.1055, 0.4291)
        -7.899,      # x**0  (-14.42, -1.379)
    )
    *power_coeffs, intercept = coefficients
    degree = len(power_coeffs)

    # Terms are added from the highest power downwards, one at a time, so the
    # floating-point summation order matches the written-out expression.
    result = power_coeffs[0] * x**degree
    for offset, coeff in enumerate(power_coeffs[1:], start=1):
        result = result + coeff * x**(degree - offset)
    return result + intercept

def sum_from_female(x):
    """Evaluate the female degree-7 polynomial at ``x``.

    Parameters
    ----------
    x : number or array-like supporting ``**``, ``*`` and ``+``
        Point(s) at which to evaluate the polynomial.

    Returns
    -------
    Same kind as ``x``
        ``c7*x**7 + c6*x**6 + ... + c1*x + c0`` for the coefficients below.
    """
    # Coefficients from the x**7 term down to the constant term. The pair in
    # parentheses is the interval recorded next to each coefficient
    # (presumably the bounds reported by the curve fit -- TODO confirm).
    coefficients = (
        3.645e-18,   # x**7  (8.398e-19, 6.45e-18)
        -1.329e-14,  # x**6  (-2.416e-14, -2.429e-15)
        1.983e-11,   # x**5  (2.712e-12, 3.696e-11)
        -1.566e-08,  # x**4  (-2.977e-08, -1.552e-09)
        7.09e-06,    # x**3  (5.911e-07, 1.359e-05)
        -0.001866,   # x**2  (-0.003519, -0.0002139)
        0.2827,      # x**1  (0.07059, 0.4949)
        -12.29,      # x**0  (-22.82, -1.76)
    )
    *power_coeffs, intercept = coefficients
    degree = len(power_coeffs)

    # Terms are added from the highest power downwards, one at a time, so the
    # floating-point summation order matches the written-out expression.
    result = power_coeffs[0] * x**degree
    for offset, coeff in enumerate(power_coeffs[1:], start=1):
        result = result + coeff * x**(degree - offset)
    return result + intercept

In [27]:
def read_xlsx(ps):
    """Load an Excel sheet and turn its first, unnamed column into 'bone_type'.

    Parameters
    ----------
    ps : str
        Path of the Excel file to read.

    Returns
    -------
    pandas.DataFrame
        The sheet, where the column pandas labelled 'Unnamed: 0' is renamed
        to 'bone_type' and each of its values is reduced to the text on its
        second line (the piece between the first and second newline).
    """
    table = pd.read_excel(ps)
    # Each label is expected to span at least two lines; only the second line
    # is kept. A label without a newline raises IndexError.
    table['Unnamed: 0'] = [label.split('\n')[1] for label in table['Unnamed: 0']]
    return table.rename(columns={'Unnamed: 0': 'bone_type'})

In [28]:
# Collect every CSV in the current directory, in sorted path order so the
# resulting column order is deterministic.
bonecsvs = sorted(glob.glob('./*.csv'))

# One frame per CSV; read_csv names each value column after its file.
dfs = [read_csv(csv_path) for csv_path in bonecsvs]

# Fold the frames together on 'filename'. merge() defaults to an inner join,
# so only filenames present in every CSV survive.
df = dfs[0]
for dfi in dfs[1:]:
    df = df.merge(dfi, on='filename')

# One row per filename, one column per CSV.
res_df = df.set_index('filename')

In [29]:
# Score tables for each sex, indexed by bone type and sorted by that index.
male_s = read_xlsx('../male.xls').set_index('bone_type').sort_index()
female_s = read_xlsx('../female.xls').set_index('bone_type').sort_index()

In [30]:
# Label sheet; ids are read as strings so they are not parsed as numbers.
last_use_label_df = pd.read_excel('../last_use_label.xls', dtype={'id': str})

# Ids of the rows whose sex is 'M', each prefixed with a '0'
# (presumably to match the 'filename' values used elsewhere -- TODO confirm).
is_male = last_use_label_df['sex'] == 'M'
tt = '0' + last_use_label_df.loc[is_male, 'id'].values

# Set for fast membership tests.
male_ids = set(tt)

In [33]:
# The columns of res_df must line up, element by element, with the index of
# both score tables.
for score_table in (male_s, female_s):
    assert np.all(res_df.columns == score_table.index.values)

In [35]:
# Add the two result columns up front, filled with -1 as a placeholder.
for placeholder_column in ('s', 'age'):
    res_df[placeholder_column] = -1

In [36]:
# Per-row results, in the order of res_df.index.
all_s = []     # summed score of each row
age_s = []     # polynomial of the summed score
gender_s = []  # 1 when the row's filename is in male_ids, else 0

# Columns this notebook adds to res_df itself. They hold no grade and must not
# be looked up in the score tables; skipping them explicitly (instead of
# relying on their -1 placeholder value) keeps this cell re-runnable after the
# results have been written back into res_df.
# NOTE(review): assumes no real bone column is named 's', 'age' or 'gender'.
derived_columns = {'s', 'age', 'gender'}
bone_columns = [col for col in res_df.columns if col not in derived_columns]

for x in res_df.index:
    # Every filename that is not a known male id takes the female branch.
    if x in male_ids:
        ss, summary, g = male_s, sum_from_male, 1
    else:
        ss, summary, g = female_s, sum_from_female, 0

    t = 0.
    for bone_type, grade in zip(bone_columns, res_df.loc[x, bone_columns].values):
        # Negative grades contribute nothing to the total.
        if grade >= 0:
            # Score-table cell for this bone (row) and grade (column).
            t += ss.loc[bone_type, grade]

    all_s.append(t)
    age_s.append(summary(t))
    gender_s.append(g)

In [38]:
# Write the per-row results back into res_df, one column per result list.
for result_column, result_values in (
    ('s', all_s),
    ('age', age_s),
    ('gender', gender_s),
):
    res_df[result_column] = result_values

In [41]:
# Persist the full result table (index included) one directory up.
final_res_path = '../final_res.csv'
res_df.to_csv(final_res_path)