In [11]:
import os, sys, getopt
import numpy as np



def read_sources_features_from_dir(features_dir, fun_features_str2tuple):
    time_begin = datetime.now()
    files = get_files_from_dir(features_dir)
    file_feature_dict = {}
    counter = 0
    for file in files:
        if os.path.isdir('%s/%s' % (features_dir, file)) or file.startswith('.') or os.path.getsize(
                '%s/%s' % (features_dir, file)) == 0:
            print('忽略空文件: %s' % file)
            continue

        source_file = '%s/%s' % (features_dir, file)
        list_of_features = text_read(source_file)
        counter += 1
        print('src-file %d  %s %d' % (counter, source_file, len(list_of_features)))

        source_features = np.array(list(map(lambda d: fun_features_str2tuple(d), list_of_features)), dtype=np.float32)
        file_feature_dict[file] = source_features
    time_end = datetime.now()
    print('读取%d个源特征文件用时：%s' % (len(files), (time_end - time_begin)))
    return file_feature_dict


def trans_source_data_to_tuple(data):
    ds = data.split()
    return float(ds[1]), float(ds[2]), float(ds[3]), float(ds[4])


def trans_all_source_features_as_whole(file_feature_dict):
    time_begin = time.time()

    total_features_numb = 0
    file_index_dict = {}
    file_features_index_dict = {}
    begin_index = 0
    file_index = 0
    print('prepare trans features')
    for file_name, features in file_feature_dict.items():
        source_features_numb = len(features)
        total_features_numb += source_features_numb

        file_index_dict[file_index] = file_name
        file_features_index_dict[file_index] = (begin_index, total_features_numb)

        begin_index = total_features_numb
        file_index += 1
    time_prepare_end = time.time()

    mem_size = total_features_numb * 4 * 4   # 点精度浮点32位，4字节，每个feature 16个字节。
    print('prepare trans features end. TotalFeatures: %d %d %s' % (total_features_numb, mem_size, (time_prepare_end - time_begin)))

    all_source_features = np.empty(shape=[0, 4])
    for file_id in range(len(file_features_index_dict.items())):
        file_name = file_index_dict[file_id]
        features = file_feature_dict[file_name]
        all_source_features = np.vstack((all_source_features, features))
    time_prepare_end2 = time.time()
    print('prepare all_source_features end. %s' % (time_prepare_end2 - time_prepare_end))

    d_all_source_features = cuda.to_device(all_source_features)
    time_end = time.time()


    print('to_device time. %s' % (time_end - time_prepare_end2))
    print('特征值%d导入GPU用时：%d' % (mem_size, (time_end - time_begin)))
    return d_all_source_features, file_index_dict, file_features_index_dict


file_feature_dict = read_sources_features_from_dir(sources_dir, trans_source_data_to_tuple)
file_feature_dict 

d_all_sources_features, file_index_dict, file_features_index_dict = trans_all_source_features_as_whole(
        file_feature_dict)

d_all_sources_features

src-file 1  ./data/source/超级飞侠24_AIresult_top5.txt 3903
src-file 2  ./data/source/706_故宫至宝11_2020-08-05-17-36-20.txt 15710
src-file 3  ./data/source/面具战士06_AIresult_top5.txt 7557
src-file 4  ./data/source/700_故宫至宝05_2020-08-05-15-49-38.txt 16084
src-file 5  ./data/source/2_果果骑侠传E02_2020-08-01-06-48-07.txt 4202
src-file 6  ./data/source/446_翠兰的爱情-24_2020-08-04-07-45-22.txt 13624
src-file 7  ./data/source/熊仔之雄心壮志29_AIresult_top5.txt 4501
src-file 8  ./data/source/740_草根王-29_2020-08-05-20-31-36.txt 13499
src-file 9  ./data/source/763_边关烽火情-22_2020-08-06-01-38-37.txt 13683
src-file 10  ./data/source/244_爱情达阵_2020-08-02-18-20-46.txt 32521
src-file 11  ./data/source/558_案发48小时13_2020-08-04-23-28-21.txt 12819
src-file 12  ./data/source/483_冰路前行11_2020-08-04-11-47-34.txt 12748
src-file 13  ./data/source/兵出潼关-24_AIresult_top5.txt 13530
src-file 14  ./data/source/熊仔之雄心壮志01_AIresult_top5.txt 4501
src-file 15  ./data/source/828_厨艺大师S08E02_2020-08-06-09-34-31.txt 17799
src-file 16  ./data/source/28

src-file 125  ./data/source/873_厨艺大师：专业人员S04E12_2020-08-06-14-48-37.txt 17757
src-file 126  ./data/source/468_传奇航母06_2020-08-04-13-18-37.txt 15379
src-file 127  ./data/source/340_宝贝儿回家-17_2020-08-03-16-01-23.txt 13653
src-file 128  ./data/source/961_地球家园32_2020-08-06-15-55-32.txt 7733
src-file 129  ./data/source/爱情口难开_AIresult_top5.txt 30189
src-file 130  ./data/source/123_豆芽农场31_2020-08-01-14-59-53.txt 4501
src-file 131  ./data/source/103_豆芽农场11_2020-08-01-13-02-42.txt 4501
src-file 132  ./data/source/兵出潼关-15_AIresult_top5.txt 13594
src-file 133  ./data/source/578_案发48小时33_2020-08-05-03-21-45.txt 13076
src-file 134  ./data/source/熊仔之雄心壮志30_AIresult_top5.txt 4500
src-file 135  ./data/source/105_豆芽农场13_2020-08-01-13-31-52.txt 4501
src-file 136  ./data/source/255_兵出潼关-02_2020-08-02-18-37-28.txt 13592
src-file 137  ./data/source/I-5杀手追杀令_AIresult_top5.txt 25296
src-file 138  ./data/source/果果骑侠传E33_AIresult_top5.txt 4202
src-file 139  ./data/source/206_42号传奇_2020-08-02-17-49-31.txt 36694
s

src-file 243  ./data/source/941_品位空间43_2020-08-06-12-40-17.txt 6727
src-file 244  ./data/source/653_贝多芬计划-01_2020-08-05-08-30-33.txt 7824
src-file 245  ./data/source/231_爱乐之城_2020-08-02-20-32-51.txt 35841
src-file 246  ./data/source/245_爱情避风港_2020-08-02-17-08-50.txt 31136
src-file 247  ./data/source/384_爸爸是条龙-02_2020-08-03-22-14-39.txt 13560
src-file 248  ./data/source/110_豆芽农场18_2020-08-01-13-23-46.txt 4501
src-file 249  ./data/source/631_濒临灭绝06_2020-08-05-05-16-53.txt 15263
src-file 250  ./data/source/906_品位空间08_2020-08-06-09-09-17.txt 6749
src-file 251  ./data/source/246_爱慕_2020-08-02-16-08-22.txt 30327
src-file 252  ./data/source/564_案发48小时19_2020-08-05-01-23-16.txt 13442
src-file 253  ./data/source/933_品位空间35_2020-08-06-12-01-06.txt 6713
src-file 254  ./data/source/333_宝贝儿回家-10_2020-08-03-13-10-04.txt 13660
src-file 255  ./data/source/113_豆芽农场21_2020-08-01-13-32-40.txt 4501
src-file 256  ./data/source/115_豆芽农场23_2020-08-01-14-18-29.txt 4501
src-file 257  ./data/source/589_案发48小时44

src-file 362  ./data/source/952_地球家园16_2020-08-06-15-50-16.txt 7687
src-file 363  ./data/source/789_zippo音乐之旅08_2020-08-06-04-24-50.txt 7653
src-file 364  ./data/source/380_春光灿烂猪九妹-27_2020-08-03-22-14-14.txt 13513
src-file 365  ./data/source/587_案发48小时42_2020-08-05-04-47-02.txt 12260
src-file 366  ./data/source/461_不呼吸的人-01：冰下潜行_2020-08-04-12-22-44.txt 13202
src-file 367  ./data/source/984_地球家园E23_2020-08-06-16-03-19.txt 7835
src-file 368  ./data/source/294_大村官-09_2020-08-03-10-06-40.txt 13785
src-file 369  ./data/source/731_草根王-20_2020-08-05-18-11-30.txt 13495
src-file 370  ./data/source/704_故宫至宝09_2020-08-05-15-36-46.txt 15279
src-file 371  ./data/source/920_品位空间22_2020-08-06-07-56-01.txt 6704
src-file 372  ./data/source/928_品位空间30_2020-08-06-08-58-57.txt 6754
src-file 373  ./data/source/793_世界厨房S01E03_2020-08-06-04-16-23.txt 6700
src-file 374  ./data/source/970_地球家园401_2020-08-06-19-56-43.txt 7549
src-file 375  ./data/source/278_兵出潼关-25_2020-08-03-03-23-08.txt 13563
src-file 376  ./

src-file 479  ./data/source/582_案发48小时37_2020-08-04-21-06-58.txt 12868
src-file 480  ./data/source/284_兵出潼关-31_2020-08-03-03-59-08.txt 13564
src-file 481  ./data/source/58_熊仔之雄心壮志18_2020-08-01-10-35-55.txt 4500
src-file 482  ./data/source/705_故宫至宝10_2020-08-05-16-11-04.txt 15430
src-file 483  ./data/source/286_大村官-01_2020-08-03-04-37-58.txt 13645
src-file 484  ./data/source/487_冰路前行15_2020-08-04-13-22-37.txt 12818
src-file 485  ./data/source/460_草根王-08_2020-08-04-11-49-58.txt 13498
src-file 486  ./data/source/639_爆破达人04：核电站任务_2020-08-05-05-24-07.txt 14074
src-file 487  ./data/source/994_奈杰尔·斯莱特的今日特色餐05_2020-08-06-22-31-55.txt 8735
src-file 488  ./data/source/156_超级飞侠24_2020-08-01-16-50-22.txt 3903
src-file 489  ./data/source/719_颐和园06_2020-08-05-19-59-47.txt 13712
src-file 490  ./data/source/190_笔仙_2020-08-02-17-32-37.txt 27968
src-file 491  ./data/source/481_冰路前行09_2020-08-04-13-36-54.txt 11939
src-file 492  ./data/source/312_大村官-27_2020-08-03-13-40-03.txt 13751
src-file 493  ./data/s

src-file 596  ./data/source/626_濒临灭绝01_2020-08-05-13-39-55.txt 15390
src-file 597  ./data/source/710_美丽中国03_2020-08-05-17-59-58.txt 14582
src-file 598  ./data/source/575_案发48小时30_2020-08-05-03-57-00.txt 13158
src-file 599  ./data/source/454_草根王-02_2020-08-04-10-43-18.txt 13499
src-file 600  ./data/source/983_地球家园E22_2020-08-06-22-26-18.txt 7733
src-file 601  ./data/source/645_贝丝·霍洛威追凶记03_2020-08-05-11-16-16.txt 11532
src-file 602  ./data/source/536_大象的神秘生活01_2020-08-05-00-46-12.txt 14937
src-file 603  ./data/source/388_爸爸是条龙-06_2020-08-04-00-50-19.txt 13491
src-file 604  ./data/source/89_熊仔之雄心壮志49_2020-08-01-12-42-00.txt 4500
src-file 605  ./data/source/892_吉米的食品工厂2_2020-08-06-12-25-38.txt 8649
src-file 606  ./data/source/211_BJ单身日记_2020-08-02-16-50-15.txt 25695
src-file 607  ./data/source/681_大三峡04_2020-08-05-16-55-58.txt 13252
src-file 608  ./data/source/126_豆芽农场34_2020-08-01-14-15-53.txt 4501
src-file 609  ./data/source/724_草根王-13_2020-08-05-20-13-22.txt 13501
src-file 610  ./data/s

src-file 716  ./data/source/331_宝贝儿回家-08_2020-08-03-12-45-29.txt 13652
src-file 717  ./data/source/988_地球家园E27_2020-08-06-16-55-59.txt 7700
src-file 718  ./data/source/49_熊仔之雄心壮志09_2020-08-01-09-46-46.txt 4501
src-file 719  ./data/source/664_达拉斯特警02_2020-08-05-14-12-42.txt 13524
src-file 720  ./data/source/820_世界厨房S04E02_2020-08-05-21-42-10.txt 6727
src-file 721  ./data/source/168_面具战士10_2020-08-01-19-41-05.txt 7557
src-file 722  ./data/source/191_笔仙2_2020-08-02-18-09-15.txt 28322
src-file 723  ./data/source/680_大三峡03_2020-08-05-12-53-51.txt 13252
src-file 724  ./data/source/981_地球家园412_2020-08-06-16-17-11.txt 7877
src-file 725  ./data/source/887_厨艺大师：专业人员第四季特别篇2_2020-08-06-08-04-40.txt 8682
src-file 726  ./data/source/713_美丽中国06_2020-08-05-18-43-24.txt 14762
src-file 727  ./data/source/201_1966年世界杯_2020-08-02-10-19-39.txt 26915
src-file 728  ./data/source/976_地球家园407_2020-08-06-20-34-05.txt 7607
src-file 729  ./data/source/733_草根王-22_2020-08-05-20-09-01.txt 13495
src-file 730  ./data/

src-file 832  ./data/source/678_大三峡01_2020-08-05-12-48-01.txt 13252
src-file 833  ./data/source/170_面具战士12_2020-08-01-18-55-36.txt 7557
src-file 834  ./data/source/567_案发48小时22_2020-08-05-00-18-09.txt 13446
src-file 835  ./data/source/634_濒临灭绝09_2020-08-05-10-06-27.txt 15332
src-file 836  ./data/source/415_爸爸是条龙-33_2020-08-04-04-16-43.txt 13530
src-file 837  ./data/source/67_熊仔之雄心壮志27_2020-08-01-11-11-38.txt 4500
src-file 838  ./data/source/622_澳洲边缘-04：珀斯_2020-08-05-09-00-33.txt 14062
src-file 839  ./data/source/810_世界厨房S03E08_2020-08-05-23-00-51.txt 6751
src-file 840  ./data/source/404_爸爸是条龙-22_2020-08-04-01-12-04.txt 13510
src-file 841  ./data/source/852_厨艺大师：专业人员S03E11_2020-08-06-14-19-18.txt 17750
src-file 842  ./data/source/809_世界厨房S03E07_2020-08-05-23-32-36.txt 6659
src-file 843  ./data/source/718_颐和园05_2020-08-05-20-42-41.txt 13713
src-file 844  ./data/source/511_城市之巅04_2020-08-04-19-44-01.txt 15574
src-file 845  ./data/source/935_品位空间37_2020-08-06-09-49-04.txt 6554
src-file 846

src-file 949  ./data/source/317_大村官-32_2020-08-03-10-02-54.txt 13656
src-file 950  ./data/source/525_城市探索者-12：凤凰_2020-08-04-15-52-07.txt 7791
src-file 951  ./data/source/620_澳洲边缘-02：沉船海岸_2020-08-05-09-25-28.txt 14119
src-file 952  ./data/source/913_品位空间15_2020-08-06-09-20-30.txt 6729
src-file 953  ./data/source/185_b+侦探_2020-08-02-06-00-01.txt 29294
src-file 954  ./data/source/972_地球家园403_2020-08-06-21-37-59.txt 7655
src-file 955  ./data/source/17_果果骑侠传E17_2020-08-01-07-31-37.txt 4202
src-file 956  ./data/source/860_厨艺大师：专业人员S03E19_2020-08-06-05-12-27.txt 8871
src-file 957  ./data/source/141_超级飞侠09_2020-08-01-15-27-49.txt 3903
src-file 958  ./data/source/128_豆芽农场36_2020-08-01-14-48-27.txt 4501
src-file 959  ./data/source/593_案发48小时48_2020-08-05-05-35-43.txt 12245
src-file 960  ./data/source/136_超级飞侠04_2020-08-01-15-14-57.txt 3903
src-file 961  ./data/source/770_边关烽火情-29_2020-08-05-23-09-18.txt 13683
src-file 962  ./data/source/351_宝贝儿回家-28_2020-08-03-16-31-11.txt 13652
src-file 963  ./

array([[7.11000000e+02, 1.15840003e-01, 2.33000000e+02, 6.53299987e-02],
       [7.11000000e+02, 1.04757003e-01, 2.33000000e+02, 7.02200010e-02],
       [7.11000000e+02, 1.22422002e-01, 2.33000000e+02, 6.44659996e-02],
       ...,
       [7.80000000e+02, 2.28504002e-01, 2.50000000e+02, 2.21166000e-01],
       [2.50000000e+02, 2.47686997e-01, 7.80000000e+02, 2.38499001e-01],
       [7.80000000e+02, 2.93300986e-01, 2.50000000e+02, 1.95785999e-01]])