In [1]:
import os
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import time
import datetime
import zipfile

- a) 传感器高频数据：该数据来自于模温机及模具传感器采集的数据，文件夹内每一个模次对应一个csv文件，单个模次时长为40~43s，采样频率根据阶段有20Hz和50Hz两种，含有24个传感器采集的数据；
- b) 成型机状态数据（data_spc）：该数据来自成型机机台，均为表征成型过程中的一些状态数据，每一行对应一个模次，数据维度为86维；
- c) 机台工艺设定参数（data_set）：文件夹中含有注塑成型的81种工艺设定参数；
- d) 产品测量尺寸(size)：文件夹内含有每个模次产品的3维尺寸；
- e) 现场巡检数据（初赛不提供此类数据）；
- f) 电机停机记录（初赛不提供此类数据）；
- g) 炮筒温度记录（初赛不提供此类数据）。

![pic](./info.png)

- 动作阶段，包括：
0-空闲，1-合模，2-座进，3-注射，4-保压，6-熔胶，7-后松退，8-冷却，9-座退，10-开模，11-顶进，12-顶退，13~16-中子1~4进，17~20-中子1~4退。

# 数据读取

## 一般数据读取

In [3]:
# Finals test set for task A: process set-points (data_set), machine status
# data (data_spc) and sub_file (presumably the submission template -- confirm).
test_data_set_taskA=pd.read_csv(r'./taskA/data_set.csv')
test_data_spc_taskA=pd.read_csv(r'./taskA/data_spc.csv')
sub_file_taskA =pd.read_csv('./taskA/sub_file.csv')

In [3]:
# Report the (rows, columns) shape of each freshly loaded task-A table.
for label, frame in (
    ("test_data_set_taskA形状:", test_data_set_taskA),
    ("test_data_spc_taskA形状:", test_data_spc_taskA),
    ("sub_file_taskA形状:", sub_file_taskA),
):
    print(label, frame.shape)

test_data_set_taskA形状: (295, 347)
test_data_spc_taskA形状: (3971, 86)
sub_file_taskA形状: (3971, 4)


## 决赛高频传感器数据读取

In [4]:
# Every high-frequency channel summarised per shot; each one gets the five
# base statistics: mean, var, max, min, median.
variables_sensor=['Sensor1', 'Sensor2', 'Sensor3', 'IJ', 'Sensor5',
       'Sensor6', 'MouldTemp1', 'MouldTemp2', 'MouldTemp3', 'MouldTemp4',
       'MouldTemp5', 'MouldTemp9', 'MouldTemp10', 'MouldTemp11', 'MouldTemp12',
       'MouldTemp13', 'MouldTemp14', 'Sensor8', 'MouldFlow1', 'MouldFlow2',
       'MouldFlow3', 'SP']
# "Pressure" group: additionally skew, kurt, and mean/sum/var restricted to
# phases 4, 6, 7, 8 and 18.
variables_sensor_pressure=['Sensor1', 'Sensor2', 'Sensor3']
# "Temp" group: additionally skew, kurt, and sum/max restricted to
# phases 4, 6, 7, 8, 18 and 10.
variables_sensor_temp=['Sensor5', 'Sensor6']
# IJ channel: additionally argmax/max/sum/mean/var over phase 3 and
# mean/var over phase 4.
variables_sensor_IJ=['IJ']

In [5]:
def add_individual_feature_cols(v,f_cols):
    """Append the group-specific feature names of sensor ``v`` to ``f_cols``.

    The group is decided by membership in ``variables_sensor_pressure``,
    ``variables_sensor_temp`` or ``variables_sensor_IJ`` (checked in that
    order); a sensor that belongs to none of them adds nothing.

    Parameters
    ----------
    v : str
        Sensor column name, used as the prefix of every generated name.
    f_cols : list
        Name list that is extended in place.

    Returns
    -------
    list
        The same ``f_cols`` object, after extension.
    """
    if v in variables_sensor_pressure:
        suffixes = (
            '_skew',
            '_kurt',
            '_mean_phase4678_18',
            '_sum_phase4678_18',
            '_var_phase4678_18',
        )
    elif v in variables_sensor_temp:
        suffixes = (
            '_skew',
            '_kurt',
            '_sum_phase4678_18_10',
            '_max_phase4678_18_10',
        )
    elif v in variables_sensor_IJ:
        suffixes = (
            '_argmax_phase3',
            '_max_phase3',
            '_sum_phase3',
            '_mean_phase3',
            '_var_phase3',
            '_mean_phase4',
            '_var_phase4',
        )
    else:
        suffixes = ()
    f_cols.extend(v + suffix for suffix in suffixes)
    return f_cols
def add_individual_feature(col,avg,tmp_df):
    """Append the group-specific statistics of sensor ``col`` to ``avg``.

    Values are appended in the same order in which
    ``add_individual_feature_cols`` emits the matching column names.

    Parameters
    ----------
    col : str
        Sensor column of ``tmp_df`` to summarise.
    avg : list
        Accumulator of feature values; extended in place.
    tmp_df : pandas.DataFrame
        Samples of one shot; must contain ``col`` and, for grouped sensors,
        a ``"Phase"`` column.

    Returns
    -------
    list
        The same ``avg`` object, after extension. Sensors that belong to no
        group leave it unchanged.
    """
    if col in variables_sensor_pressure:
        series = tmp_df[col]
        in_phase = series[tmp_df["Phase"].isin([4.0,6.0,7.0,8.0,18.0])]
        avg.extend([
            series.skew(),
            series.kurt(),
            in_phase.mean(),
            in_phase.sum(),
            in_phase.var(),
        ])
    elif col in variables_sensor_temp:
        series = tmp_df[col]
        in_phase = series[tmp_df["Phase"].isin([4.0,6.0,7.0,8.0,18.0,10.0])]
        avg.extend([
            series.skew(),
            series.kurt(),
            in_phase.sum(),
            in_phase.max(),
        ])
    elif col in variables_sensor_IJ:
        series = tmp_df[col]
        phase3 = series[tmp_df["Phase"].isin([3.0])]
        phase4 = series[tmp_df["Phase"].isin([4.0])]
        # Position of the peak within the phase-3 samples (0 when the shot has
        # no phase-3 samples). The mask must be built from "Phase": masking on
        # the sensor values themselves selects only samples equal to 3.0 and
        # makes this feature constantly 0.
        # NOTE(review): any training features produced elsewhere with the old
        # mask must be regenerated so train and test stay consistent.
        avg.append(phase3.argmax() if len(phase3) > 0 else 0)
        avg.extend([
            phase3.max(),
            phase3.sum(),
            phase3.mean(),
            phase3.var(),
            phase4.mean(),
            phase4.var(),
        ])
    return avg

def feature_columns(variables):
    """Return the ordered feature names produced for ``variables``.

    For every sensor the five base names (``_mean``, ``_var``, ``_max``,
    ``_min``, ``_median``) come first, followed by whatever
    ``add_individual_feature_cols`` adds for that sensor.
    """
    base_suffixes = ('_mean', '_var', '_max', '_min', '_median')
    names = []
    for sensor in variables:
        names.extend(sensor + suffix for suffix in base_suffixes)
        names = add_individual_feature_cols(sensor, names)
    return names

def stage_features(df, variables):
    """Compute the feature vector of one shot.

    For each sensor in ``variables`` the mean, variance, maximum, minimum and
    median of its column are collected, followed by the group-specific values
    from ``add_individual_feature``.

    Returns
    -------
    numpy.ndarray
        1-D array of all collected values, in sensor order.
    """
    values = []
    for sensor in variables:
        column = df[sensor]
        values += [
            column.mean(),    # mean
            column.var(),     # variance
            column.max(),     # maximum
            column.min(),     # minimum
            column.median(),  # median
        ]
        values = add_individual_feature(sensor, values, df)
    return np.array(values)

In [6]:
%%time
# Task A: extract one feature row per high-frequency sensor file in the archive.
feature_n = len(variables_sensor)
f_cols = feature_columns(variables_sensor)
times_ = []
mold_id_ = []
# NOTE(review): absolute, machine-specific path; the other task-A inputs are
# read from ./taskA/ -- consider moving the archive there.
# The `with` block guarantees the archive is closed even if a file fails to parse.
with zipfile.ZipFile('C:/Users/Zetlin/Downloads/IJ_data/taskA/传感器高频数据.zip') as TEST_ZIP_taskA:
    file_list=TEST_ZIP_taskA.namelist()
    # One row per archive member; the width comes from the generated column
    # names so the array and the DataFrame header can never disagree.
    features_ = np.empty([len(file_list), len(f_cols)])
    for i,f in enumerate(file_list):
        with TEST_ZIP_taskA.open(f) as member:
            df=pd.read_csv(member)
        # File names look like mold_data_<time>_<id>.csv
        tmp = f.split('_')
        ti = tmp[2]
        mold_id = tmp[3].replace('.csv', '')
        times_.append(str(ti))
        mold_id_.append(int(mold_id))
        if len(df) == 0:
            # No samples for this shot: mark every feature as missing.
            features_[i] = np.nan
        else:
            features_[i] = stage_features(df, variables_sensor)
        print(i,f,features_.shape)
TEST_HIG_taskA = pd.DataFrame(features_, columns=f_cols)
TEST_HIG_taskA['Time'] = times_
TEST_HIG_taskA['Id'] = mold_id_
TEST_HIG_taskA = TEST_HIG_taskA[['Id', 'Time'] + f_cols]

0 mold_data_611925088_56694.csv (3971, 140)
1 mold_data_611925276_56699.csv (3971, 140)
2 mold_data_611925501_56705.csv (3971, 140)
3 mold_data_611925613_56708.csv (3971, 140)
4 mold_data_611925764_56712.csv (3971, 140)
5 mold_data_611925877_56715.csv (3971, 140)
6 mold_data_611926103_56721.csv (3971, 140)
7 mold_data_611926178_56723.csv (3971, 140)
8 mold_data_611926253_56725.csv (3971, 140)
9 mold_data_611926328_56727.csv (3971, 140)
10 mold_data_611926404_56729.csv (3971, 140)
11 mold_data_611926517_56732.csv (3971, 140)
12 mold_data_611926818_56740.csv (3971, 140)
13 mold_data_611926968_56744.csv (3971, 140)
14 mold_data_611927043_56746.csv (3971, 140)
15 mold_data_611927119_56748.csv (3971, 140)
16 mold_data_611927194_56750.csv (3971, 140)
17 mold_data_611927645_56762.csv (3971, 140)
18 mold_data_611927683_56763.csv (3971, 140)
19 mold_data_611927758_56765.csv (3971, 140)
20 mold_data_611927984_56771.csv (3971, 140)
21 mold_data_611928059_56773.csv (3971, 140)
22 mold_data_6119283

181 mold_data_611950856_57379.csv (3971, 140)
182 mold_data_611950894_57380.csv (3971, 140)
183 mold_data_611950969_57382.csv (3971, 140)
184 mold_data_611951120_57386.csv (3971, 140)
185 mold_data_611951157_57387.csv (3971, 140)
186 mold_data_611951308_57391.csv (3971, 140)
187 mold_data_611951345_57392.csv (3971, 140)
188 mold_data_611951496_57396.csv (3971, 140)
189 mold_data_611951533_57397.csv (3971, 140)
190 mold_data_611951684_57401.csv (3971, 140)
191 mold_data_611951759_57403.csv (3971, 140)
192 mold_data_611951797_57404.csv (3971, 140)
193 mold_data_611951910_57407.csv (3971, 140)
194 mold_data_611951947_57408.csv (3971, 140)
195 mold_data_611952098_57412.csv (3971, 140)
196 mold_data_611952136_57413.csv (3971, 140)
197 mold_data_611952211_57415.csv (3971, 140)
198 mold_data_611952436_57421.csv (3971, 140)
199 mold_data_611952700_57428.csv (3971, 140)
200 mold_data_611953001_57436.csv (3971, 140)
201 mold_data_611953038_57437.csv (3971, 140)
202 mold_data_611953076_57438.csv 

361 mold_data_611967817_57828.csv (3971, 140)
362 mold_data_611967855_57829.csv (3971, 140)
363 mold_data_611967968_57832.csv (3971, 140)
364 mold_data_611968081_57835.csv (3971, 140)
365 mold_data_611968419_57844.csv (3971, 140)
366 mold_data_611968570_57848.csv (3971, 140)
367 mold_data_611968645_57850.csv (3971, 140)
368 mold_data_611968683_57851.csv (3971, 140)
369 mold_data_611968758_57853.csv (3971, 140)
370 mold_data_611969059_57861.csv (3971, 140)
371 mold_data_611969134_57863.csv (3971, 140)
372 mold_data_611969210_57865.csv (3971, 140)
373 mold_data_611969247_57866.csv (3971, 140)
374 mold_data_611969398_57870.csv (3971, 140)
375 mold_data_611969473_57872.csv (3971, 140)
376 mold_data_611969661_57877.csv (3971, 140)
377 mold_data_611969737_57879.csv (3971, 140)
378 mold_data_611969887_57883.csv (3971, 140)
379 mold_data_611970038_57887.csv (3971, 140)
380 mold_data_611970151_57890.csv (3971, 140)
381 mold_data_611970188_57891.csv (3971, 140)
382 mold_data_611970226_57892.csv 

540 mold_data_611984640_58275.csv (3971, 140)
541 mold_data_611984715_58277.csv (3971, 140)
542 mold_data_611984790_58279.csv (3971, 140)
543 mold_data_611984903_58282.csv (3971, 140)
544 mold_data_611985317_58293.csv (3971, 140)
545 mold_data_611985694_58303.csv (3971, 140)
546 mold_data_611985844_58307.csv (3971, 140)
547 mold_data_611985882_58308.csv (3971, 140)
548 mold_data_611986033_58312.csv (3971, 140)
549 mold_data_611986221_58317.csv (3971, 140)
550 mold_data_611986334_58320.csv (3971, 140)
551 mold_data_611986447_58323.csv (3971, 140)
552 mold_data_611986710_58330.csv (3971, 140)
553 mold_data_611986785_58332.csv (3971, 140)
554 mold_data_611986861_58334.csv (3971, 140)
555 mold_data_611987049_58339.csv (3971, 140)
556 mold_data_611987124_58341.csv (3971, 140)
557 mold_data_611987350_58347.csv (3971, 140)
558 mold_data_611987613_58354.csv (3971, 140)
559 mold_data_611987801_58359.csv (3971, 140)
560 mold_data_611988216_58370.csv (3971, 140)
561 mold_data_611988328_58373.csv 

720 mold_data_612004439_58801.csv (3971, 140)
721 mold_data_612004552_58804.csv (3971, 140)
722 mold_data_612004777_58810.csv (3971, 140)
723 mold_data_612004853_58812.csv (3971, 140)
724 mold_data_612004966_58815.csv (3971, 140)
725 mold_data_612005417_58827.csv (3971, 140)
726 mold_data_612005455_58828.csv (3971, 140)
727 mold_data_612005568_58831.csv (3971, 140)
728 mold_data_612005756_58836.csv (3971, 140)
729 mold_data_612005794_58837.csv (3971, 140)
730 mold_data_612005831_58838.csv (3971, 140)
731 mold_data_612005869_58839.csv (3971, 140)
732 mold_data_612005907_58840.csv (3971, 140)
733 mold_data_612005982_58842.csv (3971, 140)
734 mold_data_612006095_58845.csv (3971, 140)
735 mold_data_612006170_58847.csv (3971, 140)
736 mold_data_612006321_58851.csv (3971, 140)
737 mold_data_612006358_58852.csv (3971, 140)
738 mold_data_612006396_58853.csv (3971, 140)
739 mold_data_612006546_58857.csv (3971, 140)
740 mold_data_612006847_58865.csv (3971, 140)
741 mold_data_612007035_58870.csv 

899 mold_data_612021891_59264.csv (3971, 140)
900 mold_data_612021966_59266.csv (3971, 140)
901 mold_data_612022042_59268.csv (3971, 140)
902 mold_data_612022080_59269.csv (3971, 140)
903 mold_data_612022268_59274.csv (3971, 140)
904 mold_data_612022306_59275.csv (3971, 140)
905 mold_data_612022457_59279.csv (3971, 140)
906 mold_data_612022495_59280.csv (3971, 140)
907 mold_data_612022570_59282.csv (3971, 140)
908 mold_data_612022608_59283.csv (3971, 140)
909 mold_data_612022645_59284.csv (3971, 140)
910 mold_data_612022796_59288.csv (3971, 140)
911 mold_data_612022834_59289.csv (3971, 140)
912 mold_data_612022947_59292.csv (3971, 140)
913 mold_data_612023475_59306.csv (3971, 140)
914 mold_data_612023513_59307.csv (3971, 140)
915 mold_data_612023626_59310.csv (3971, 140)
916 mold_data_612023777_59314.csv (3971, 140)
917 mold_data_612023928_59318.csv (3971, 140)
918 mold_data_612024042_59321.csv (3971, 140)
919 mold_data_612024079_59322.csv (3971, 140)
920 mold_data_612024117_59323.csv 

1077 mold_data_613576279_83741.csv (3971, 140)
1078 mold_data_613576319_83742.csv (3971, 140)
1079 mold_data_613576359_83743.csv (3971, 140)
1080 mold_data_613576836_83755.csv (3971, 140)
1081 mold_data_613576956_83758.csv (3971, 140)
1082 mold_data_613577115_83762.csv (3971, 140)
1083 mold_data_613577234_83765.csv (3971, 140)
1084 mold_data_613577553_83773.csv (3971, 140)
1085 mold_data_613577592_83774.csv (3971, 140)
1086 mold_data_613577672_83776.csv (3971, 140)
1087 mold_data_613577871_83781.csv (3971, 140)
1088 mold_data_613577911_83782.csv (3971, 140)
1089 mold_data_613577951_83783.csv (3971, 140)
1090 mold_data_613577990_83784.csv (3971, 140)
1091 mold_data_613578030_83785.csv (3971, 140)
1092 mold_data_613578189_83789.csv (3971, 140)
1093 mold_data_613578309_83792.csv (3971, 140)
1094 mold_data_613578349_83793.csv (3971, 140)
1095 mold_data_613578428_83795.csv (3971, 140)
1096 mold_data_613578468_83796.csv (3971, 140)
1097 mold_data_613578667_83801.csv (3971, 140)
1098 mold_dat

1252 mold_data_613595422_84222.csv (3971, 140)
1253 mold_data_613595741_84230.csv (3971, 140)
1254 mold_data_613595780_84231.csv (3971, 140)
1255 mold_data_613595820_84232.csv (3971, 140)
1256 mold_data_613595860_84233.csv (3971, 140)
1257 mold_data_613595940_84235.csv (3971, 140)
1258 mold_data_613596139_84240.csv (3971, 140)
1259 mold_data_613596178_84241.csv (3971, 140)
1260 mold_data_613596338_84245.csv (3971, 140)
1261 mold_data_613596616_84252.csv (3971, 140)
1262 mold_data_613596935_84260.csv (3971, 140)
1263 mold_data_613597094_84264.csv (3971, 140)
1264 mold_data_613597452_84273.csv (3971, 140)
1265 mold_data_613597571_84276.csv (3971, 140)
1266 mold_data_613597651_84278.csv (3971, 140)
1267 mold_data_613597890_84284.csv (3971, 140)
1268 mold_data_613597970_84286.csv (3971, 140)
1269 mold_data_613598328_84295.csv (3971, 140)
1270 mold_data_613598447_84298.csv (3971, 140)
1271 mold_data_613598567_84301.csv (3971, 140)
1272 mold_data_613598805_84307.csv (3971, 140)
1273 mold_dat

1427 mold_data_613633886_85185.csv (3971, 140)
1428 mold_data_613634642_85204.csv (3971, 140)
1429 mold_data_613634682_85205.csv (3971, 140)
1430 mold_data_613634722_85206.csv (3971, 140)
1431 mold_data_613635041_85214.csv (3971, 140)
1432 mold_data_613635081_85215.csv (3971, 140)
1433 mold_data_613635121_85216.csv (3971, 140)
1434 mold_data_613635360_85222.csv (3971, 140)
1435 mold_data_613635638_85229.csv (3971, 140)
1436 mold_data_613636554_85252.csv (3971, 140)
1437 mold_data_613637151_85267.csv (3971, 140)
1438 mold_data_613637389_85273.csv (3971, 140)
1439 mold_data_613639023_85314.csv (3971, 140)
1440 mold_data_613639302_85321.csv (3971, 140)
1441 mold_data_613639341_85322.csv (3971, 140)
1442 mold_data_613639540_85327.csv (3971, 140)
1443 mold_data_613639818_85334.csv (3971, 140)
1444 mold_data_613639858_85335.csv (3971, 140)
1445 mold_data_613639938_85337.csv (3971, 140)
1446 mold_data_613639978_85338.csv (3971, 140)
1447 mold_data_613640018_85339.csv (3971, 140)
1448 mold_dat

1602 mold_data_613837053_88046.csv (3971, 140)
1603 mold_data_613837091_88047.csv (3971, 140)
1604 mold_data_613837130_88048.csv (3971, 140)
1605 mold_data_613837169_88049.csv (3971, 140)
1606 mold_data_613837325_88053.csv (3971, 140)
1607 mold_data_613837401_88055.csv (3971, 140)
1608 mold_data_613837518_88058.csv (3971, 140)
1609 mold_data_613837556_88059.csv (3971, 140)
1610 mold_data_613837595_88060.csv (3971, 140)
1611 mold_data_613837634_88061.csv (3971, 140)
1612 mold_data_613837712_88063.csv (3971, 140)
1613 mold_data_613837751_88064.csv (3971, 140)
1614 mold_data_613837789_88065.csv (3971, 140)
1615 mold_data_613837945_88069.csv (3971, 140)
1616 mold_data_613838100_88073.csv (3971, 140)
1617 mold_data_613838216_88076.csv (3971, 140)
1618 mold_data_613838527_88084.csv (3971, 140)
1619 mold_data_613838604_88086.csv (3971, 140)
1620 mold_data_613838915_88094.csv (3971, 140)
1621 mold_data_613839070_88098.csv (3971, 140)
1622 mold_data_613839109_88099.csv (3971, 140)
1623 mold_dat

1777 mold_data_613856108_88537.csv (3971, 140)
1778 mold_data_613856418_88545.csv (3971, 140)
1779 mold_data_613856806_88555.csv (3971, 140)
1780 mold_data_613857000_88560.csv (3971, 140)
1781 mold_data_613857194_88565.csv (3971, 140)
1782 mold_data_613857310_88568.csv (3971, 140)
1783 mold_data_613858125_88589.csv (3971, 140)
1784 mold_data_613858203_88591.csv (3971, 140)
1785 mold_data_613858397_88596.csv (3971, 140)
1786 mold_data_613858630_88602.csv (3971, 140)
1787 mold_data_613858708_88604.csv (3971, 140)
1788 mold_data_613858824_88607.csv (3971, 140)
1789 mold_data_613859445_88623.csv (3971, 140)
1790 mold_data_613860260_88644.csv (3971, 140)
1791 mold_data_613860803_88658.csv (3971, 140)
1792 mold_data_613860842_88659.csv (3971, 140)
1793 mold_data_613861036_88664.csv (3971, 140)
1794 mold_data_613861502_88676.csv (3971, 140)
1795 mold_data_613861657_88680.csv (3971, 140)
1796 mold_data_613861735_88682.csv (3971, 140)
1797 mold_data_613861851_88685.csv (3971, 140)
1798 mold_dat

1952 mold_data_613902875_89742.csv (3971, 140)
1953 mold_data_613902913_89743.csv (3971, 140)
1954 mold_data_613903030_89746.csv (3971, 140)
1955 mold_data_613903146_89749.csv (3971, 140)
1956 mold_data_613903379_89755.csv (3971, 140)
1957 mold_data_613903457_89757.csv (3971, 140)
1958 mold_data_613903690_89763.csv (3971, 140)
1959 mold_data_613903729_89764.csv (3971, 140)
1960 mold_data_613903845_89767.csv (3971, 140)
1961 mold_data_613904078_89773.csv (3971, 140)
1962 mold_data_613904272_89778.csv (3971, 140)
1963 mold_data_613904427_89782.csv (3971, 140)
1964 mold_data_613904505_89784.csv (3971, 140)
1965 mold_data_613904660_89788.csv (3971, 140)
1966 mold_data_613904699_89789.csv (3971, 140)
1967 mold_data_613904738_89790.csv (3971, 140)
1968 mold_data_613904932_89795.csv (3971, 140)
1969 mold_data_613904971_89796.csv (3971, 140)
1970 mold_data_613905010_89797.csv (3971, 140)
1971 mold_data_613905126_89800.csv (3971, 140)
1972 mold_data_613905320_89805.csv (3971, 140)
1973 mold_dat

2127 mold_data_613929420_90426.csv (3971, 140)
2128 mold_data_613929458_90427.csv (3971, 140)
2129 mold_data_613929575_90430.csv (3971, 140)
2130 mold_data_613929653_90432.csv (3971, 140)
2131 mold_data_613929730_90434.csv (3971, 140)
2132 mold_data_613929769_90435.csv (3971, 140)
2133 mold_data_613929885_90438.csv (3971, 140)
2134 mold_data_613929924_90439.csv (3971, 140)
2135 mold_data_613929963_90440.csv (3971, 140)
2136 mold_data_613930312_90449.csv (3971, 140)
2137 mold_data_613930390_90451.csv (3971, 140)
2138 mold_data_613930817_90462.csv (3971, 140)
2139 mold_data_613931011_90467.csv (3971, 140)
2140 mold_data_613931089_90469.csv (3971, 140)
2141 mold_data_613931127_90470.csv (3971, 140)
2142 mold_data_613931205_90472.csv (3971, 140)
2143 mold_data_613931244_90473.csv (3971, 140)
2144 mold_data_613931515_90480.csv (3971, 140)
2145 mold_data_613931554_90481.csv (3971, 140)
2146 mold_data_613931942_90491.csv (3971, 140)
2147 mold_data_613932020_90493.csv (3971, 140)
2148 mold_dat

2302 mold_data_617577743_151031.csv (3971, 140)
2303 mold_data_617578979_151061.csv (3971, 140)
2304 mold_data_617579391_151071.csv (3971, 140)
2305 mold_data_617580833_151106.csv (3971, 140)
2306 mold_data_617581039_151111.csv (3971, 140)
2307 mold_data_617581245_151116.csv (3971, 140)
2308 mold_data_617581657_151126.csv (3971, 140)
2309 mold_data_617582069_151136.csv (3971, 140)
2310 mold_data_617582688_151151.csv (3971, 140)
2311 mold_data_617583512_151171.csv (3971, 140)
2312 mold_data_617584336_151191.csv (3971, 140)
2313 mold_data_617585365_151216.csv (3971, 140)
2314 mold_data_617585571_151221.csv (3971, 140)
2315 mold_data_617585777_151226.csv (3971, 140)
2316 mold_data_617586188_151236.csv (3971, 140)
2317 mold_data_617587630_151271.csv (3971, 140)
2318 mold_data_617588248_151286.csv (3971, 140)
2319 mold_data_617588455_151291.csv (3971, 140)
2320 mold_data_617589279_151311.csv (3971, 140)
2321 mold_data_617590722_151346.csv (3971, 140)
2322 mold_data_617590928_151351.csv (397

2473 mold_data_617699488_153951.csv (3971, 140)
2474 mold_data_617700106_153966.csv (3971, 140)
2475 mold_data_617702579_154026.csv (3971, 140)
2476 mold_data_617703610_154051.csv (3971, 140)
2477 mold_data_617704022_154061.csv (3971, 140)
2478 mold_data_617705671_154101.csv (3971, 140)
2479 mold_data_617705877_154106.csv (3971, 140)
2480 mold_data_617706496_154121.csv (3971, 140)
2481 mold_data_617707114_154136.csv (3971, 140)
2482 mold_data_617708557_154171.csv (3971, 140)
2483 mold_data_617710412_154216.csv (3971, 140)
2484 mold_data_617710618_154221.csv (3971, 140)
2485 mold_data_617711030_154231.csv (3971, 140)
2486 mold_data_617711442_154241.csv (3971, 140)
2487 mold_data_617712472_154266.csv (3971, 140)
2488 mold_data_617714327_154311.csv (3971, 140)
2489 mold_data_617714533_154316.csv (3971, 140)
2490 mold_data_617714945_154326.csv (3971, 140)
2491 mold_data_617715151_154331.csv (3971, 140)
2492 mold_data_617717006_154376.csv (3971, 140)
2493 mold_data_617718243_154406.csv (397

2645 mold_data_617818163_156826.csv (3971, 140)
2646 mold_data_617818369_156831.csv (3971, 140)
2647 mold_data_617818575_156836.csv (3971, 140)
2648 mold_data_617818781_156841.csv (3971, 140)
2649 mold_data_617820224_156876.csv (3971, 140)
2650 mold_data_617820430_156881.csv (3971, 140)
2651 mold_data_617820636_156886.csv (3971, 140)
2652 mold_data_617821254_156901.csv (3971, 140)
2653 mold_data_617821460_156906.csv (3971, 140)
2654 mold_data_617821667_156911.csv (3971, 140)
2655 mold_data_617823316_156951.csv (3971, 140)
2656 mold_data_617823522_156956.csv (3971, 140)
2657 mold_data_617824553_156981.csv (3971, 140)
2658 mold_data_617824759_156986.csv (3971, 140)
2659 mold_data_617825171_156996.csv (3971, 140)
2660 mold_data_617825377_157001.csv (3971, 140)
2661 mold_data_617825790_157011.csv (3971, 140)
2662 mold_data_617827438_157051.csv (3971, 140)
2663 mold_data_617827644_157056.csv (3971, 140)
2664 mold_data_617827850_157061.csv (3971, 140)
2665 mold_data_617828262_157071.csv (397

2816 mold_data_617906379_158966.csv (3971, 140)
2817 mold_data_617906585_158971.csv (3971, 140)
2818 mold_data_617907616_158996.csv (3971, 140)
2819 mold_data_617908441_159016.csv (3971, 140)
2820 mold_data_617910090_159056.csv (3971, 140)
2821 mold_data_617910709_159071.csv (3971, 140)
2822 mold_data_617910915_159076.csv (3971, 140)
2823 mold_data_617911327_159086.csv (3971, 140)
2824 mold_data_617911740_159096.csv (3971, 140)
2825 mold_data_617911946_159101.csv (3971, 140)
2826 mold_data_617912152_159106.csv (3971, 140)
2827 mold_data_617912359_159111.csv (3971, 140)
2828 mold_data_617912565_159116.csv (3971, 140)
2829 mold_data_617913802_159146.csv (3971, 140)
2830 mold_data_617914008_159151.csv (3971, 140)
2831 mold_data_617914625_159166.csv (3971, 140)
2832 mold_data_617914831_159171.csv (3971, 140)
2833 mold_data_617915243_159181.csv (3971, 140)
2834 mold_data_617915656_159191.csv (3971, 140)
2835 mold_data_617915862_159196.csv (3971, 140)
2836 mold_data_617916068_159201.csv (397

2989 mold_data_617998977_161211.csv (3971, 140)
2990 mold_data_618000628_161251.csv (3971, 140)
2991 mold_data_618001040_161261.csv (3971, 140)
2992 mold_data_618001659_161276.csv (3971, 140)
2993 mold_data_618001866_161281.csv (3971, 140)
2994 mold_data_618002278_161291.csv (3971, 140)
2995 mold_data_618002897_161306.csv (3971, 140)
2996 mold_data_618003104_161311.csv (3971, 140)
2997 mold_data_618004135_161336.csv (3971, 140)
2998 mold_data_618005579_161371.csv (3971, 140)
2999 mold_data_618005785_161376.csv (3971, 140)
3000 mold_data_618005991_161381.csv (3971, 140)
3001 mold_data_618006198_161386.csv (3971, 140)
3002 mold_data_618006404_161391.csv (3971, 140)
3003 mold_data_618006817_161401.csv (3971, 140)
3004 mold_data_618008261_161436.csv (3971, 140)
3005 mold_data_618008673_161446.csv (3971, 140)
3006 mold_data_618008880_161451.csv (3971, 140)
3007 mold_data_618009086_161456.csv (3971, 140)
3008 mold_data_618009498_161466.csv (3971, 140)
3009 mold_data_618009704_161471.csv (397

3162 mold_data_618101153_163416.csv (3971, 140)
3163 mold_data_618102184_163441.csv (3971, 140)
3164 mold_data_618102596_163451.csv (3971, 140)
3165 mold_data_618103421_163471.csv (3971, 140)
3166 mold_data_618103833_163481.csv (3971, 140)
3167 mold_data_618104864_163506.csv (3971, 140)
3168 mold_data_618105482_163521.csv (3971, 140)
3169 mold_data_618106101_163536.csv (3971, 140)
3170 mold_data_618106513_163546.csv (3971, 140)
3171 mold_data_618106720_163551.csv (3971, 140)
3172 mold_data_618106926_163556.csv (3971, 140)
3173 mold_data_618107338_163566.csv (3971, 140)
3174 mold_data_618108369_163591.csv (3971, 140)
3175 mold_data_618108988_163606.csv (3971, 140)
3176 mold_data_618109194_163611.csv (3971, 140)
3177 mold_data_618109401_163616.csv (3971, 140)
3178 mold_data_618109607_163621.csv (3971, 140)
3179 mold_data_618110019_163631.csv (3971, 140)
3180 mold_data_618110844_163651.csv (3971, 140)
3181 mold_data_618111050_163656.csv (3971, 140)
3182 mold_data_618111463_163666.csv (397

3334 mold_data_618315677_165931.csv (3971, 140)
3335 mold_data_618332067_165996.csv (3971, 140)
3336 mold_data_618334548_166056.csv (3971, 140)
3337 mold_data_618335724_166071.csv (3971, 140)
3338 mold_data_618336555_166081.csv (3971, 140)
3339 mold_data_618337383_166101.csv (3971, 140)
3340 mold_data_618337796_166111.csv (3971, 140)
3341 mold_data_618338003_166116.csv (3971, 140)
3342 mold_data_618338416_166126.csv (3971, 140)
3343 mold_data_618338623_166131.csv (3971, 140)
3344 mold_data_618338830_166136.csv (3971, 140)
3345 mold_data_618339037_166141.csv (3971, 140)
3346 mold_data_618339658_166156.csv (3971, 140)
3347 mold_data_618340071_166166.csv (3971, 140)
3348 mold_data_618340899_166186.csv (3971, 140)
3349 mold_data_618341726_166206.csv (3971, 140)
3350 mold_data_618342760_166231.csv (3971, 140)
3351 mold_data_618344621_166276.csv (3971, 140)
3352 mold_data_618345034_166286.csv (3971, 140)
3353 mold_data_618345448_166296.csv (3971, 140)
3354 mold_data_618345655_166301.csv (397

3506 mold_data_618913339_168751.csv (3971, 140)
3507 mold_data_618914570_168781.csv (3971, 140)
3508 mold_data_618914981_168791.csv (3971, 140)
3509 mold_data_618916622_168831.csv (3971, 140)
3510 mold_data_618917033_168841.csv (3971, 140)
3511 mold_data_618918675_168881.csv (3971, 140)
3512 mold_data_618918880_168886.csv (3971, 140)
3513 mold_data_618919085_168891.csv (3971, 140)
3514 mold_data_618919496_168901.csv (3971, 140)
3515 mold_data_618919701_168906.csv (3971, 140)
3516 mold_data_618920112_168916.csv (3971, 140)
3517 mold_data_618921344_168946.csv (3971, 140)
3518 mold_data_618922576_168976.csv (3971, 140)
3519 mold_data_618922781_168981.csv (3971, 140)
3520 mold_data_618923192_168991.csv (3971, 140)
3521 mold_data_618923602_169001.csv (3971, 140)
3522 mold_data_618924629_169026.csv (3971, 140)
3523 mold_data_618924834_169031.csv (3971, 140)
3524 mold_data_618925861_169056.csv (3971, 140)
3525 mold_data_618926477_169071.csv (3971, 140)
3526 mold_data_618926887_169081.csv (397

3679 mold_data_619004972_170981.csv (3971, 140)
3680 mold_data_619005178_170986.csv (3971, 140)
3681 mold_data_619005383_170991.csv (3971, 140)
3682 mold_data_619005588_170996.csv (3971, 140)
3683 mold_data_619006410_171016.csv (3971, 140)
3684 mold_data_619006615_171021.csv (3971, 140)
3685 mold_data_619007026_171031.csv (3971, 140)
3686 mold_data_619008258_171061.csv (3971, 140)
3687 mold_data_619008875_171076.csv (3971, 140)
3688 mold_data_619009696_171096.csv (3971, 140)
3689 mold_data_619010312_171111.csv (3971, 140)
3690 mold_data_619010518_171116.csv (3971, 140)
3691 mold_data_619010723_171121.csv (3971, 140)
3692 mold_data_619010928_171126.csv (3971, 140)
3693 mold_data_619011339_171136.csv (3971, 140)
3694 mold_data_619011750_171146.csv (3971, 140)
3695 mold_data_619011955_171151.csv (3971, 140)
3696 mold_data_619012366_171161.csv (3971, 140)
3697 mold_data_619012777_171171.csv (3971, 140)
3698 mold_data_619013393_171186.csv (3971, 140)
3699 mold_data_619013804_171196.csv (397

3851 mold_data_619135406_174061.csv (3971, 140)
3852 mold_data_619135603_174066.csv (3971, 140)
3853 mold_data_619135997_174076.csv (3971, 140)
3854 mold_data_619136588_174091.csv (3971, 140)
3855 mold_data_619136785_174096.csv (3971, 140)
3856 mold_data_619136982_174101.csv (3971, 140)
3857 mold_data_619138558_174141.csv (3971, 140)
3858 mold_data_619138952_174151.csv (3971, 140)
3859 mold_data_619139346_174161.csv (3971, 140)
3860 mold_data_619139543_174166.csv (3971, 140)
3861 mold_data_619140331_174186.csv (3971, 140)
3862 mold_data_619140528_174191.csv (3971, 140)
3863 mold_data_619141119_174206.csv (3971, 140)
3864 mold_data_619141513_174216.csv (3971, 140)
3865 mold_data_619141907_174226.csv (3971, 140)
3866 mold_data_619142104_174231.csv (3971, 140)
3867 mold_data_619143286_174261.csv (3971, 140)
3868 mold_data_619143877_174276.csv (3971, 140)
3869 mold_data_619144271_174286.csv (3971, 140)
3870 mold_data_619144468_174291.csv (3971, 140)
3871 mold_data_619144665_174296.csv (397

In [7]:
# Sanity check: one row per archive member; columns are Id, Time and the extracted features.
TEST_HIG_taskA.shape

(3971, 142)

In [8]:
# Call the time column 'spcTime' so this frame can later be merged on ["Id","spcTime"].
TEST_HIG_taskA = TEST_HIG_taskA.rename(columns={'Time':'spcTime'})
# Optional: persist the extracted features (left disabled).
# TEST_HIG_taskA.to_csv("./Middle_result/TEST_HIG_taskA_mean.csv")

# 特征工程

In [9]:
def drop_na_col(df,drop_some_na=True):
    '''Drop columns with missing values from ``df`` in place and report them.

    Columns that are entirely NaN are always removed. Columns that are only
    partly NaN are removed as well when ``drop_some_na`` is true.
    In data_set, the seven columns mc_speed_4, mc_pos_3, mo_speed_1, mo_pos_1,
    mc_press_4, mo_press_1 and adjust_reasons are missing to varying degrees.

    Returns the same (mutated) ``df`` object.
    '''
    n_rows = len(df)
    empty_cols = []
    partial_cols = []
    for name in df.columns:
        n_missing = df[name].isna().sum()
        if n_missing == n_rows:
            print(f"删掉空值列:{name}")
            empty_cols.append(name)
        elif drop_some_na and n_missing > 0:
            print(f"删掉缺失值列:{name}")
            partial_cols.append(name)
    # Remove everything in one call; the caller's frame is modified.
    df.drop(columns=empty_cols + partial_cols, inplace=True)
    print(f"共删除{len(empty_cols)}个空值列,{len(partial_cols)}个缺失值列")
    return df
def drop_same_val_col(df):
    """Drop, in place, every column that holds exactly one distinct value.

    Such columns carry no information for modelling. ``nunique()`` ignores
    NaN, so an all-NaN column (zero distinct values) is kept.
    Returns the same (mutated) ``df`` object.
    """
    constant_cols = [name for name in df.columns if df[name].nunique() == 1]
    for name in constant_cols:
        print(f"删掉相同值列:{name}")
    df.drop(columns=constant_cols, inplace=True)
    print(f"共删除{len(constant_cols)}个相同值列")
    return df

In [10]:
def is_col_unique_value(df):
    """Print the columns of ``df`` that are uninformative.

    A column is reported as empty ("**空值") when it has no non-NaN value at
    all, and as "--One value" when it has exactly one distinct non-NaN value.
    """
    for col in df:
        # value_counts() skips NaN by default, so an all-NaN column yields an
        # empty result (it never returns None).
        n_distinct = len(df[col].value_counts())
        if n_distinct == 0:
            print(col,"**空值")
        elif n_distinct == 1:
            print(col,"--One value")
def generate_profiling_report(df,name):
    """Write a pandas-profiling HTML report of ``df`` to "<name> profile report.html"."""
    # Imported inside the function, so pandas_profiling is only required when
    # this function is actually called.
    from pandas_profiling import ProfileReport
    ProfileReport(df).to_file(f"{name} profile report.html")
def missing_no(df):
    """Print a missing-value summary of ``df``.

    One line is printed per column that has at least one NaN
    (``<column>列：<missing>/<rows>``), followed by a totals line with the row
    count, the column count, the number of columns with any NaN and the
    number of columns that are entirely NaN.
    """
    n_rows = len(df)
    cols_with_missing = 0
    cols_all_missing = 0
    for name in df.columns:
        n_missing = df[name].isna().sum()
        if n_missing != 0:
            print(f"{name}列：{n_missing}/{n_rows}")
            cols_with_missing += 1
        if n_missing == n_rows:
            cols_all_missing += 1
    print(f"共有{n_rows}条数据，{df.shape[1]}列数据，存在缺失值的有{cols_with_missing}列，完全缺失数据的{cols_all_missing}列")

# taskA

## 合并

In [11]:
"""生成一个train_set,用于和train_spc合并"""
# Build a dense set-point table with one row for every Id between the smallest
# and largest Id seen in either task-A table, so it can be joined to data_spc.
min_val = int(min(test_data_set_taskA.Id.min(),test_data_spc_taskA.Id.min()))
max_val = int(max(test_data_set_taskA.Id.max(),test_data_spc_taskA.Id.max()))
train_set = pd.DataFrame({"Id": np.arange(min_val, max_val + 1)})
# Left join keeps every Id; Ids without a set-point record get NaN here.
train_set = pd.merge(train_set, test_data_set_taskA, on="Id", how="left")
# Carry the last recorded set-points forward onto the following Ids.
# Rows before the first record stay NaN, and NaN fields inside a record are
# also filled from the previous row.
# .ffill() replaces the deprecated fillna(method="ffill").
train_set = train_set.sort_values('Id').ffill()

In [12]:
# Merge data_spc with the forward-filled data_set table; the left join on Id
# keeps every data_spc row.
test_taskA=pd.merge(test_data_spc_taskA,train_set,on="Id",how = "left")

In [13]:
# Row count should equal that of test_data_spc_taskA as long as Ids in train_set are unique.
test_taskA.shape

(3971, 432)

In [14]:
# Turn spcTime into an integer-valued string so it compares equal to the
# string 'spcTime' keys of TEST_HIG_taskA (taken from the file names), then
# attach the high-frequency features with an inner join on Id and spcTime.
test_taskA["spcTime"] = test_taskA["spcTime"].apply(lambda stamp: str(int(stamp)))
test_taskA = test_taskA.merge(TEST_HIG_taskA, on=["Id","spcTime"])

In [15]:
# The inner merge drops rows without a matching feature row; compare the row count with the previous shape.
test_taskA.shape

(3971, 572)

## 生成需要的格式

### 用于size1和size3

In [16]:
# Add all-NaN placeholder columns so the later column matching against the
# training table finds every expected name.
test_taskA = test_taskA.assign(
    **dict.fromkeys(("isTrain", "size1", "size2", "size3"), np.nan)
)

In [17]:
# Four placeholder columns were added, so the column count should have grown by 4.
test_taskA.shape

(3971, 576)

In [18]:
# Load the training feature table; its column list is used below to select and order the test columns.
my_train=pd.read_csv("./Middle_result/my_train_add_mean_std_max_min_median_indiv_features.csv")
my_train.shape

(20552, 218)

In [19]:
# Keep exactly the training table's columns, in the same order (raises KeyError if one is missing).
test_taskA = test_taskA[my_train.columns.tolist()]

In [20]:
# Save the test features aligned with the size1/size3 training table (row index omitted).
test_taskA.to_csv("./Middle_result/test_taskA_add_mean_std_max_min_median_indiv_features.csv",index=False)

In [21]:
# The column count should now equal that of my_train.
test_taskA.shape

(3971, 218)

### 用于size2

In [22]:
# Duplicate spcTime under the suffixed names and relabel every "_var" column
# as "_std".
# NOTE(review): only the labels change -- the values were computed with
# .var() in stage_features, so they are variances, not standard deviations;
# confirm this matches how the size2 training table was built.
test_taskA = test_taskA.assign(
    spcTime_x=test_taskA["spcTime"],
    spcTime_y=test_taskA["spcTime"],
).rename(columns=lambda name: name.replace("_var", "_std"))

In [23]:
# Load the training table used for size2 and keep exactly its columns, in its order.
my_train = pd.read_csv("./Middle_result/my_train_add_mean_std_max_min.csv")
test_taskA = test_taskA[my_train.columns.tolist()]  # mainly used for size2

In [24]:
# Save the test features aligned with the size2 training table (row index omitted).
test_taskA.to_csv("./Middle_result/test_taskA_add_mean_std_max_min.csv",index=False)

In [25]:
# Both frames should report the same number of columns.
test_taskA.shape,my_train.shape

((3971, 170), (20552, 170))