# 一、建模场景分析

## 1.1 建模目的

在制动系统中，总风管是连接总风缸与制动系统及其他用风设备的管路；总风缸由风源系统充风，获得并存储压力空气，供给列车上所有的用风模块。管路、橡胶件、管路接头的密封性能下降具有严重的危害性：它不仅妨碍列车的正常运行、缩短列车的使用寿命，更会危及乘客的安全。

![](./制动系统简化模型图.png)

因此，针对管路、橡胶件、管路接头密封性能下降所引起的泄漏，开发泄漏预警算法极为重要。泄漏预警算法针对两类故障：总风管路泄漏故障，以及制动缸及相连管路泄漏故障；通过分析空压机的启停、制动过程消耗的压力空气，以及两类泄漏损失的压力空气之间的机理关系来建立模型。

## 1.2 建模环境

Windows10 Python 3.7.3

pandas==0.24.2

In [1]:
import pandas as pd
import os, json

# 二、读取数据

In [2]:
# Load the raw signal CSV into a DataFrame; later cells read its 'time',
# compressor-run and brake-applied columns.
data_file_name = 'data_d16_V1.csv'
data = pd.read_csv(data_file_name)

In [36]:
data

Unnamed: 0,time,CTPCBC_制动_空压机组1运行,CTPCBC_制动_空压机组2运行,BC1CT_制动_A1车B1架制动施加
0,2020-07-16 21:00:18.138,0.0,0.0,1.0
1,2020-07-16 21:00:18.769,0.0,0.0,1.0
2,2020-07-16 21:00:18.917,0.0,0.0,1.0
3,2020-07-16 21:00:19.487,0.0,0.0,1.0
4,2020-07-16 21:00:19.654,0.0,0.0,1.0
5,2020-07-16 21:00:20.250,0.0,0.0,1.0
6,2020-07-16 21:00:20.421,0.0,0.0,1.0
7,2020-07-16 21:00:21.008,0.0,0.0,1.0
8,2020-07-16 21:00:21.182,0.0,0.0,1.0
9,2020-07-16 21:00:21.737,0.0,0.0,1.0


# 三、特征生成

In [4]:
# Load the table of previously generated feature rows; the row computed
# in the following cells is appended to this DataFrame.
feature_file_name = 'brake-v1.csv'
feature_data = pd.read_csv(feature_file_name)

根据建模公式生成以下特征：空压机工作率、制动频率、制动缸保压率。

In [5]:
# Column names of the signals used for feature generation.
compressor_1_col = 'CTPCBC_制动_空压机组1运行'
compressor_2_col = 'CTPCBC_制动_空压机组2运行'
brake_col = 'BC1CT_制动_A1车B1架制动施加'

# Number of rows in the log; used as the denominator of every ratio.
time_lenth = len(data.index)

# Summed run signal of each compressor unit, and the total over both units.
time_count_move_1 = data[compressor_1_col].sum()
time_count_move_2 = data[compressor_2_col].sum()
time_count = time_count_move_1 + time_count_move_2

# Brake signal: its sum over all rows, and the number of rows where it
# increased by exactly 1 relative to the previous row.
brake_signal = data[brake_col]
brake_time = brake_signal.sum()
brake_diff = brake_signal.diff()
brake_count = brake_diff[brake_diff.eq(1)].count()

# Date label: first 10 characters of the first timestamp's string form.
first_stamp = pd.to_datetime(data['time'][0])
date_time = str(first_stamp)[:10]

In [6]:
# Compressor working ratio: summed run signal of both compressor units
# divided by the number of rows in the log.
# NOTE(review): the two units are added together, so the value can exceed 1
# if both run at the same time -- confirm this is intended.
ratio = time_count / time_lenth
ratio

In [6]:
# Brake frequency: number of rows where the brake-applied signal rose by 1,
# divided by the number of rows.
brake_count_ratio = brake_count / time_lenth
# Brake-cylinder pressure-holding ratio: sum of the brake-applied signal
# divided by the number of rows.
brake_time_ratio = brake_time / time_lenth

In [7]:
# Add the newly computed feature row to the feature table.
# pd.concat is used instead of DataFrame.append: append was deprecated in
# pandas 1.4 and removed in pandas 2.0, while concat(..., sort=False) is
# available from pandas 0.23 on, so this works on old and new versions alike.
new_feature_row = pd.DataFrame([{
    'time': date_time,
    'ratio': ratio,
    'brake_count': brake_count_ratio,
    'brake_time': brake_time_ratio,
    'time_lenth': time_lenth,
}])
feature_data = pd.concat([feature_data, new_feature_row], ignore_index=True, sort=False)
# NOTE(review): the extended table is not written back to feature_file_name
# in this cell, yet the training cell re-reads that file -- confirm that the
# new row is persisted elsewhere.
feature_data

Unnamed: 0,time,ratio,brake_count,brake_time,time_lenth
0,2020-07-06,0.15498,0.036186,0.443248,31946
1,2020-07-07,0.146846,0.008929,0.491551,28111
2,2020-07-08,0.189425,0.021811,0.732835,11783
3,2020-07-09,0.164853,0.02005,0.396308,55959
4,2020-07-10,0.165328,0.006245,0.386093,54764
5,2020-07-11,0.107576,0.074407,0.715734,102988
6,2020-07-12,0.456347,0.073163,0.724482,51474
7,2020-07-13,0.128562,0.113171,0.618533,80568
8,2020-07-14,0.152027,0.0919,0.492411,71349
9,2020-07-15,0.169687,0.087884,0.487911,80902


# 三、模型训练

In [8]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [9]:
# Feature table (same path as feature_file_name above); re-read by the
# training cell to obtain the ratio / brake_count / brake_time history.
file_feature_name = "brake-v1.csv"
# Statistics file whose first 150 rows supply the 'brake' and 'tratio'
# columns used to build the baseline surface.
old_file_name = "statistics.csv"

In [88]:
# Read the statistics file and keep only its first 150 rows (all columns).
df = pd.read_csv(old_file_name).head(150)
df

Unnamed: 0,brake,tratio,ratio
0,0.002778,0.842222,0.049444
1,0.004226,0.718512,0.154583
2,0.009826,0.369792,0.188646
3,0.010926,0.394222,0.230074
4,0.001667,0.941282,0.056282
5,0.009752,0.377801,0.197340
6,0.011032,0.396111,0.197183
7,0.009886,0.365303,0.213485
8,0.000833,0.998030,0.044091
9,0.000000,1.000000,0.047083


In [11]:
# Accumulators for the ratio, brake-count and brake-time feature values;
# three independent empty lists that the training cell appends to.
ratio_list, b_count_list, b_time_list = [], [], []

In [12]:
# Coefficients of the baseline plane z = A * x + B * y + C.
A, B, C = 13.2314, 0.01858, 0.05499

# Evaluate the baseline plane on the first 150 rows of the statistics file,
# with x = 'brake' and y = 'tratio'.
df = pd.read_csv(old_file_name, nrows=150)
BASE_SURFACE_X = df['brake']
BASE_SURFACE_Y = df['tratio']
BASE_SURFACE_Z = BASE_SURFACE_X * A + BASE_SURFACE_Y * B + C

# Regression inputs built from the baseline: a two-column matrix
# [brake, tratio] and a single-column target holding the plane values.
base_x = np.array(df[['brake', 'tratio']])
base_y = np.array(BASE_SURFACE_Z)[:, np.newaxis]

In [62]:
BASE_SURFACE_X

0      0.002778
1      0.004226
2      0.009826
3      0.010926
4      0.001667
5      0.009752
6      0.011032
7      0.009886
8      0.000833
9      0.000000
10     0.000519
11     0.006538
12     0.000595
13     0.006632
14     0.006742
15     0.006735
16     0.006993
17     0.006560
18     0.006985
19     0.006702
20     0.006572
21     0.000614
22     0.006754
23     0.003542
24     0.006905
25     0.006283
26     0.006818
27     0.006960
28     0.000542
29     0.006577
         ...   
120    0.006193
121    0.006159
122    0.006757
123    0.000833
124    0.000052
125    0.002333
126    0.000441
127    0.002083
128    0.000000
129    0.000000
130    0.000000
131    0.000000
132    0.000000
133    0.000833
134    0.000000
135    0.000667
136    0.001515
137    0.000000
138    0.000000
139    0.001429
140    0.000000
141    0.000000
142    0.000000
143    0.001875
144    0.000000
145    0.000000
146    0.000833
147    0.000175
148    0.000000
149    0.000156
Name: brake, Length: 150

In [63]:
np.array(BASE_SURFACE_X).reshape(-1,1)

array([[2.7777780e-03],
       [4.2261900e-03],
       [9.8263890e-03],
       [1.0925926e-02],
       [1.6666670e-03],
       [9.7517730e-03],
       [1.1031746e-02],
       [9.8863640e-03],
       [8.3333300e-04],
       [0.0000000e+00],
       [5.1851900e-04],
       [6.5384620e-03],
       [5.9523800e-04],
       [6.6315790e-03],
       [6.7424240e-03],
       [6.7346940e-03],
       [6.9927540e-03],
       [6.5602840e-03],
       [6.9850190e-03],
       [6.7021280e-03],
       [6.5719700e-03],
       [6.1403500e-04],
       [6.7543860e-03],
       [3.5416670e-03],
       [6.9047620e-03],
       [6.2833330e-03],
       [6.8181820e-03],
       [6.9597070e-03],
       [5.4166700e-04],
       [6.5770610e-03],
       [7.3148150e-03],
       [1.0540541e-02],
       [1.0387597e-02],
       [4.0476200e-04],
       [1.2121200e-04],
       [2.9738560e-03],
       [9.3840580e-03],
       [9.1666670e-03],
       [1.0227273e-02],
       [7.2142860e-03],
       [2.7688170e-03],
       [6.282051

In [69]:
# Fit ratio = A * brake_count + B * brake_time + C by linear regression on the
# baseline surface points (base_x, base_y) plus the 10 most recent feature
# rows, then save the coefficients to model.csv.
His = pd.read_csv(file_feature_name)

# Order the history by date once (stable sort keeps the file order of rows
# that share a date) instead of filtering the frame once per key. Each row
# contributes its own values; the previous per-key lookup repeated the first
# matching row's values whenever a date occurred more than once.
history = His.sort_values('time', kind='mergesort')
ratio_list.extend(history['ratio'].tolist())
b_count_list.extend(history['brake_count'].tolist())
b_time_list.extend(history['brake_time'].tolist())
if not history.empty:
    # The model is labelled with the latest date present in the history.
    date_time = history['time'].iloc[-1]

# Fail with a clear message instead of a NameError on the undefined dict_LR
# when there are not enough samples to fit.
if len(ratio_list) < 10:
    raise ValueError(
        'At least 10 feature rows are required to fit the model, got %d'
        % len(ratio_list)
    )

# Each feature becomes a single column holding the 10 most recent values.
fit1 = np.array(ratio_list[-10:], dtype='float64').reshape(-1, 1)
fit2 = np.array(b_count_list[-10:], dtype='float64').reshape(-1, 1)
fit3 = np.array(b_time_list[-10:], dtype='float64').reshape(-1, 1)
fit4 = np.hstack((fit2, fit3))

Model = LinearRegression()
Model.fit(np.vstack((base_x, fit4)), np.vstack((base_y, fit1)))
dict_LR = {
    'time': date_time,
    'A': float(Model.coef_[0][0]),
    'B': float(Model.coef_[0][1]),
    # intercept_ is a 1-element array because the target is 2-D; index it
    # explicitly rather than converting the array itself to float.
    'C': float(Model.intercept_[0]),
}

param_data = pd.DataFrame([dict_LR])
param_data.to_csv('model.csv', index=False)

In [68]:
# reshape(1, -1) converts to a single row
# reshape(2, -1) converts to two rows
# reshape(-1, 1) converts to a single column
# reshape(-1, 2) converts to two columns

In [16]:
param_data

Unnamed: 0,A,B,C,time
0,1.661305,-0.036643,0.133181,2020-07-15


In [56]:
ratio_list

[0.15498027922118576,
 0.14684643022304436,
 0.18942544343545786,
 0.16485283868546616,
 0.16532758746621867,
 0.1075756398803744,
 0.45634689357734,
 0.12856220832092147,
 0.15202735847734375,
 0.16968678153815728]

# 四、模型预测

In [17]:
# Reload the fitted coefficients (columns A, B, C and time) that the
# training cell saved to model.csv.
file_name = 'model.csv'
df = pd.read_csv(file_name)
df

Unnamed: 0,A,B,C,time
0,1.661305,-0.036643,0.133181,2020-07-15


In [18]:
df['C'][0]

0.13318063979749264

In [20]:
# Classify the first model row: the intercept C is checked first, and the
# coefficient B is only looked at when C is within its limit. The limits
# 0.05499 and 0.01858 equal the baseline C and B used when building the
# baseline surface for training.
df['result'] = (
    '总风管泄露' if df['C'][0] > 0.05499
    else '制动系统泄露' if df['B'][0] > 0.01858
    else '正常'
)

df


Unnamed: 0,A,B,C,time,result
0,1.661305,-0.036643,0.133181,2020-07-15,总风管泄露
