# 철강 공장 데이터 분석

- PLATE_NO         :     플레이트(강판) 고유 ID
- ROLLING_DATE     :    제작일자
- SCALE            :     양품/불량
- SPEC             :     SPEC
- STEEL_KIND:            종류
- PT_THK:                두께
- PT_WDTH:               너비
- PT_LTH:                길이
- PT_WGT:                무게
- FUR_NO:                생산시설 NO
- FUR_NO_ROW:            생산시설 ROW
- FUR_HZ_TEMP:           가열로_HZ가열로_온도
- FUR_HZ_TIME:          가열로_HZ가열로_시간
- FUR_SZ_TEMP:           가열로_SZ가열로_온도
- FUR_SZ_TIME:          가열로_SZ가열로_시간
- FUR_TIME:              가열로_내부에 있었던 시간
- FUR_EXTEMP:            가열로_추출온도
- ROLLING_TEMP_T5:       롤링_온도
- HSB:                   HSB 적용여부
- ROLLING_DESCALING:     ROLLING_DESCALING 작업 횟수
- WORK_GR:               작업그룹

<프로젝트의 목표> 
- 공장제조데이터를 활용하여 아래의 내용을 작성합니다.
- 탐색적 데이터 분석을 활용한 데이터 시각화
- 시각화, 통계를 활용한 인사이트 도출
- 불량을 분류하는 머신러닝 모델 개발

<주의 사항>
- 공공데이터는 추가로 자유롭게 활용 가능하며 신뢰성이 있어야 함(출처표기 필요)	

In [1]:
import pandas as pd

# Load the steel-plate production records from CSV and preview the first rows.
data = pd.read_csv('data_pj1/steel_date.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,PLATE_NO,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,...,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,0,PB562774,2008-08-01:00:00:15,양품,AB/EH32-TM,T1,32.25,3707,15109,14180,...,1144,116,1133,59,282,1133,934,적용,8,2조
1,1,PB562775,2008-08-01:00:00:16,양품,AB/EH32-TM,T1,32.25,3707,15109,14180,...,1144,122,1135,53,283,1135,937,적용,8,2조
2,2,PB562776,2008-08-01:00:00:59,양품,NV-E36-TM,T8,33.27,3619,19181,18130,...,1129,116,1121,55,282,1121,889,적용,8,3조
3,3,PB562777,2008-08-01:00:01:24,양품,NV-E36-TM,T8,33.27,3619,19181,18130,...,1152,125,1127,68,316,1127,885,적용,8,3조
4,4,PB562778,2008-08-01:00:01:44,양품,BV-EH36-TM,T8,38.33,3098,13334,12430,...,1140,134,1128,48,314,1128,873,적용,8,1조


In [2]:
# List the column names of the loaded frame.
data.columns

Index(['Unnamed: 0', 'PLATE_NO', 'ROLLING_DATE', 'SCALE', 'SPEC', 'STEEL_KIND',
       'PT_THK', 'PT_WDTH', 'PT_LTH', 'PT_WGT', 'FUR_NO', 'FUR_NO_ROW',
       'FUR_HZ_TEMP', 'FUR_HZ_TIME', 'FUR_SZ_TEMP', 'FUR_SZ_TIME', 'FUR_TIME',
       'FUR_EXTEMP', 'ROLLING_TEMP_T5', 'HSB', 'ROLLING_DESCALING', 'WORK_GR'],
      dtype='object')

In [3]:
# Summarize each column's dtype and non-null count.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 22 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         720 non-null    int64  
 1   PLATE_NO           720 non-null    object 
 2   ROLLING_DATE       720 non-null    object 
 3   SCALE              720 non-null    object 
 4   SPEC               720 non-null    object 
 5   STEEL_KIND         720 non-null    object 
 6   PT_THK             720 non-null    float64
 7   PT_WDTH            720 non-null    int64  
 8   PT_LTH             720 non-null    int64  
 9   PT_WGT             720 non-null    int64  
 10  FUR_NO             720 non-null    object 
 11  FUR_NO_ROW         720 non-null    int64  
 12  FUR_HZ_TEMP        720 non-null    int64  
 13  FUR_HZ_TIME        720 non-null    int64  
 14  FUR_SZ_TEMP        720 non-null    int64  
 15  FUR_SZ_TIME        720 non-null    int64  
 16  FUR_TIME           720 non

In [4]:
# Count missing values per column.
data.isnull().sum()

Unnamed: 0           0
PLATE_NO             0
ROLLING_DATE         0
SCALE                0
SPEC                 0
STEEL_KIND           0
PT_THK               0
PT_WDTH              0
PT_LTH               0
PT_WGT               0
FUR_NO               0
FUR_NO_ROW           0
FUR_HZ_TEMP          0
FUR_HZ_TIME          0
FUR_SZ_TEMP          0
FUR_SZ_TIME          0
FUR_TIME             0
FUR_EXTEMP           0
ROLLING_TEMP_T5      0
HSB                  0
ROLLING_DESCALING    0
WORK_GR              0
dtype: int64

# 데이터 전처리
- SCALE : 0(불량), 1(양품)
- HSB : 0(미적용), 1(적용)

In [5]:
# Encode the target as integers up front so it can be fed to a model later:
# '불량' (defective) -> 0, '양품' (good) -> 1.
# Replacing strings with ints leaves an object-dtype result; older pandas
# silently downcast it to int64, but that downcast is deprecated since
# pandas 2.2. Casting explicitly pins the dtype, and it raises if an
# unexpected label is still present instead of leaving a mixed column.
scale_codes = {"불량": 0, "양품": 1}
data['SCALE'] = data['SCALE'].replace(scale_codes).astype('int64')

# 'Unnamed: 0' mirrors the row index in the preview (0, 1, 2, ...), so it
# carries no information of its own.
data = data.drop(columns=['Unnamed: 0'])
data

Unnamed: 0,PLATE_NO,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,...,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,PB562774,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,...,1144,116,1133,59,282,1133,934,적용,8,2조
1,PB562775,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,...,1144,122,1135,53,283,1135,937,적용,8,2조
2,PB562776,2008-08-01:00:00:59,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,...,1129,116,1121,55,282,1121,889,적용,8,3조
3,PB562777,2008-08-01:00:01:24,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,...,1152,125,1127,68,316,1127,885,적용,8,3조
4,PB562778,2008-08-01:00:01:44,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,...,1140,134,1128,48,314,1128,873,적용,8,1조
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,PB563502,2008-08-02:13:35:36,0,NK-KA,C0,20.14,3580,38639,21870,3호기,...,1172,72,1164,62,245,1164,1005,적용,8,2조
716,PB563503,2008-08-02:13:35:02,1,NV-A32,C0,15.08,3212,48233,18340,2호기,...,1150,61,1169,61,238,1169,947,적용,10,1조
717,PB563504,2008-08-02:14:40:00,1,NV-A32,C0,16.60,3441,43688,19590,2호기,...,1169,65,1163,77,247,1163,948,적용,10,4조
718,PB563505,2008-08-02:13:35:19,1,LR-A,C0,15.59,3363,48740,80240,3호기,...,1179,86,1163,45,243,1163,940,적용,10,2조


In [6]:
# PLATE_NO is different on every row, so it works as an identifier rather
# than a feature. Print the distinct count to confirm it matches the number
# of rows (720), then drop the column.
print(data['PLATE_NO'].nunique(dropna=False))
data = data.drop(columns=['PLATE_NO'])
data

720


Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,1,1144,116,1133,59,282,1133,934,적용,8,2조
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,1144,122,1135,53,283,1135,937,적용,8,2조
2,2008-08-01:00:00:59,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,1,1129,116,1121,55,282,1121,889,적용,8,3조
3,2008-08-01:00:01:24,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,2,1152,125,1127,68,316,1127,885,적용,8,3조
4,2008-08-01:00:01:44,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,1,1140,134,1128,48,314,1128,873,적용,8,1조
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2008-08-02:13:35:36,0,NK-KA,C0,20.14,3580,38639,21870,3호기,1,1172,72,1164,62,245,1164,1005,적용,8,2조
716,2008-08-02:13:35:02,1,NV-A32,C0,15.08,3212,48233,18340,2호기,1,1150,61,1169,61,238,1169,947,적용,10,1조
717,2008-08-02:14:40:00,1,NV-A32,C0,16.60,3441,43688,19590,2호기,2,1169,65,1163,77,247,1163,948,적용,10,4조
718,2008-08-02:13:35:19,1,LR-A,C0,15.59,3363,48740,80240,3호기,2,1179,86,1163,45,243,1163,940,적용,10,2조


In [7]:
# Re-check dtypes and non-null counts after encoding SCALE and dropping columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ROLLING_DATE       720 non-null    object 
 1   SCALE              720 non-null    int64  
 2   SPEC               720 non-null    object 
 3   STEEL_KIND         720 non-null    object 
 4   PT_THK             720 non-null    float64
 5   PT_WDTH            720 non-null    int64  
 6   PT_LTH             720 non-null    int64  
 7   PT_WGT             720 non-null    int64  
 8   FUR_NO             720 non-null    object 
 9   FUR_NO_ROW         720 non-null    int64  
 10  FUR_HZ_TEMP        720 non-null    int64  
 11  FUR_HZ_TIME        720 non-null    int64  
 12  FUR_SZ_TEMP        720 non-null    int64  
 13  FUR_SZ_TIME        720 non-null    int64  
 14  FUR_TIME           720 non-null    int64  
 15  FUR_EXTEMP         720 non-null    int64  
 16  ROLLING_TEMP_T5    720 non

In [8]:
# Number of distinct SPEC values.
len(data['SPEC'].unique())

66

In [9]:
# Distinct work groups (WORK_GR).
data['WORK_GR'].unique()

array(['2조', '3조', '1조', '4조'], dtype=object)

In [10]:
# Distinct steel kinds (STEEL_KIND).
data['STEEL_KIND'].unique()

array(['T1', 'T8', 'T0', 'T5', 'C0', 'C3', 'C1', 'T7', 'T3'], dtype=object)

In [11]:
# Inspect the distinct ROLLING_DATE strings (colon-separated, e.g.
# '2008-08-01:00:00:15').
# NOTE(review): some rows carry a third field above 59 (e.g.
# '2008-08-01:18:82:05'), so it may not be clock minutes — confirm before
# parsing these strings as datetimes.
data['ROLLING_DATE'].unique()

array(['2008-08-01:00:00:15', '2008-08-01:00:00:16',
       '2008-08-01:00:00:59', '2008-08-01:00:01:24',
       '2008-08-01:00:01:44', '2008-08-01:00:02:06',
       '2008-08-01:00:02:28', '2008-08-01:00:02:21',
       '2008-08-01:00:02:51', '2008-08-01:00:03:15',
       '2008-08-01:00:03:24', '2008-08-01:00:04:15',
       '2008-08-01:00:04:20', '2008-08-01:00:05:47',
       '2008-08-01:00:05:25', '2008-08-01:00:05:16',
       '2008-08-01:01:10:14', '2008-08-01:01:10:44',
       '2008-08-01:01:11:01', '2008-08-01:01:11:08',
       '2008-08-01:01:12:45', '2008-08-01:01:12:49',
       '2008-08-01:01:13:47', '2008-08-01:01:13:05',
       '2008-08-01:01:14:20', '2008-08-01:01:14:53',
       '2008-08-01:01:14:25', '2008-08-01:01:15:39',
       '2008-08-01:01:15:14', '2008-08-01:01:15:59',
       '2008-08-01:01:15:34', '2008-08-01:02:20:52',
       '2008-08-01:02:20:28', '2008-08-01:02:21:27',
       '2008-08-01:02:21:11', '2008-08-01:02:21:37',
       '2008-08-01:02:21:17', '2008-08-01:02:2

In [12]:
# Inspect the distinct plate thickness values (PT_THK).
data['PT_THK'].unique()

array([ 32.25 ,  33.27 ,  38.33 ,  38.43 ,  30.23 ,  34.28 ,  50.46 ,
        44.39 ,  48.44 ,  45.4  ,  44.9  ,  51.27 ,  55.51 ,  55.5  ,
        40.   ,  60.58 ,  64.489,  60.63 ,  64.12 ,  60.57 ,  65.64 ,
        67.65 ,  65.69 ,  70.7  ,  73.71 ,  70.6  ,  75.71 ,  75.81 ,
        83.32 ,  89.63 ,  20.13 ,  17.1  ,  14.09 ,  20.43 ,  16.09 ,
        15.59 ,  18.1  ,  17.4  ,  20.1  ,  21.11 ,  14.06 ,  15.09 ,
        15.38 ,  16.6  ,  18.11 ,  18.62 ,  19.13 ,  21.15 ,  22.16 ,
        22.66 ,  25.12 ,  30.   ,  35.15 ,  40.16 ,  55.2  ,  80.28 ,
        32.06 ,  22.15 ,  22.2  ,  21.14 ,  20.18 ,  19.62 ,  19.1  ,
        19.12 ,  18.4  ,  18.16 ,  30.13 ,  30.24 ,  50.19 ,  38.16 ,
        25.18 ,  24.17 ,  19.63 ,  15.08 ,  20.   ,  20.14 ,  23.16 ,
        25.19 ,  26.24 ,  24.11 ,  24.18 ,  20.44 ,  20.63 ,  17.6  ,
        16.39 ,  14.07 ,  13.06 ,  13.56 ,  12.05 ,  21.64 ,  21.94 ,
        19.42 ,  20.42 ,  17.09 ,  15.58 ,  12.8  ,  12.03 ,  13.54 ,
        14.05 ,  16.

In [13]:
# HSB: whether HSB was applied — check the raw labels before encoding them.
data['HSB'].unique()

array(['적용', '미적용'], dtype=object)

In [14]:
# Encode HSB as integers: '적용' (applied) -> 1, '미적용' (not applied) -> 0.
# NOTE(review): the original note read HSB as "high-performance steel for
# bridges"; in a scale-defect context it may instead be the Hot Scale
# Breaker descaling step — confirm against the data dictionary.
# The explicit cast pins the dtype: the string->int replace yields an object
# result, and the silent downcast older pandas applied is deprecated since
# pandas 2.2.
hsb_codes = {'적용': 1, '미적용': 0}
data['HSB'] = data['HSB'].replace(hsb_codes).astype('int64')

In [15]:
# Inspect the distinct FUR_TIME values (time spent inside the furnace).
data['FUR_TIME'].unique()

array([282, 283, 316, 314, 289, 294, 293, 298, 297, 299, 290, 324, 311,
       312, 329, 315, 313, 334, 335, 336, 347, 351, 332, 344, 339, 337,
       338, 330, 340, 341, 359, 345, 358, 352, 346, 357, 343, 356, 350,
       288, 355, 296, 310, 304, 275, 291, 319, 325, 323, 331, 321, 342,
       348, 353, 354, 360, 363, 349, 362, 366, 333, 317, 318, 328, 320,
       303, 308, 326, 309, 295, 322, 302, 306, 307, 327, 276, 284, 278,
       285, 279, 277, 271, 286, 272, 270, 281, 280, 267, 264, 257, 274,
       273, 260, 261, 269, 262, 258, 268, 266, 246, 254, 256, 259, 265,
       263, 245, 252, 250, 251, 249, 224, 234, 238, 239, 221, 236, 233,
       229, 240, 227, 235, 228, 230, 231, 243, 241, 237, 232, 244, 242,
       248, 253, 247, 364, 361, 371, 372, 370, 369, 376, 375, 379, 377,
       365, 382, 390, 378, 383, 398, 395, 373, 385, 255], dtype=int64)

In [16]:
# Re-check dtypes after encoding HSB.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ROLLING_DATE       720 non-null    object 
 1   SCALE              720 non-null    int64  
 2   SPEC               720 non-null    object 
 3   STEEL_KIND         720 non-null    object 
 4   PT_THK             720 non-null    float64
 5   PT_WDTH            720 non-null    int64  
 6   PT_LTH             720 non-null    int64  
 7   PT_WGT             720 non-null    int64  
 8   FUR_NO             720 non-null    object 
 9   FUR_NO_ROW         720 non-null    int64  
 10  FUR_HZ_TEMP        720 non-null    int64  
 11  FUR_HZ_TIME        720 non-null    int64  
 12  FUR_SZ_TEMP        720 non-null    int64  
 13  FUR_SZ_TIME        720 non-null    int64  
 14  FUR_TIME           720 non-null    int64  
 15  FUR_EXTEMP         720 non-null    int64  
 16  ROLLING_TEMP_T5    720 non

In [17]:
# Preview the first 10 rows.
data.head(10)

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,1,1144,116,1133,59,282,1133,934,1,8,2조
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,1144,122,1135,53,283,1135,937,1,8,2조
2,2008-08-01:00:00:59,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,1,1129,116,1121,55,282,1121,889,1,8,3조
3,2008-08-01:00:01:24,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,2,1152,125,1127,68,316,1127,885,1,8,3조
4,2008-08-01:00:01:44,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,1,1140,134,1128,48,314,1128,873,1,8,1조
5,2008-08-01:00:02:06,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,2,1143,127,1128,57,314,1128,874,1,8,4조
6,2008-08-01:00:02:28,1,BV-EH36-TM,T8,38.33,3099,16719,15590,1호기,1,1138,126,1130,50,289,1130,878,1,8,2조
7,2008-08-01:00:02:21,1,BV-EH36-TM,T8,38.33,3099,16719,15590,1호기,2,1139,126,1131,52,294,1131,870,1,8,4조
8,2008-08-01:00:02:51,1,BV-EH36-TM,T8,38.33,3099,16719,15590,2호기,1,1127,126,1122,52,293,1122,873,1,8,1조
9,2008-08-01:00:03:15,1,COMMON,T8,38.43,3129,16187,15280,2호기,2,1135,119,1124,73,298,1124,881,1,8,4조


In [18]:
# Distinct values of the ROLLING_DESCALING pass count.
data['ROLLING_DESCALING'].unique()

array([ 8,  5,  6, 10,  9,  7], dtype=int64)

In [19]:
# HSB 데이터가 상대적으로 많고, 교량을 만들기 위한 고강도 철을 만든 Data 같다.

In [20]:
# Distinct production-facility identifiers (FUR_NO).
data['FUR_NO'].unique()

array(['1호기', '2호기', '3호기'], dtype=object)

In [21]:
# Distinct STEEL_KIND values.
data['STEEL_KIND'].unique()

array(['T1', 'T8', 'T0', 'T5', 'C0', 'C3', 'C1', 'T7', 'T3'], dtype=object)

In [22]:
# Rows whose STEEL_KIND is 'T1'.
data[data['STEEL_KIND'] == 'T1']

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,1,1144,116,1133,59,282,1133,934,1,8,2조
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,1144,122,1135,53,283,1135,937,1,8,2조
14,2008-08-01:00:05:25,1,GL-E32-TM,T1,34.28,2207,30543,18140,2호기,1,1119,126,1119,72,311,1119,931,1,8,3조
15,2008-08-01:00:05:16,0,GL-E32-TM,T1,50.46,2185,21767,37680,3호기,1,1127,127,1123,71,312,1123,929,1,5,2조
16,2008-08-01:01:10:14,1,GL-E32-TM,T1,50.46,2200,21756,37920,2호기,2,1134,127,1124,92,329,1124,929,1,6,2조
17,2008-08-01:01:10:14,1,GL-E32-TM,T1,50.46,2200,21756,37920,3호기,2,1124,117,1124,87,315,1124,929,1,6,3조
18,2008-08-01:01:10:44,1,GL-E32-TM,T1,50.46,2200,21756,37920,1호기,1,1129,122,1125,78,313,1125,925,1,6,2조
19,2008-08-01:01:11:01,1,GL-E32-TM,T1,50.46,2200,21756,37920,2호기,2,1124,54,1127,78,312,1127,928,1,6,2조
26,2008-08-01:01:14:53,1,COMMON,T1,45.4,2150,18453,14140,2호기,1,1123,62,1123,101,332,1123,933,1,6,1조
27,2008-08-01:01:14:25,1,COMMON,T1,45.4,2150,18453,14140,2호기,2,1132,70,1126,95,344,1126,933,1,6,1조


In [23]:
# Rows whose STEEL_KIND is 'T0'.
data[data['STEEL_KIND'] == 'T0']

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
13,2008-08-01:00:05:47,1,COMMON,T0,30.23,1940,34797,16020,1호기,2,1119,130,1120,65,324,1120,926,1,8,4조
398,2008-08-01:18:82:05,1,NK-KA32-TM,T0,22.15,2884,30690,61560,2호기,1,1173,73,1153,58,242,1153,915,1,8,1조
399,2008-08-01:18:83:42,0,GL-A32-TM,T0,13.06,3328,33383,22780,1호기,1,1179,66,1153,57,241,1153,863,1,9,3조
400,2008-08-01:18:83:31,1,GL-A32-TM,T0,15.08,3239,36930,28320,1호기,2,1137,55,1136,68,245,1136,923,1,10,3조
401,2008-08-01:18:83:07,1,GL-A32-TM,T0,15.08,2724,37058,23900,2호기,1,1147,53,1133,66,243,1133,916,1,10,1조
428,2008-08-01:20:02:58,0,NV-A32-TM,T0,40.35,2772,25978,45620,3호기,2,1164,77,1136,90,278,1136,897,1,5,4조
429,2008-08-01:20:02:10,1,NV-A32-TM,T0,40.35,2772,25978,45620,3호기,1,1171,67,1139,94,276,1139,897,1,6,1조
434,2008-08-01:20:05:10,1,NV-D32-TM,T0,40.34,2132,26513,35800,3호기,1,1173,92,1138,80,291,1138,895,1,6,4조
447,2008-08-01:22:22:43,1,GL-A32-TM,T0,40.35,3145,18691,37240,3호기,2,1161,100,1120,136,341,1120,894,1,6,1조
457,2008-08-01:23:30:27,1,BV-AH32-TM,T0,50.41,3065,12334,14960,1호기,1,1174,129,1137,113,354,1137,880,1,6,2조


In [24]:
# Inspect the plates produced without HSB (HSB == 0). The filter is built
# once and reused for both the count and the display.
hsb_not_applied = data[data['HSB'] == 0]

# Author's open question: the HSB == 0 rows all appear to be defective
# (SCALE == 0) — could every HSB == 0 row simply be discarded?
print(len(hsb_not_applied))

# The group is small (33 rows when this was run), so dropping it may have
# little effect either way.
hsb_not_applied

33


Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
71,2008-08-01:04:40:16,0,BV-EH36-TM,T8,73.71,2725,11009,17360,3호기,2,1142,93,1134,61,355,1134,847,0,6,1조
79,2008-08-01:04:45:03,0,GL-E36-TM,T8,75.71,2505,9249,13770,1호기,2,1142,82,1131,69,314,1131,847,0,6,4조
85,2008-08-01:05:52:32,0,AB/EH36-TM,T8,83.32,2560,10409,17430,1호기,2,1124,77,1124,99,342,1124,846,0,6,2조
112,2008-08-01:06:63:49,0,KR-B,C0,15.59,3105,54290,82520,3호기,1,1148,103,1153,91,356,1153,937,0,10,2조
163,2008-08-01:08:85:20,0,V42JBN3,C3,32.06,2200,37440,62190,1호기,2,1166,75,1166,72,316,1166,901,0,8,2조
246,2008-08-01:12:21:38,0,JS-SM490YB,C0,16.09,2559,37250,36120,1호기,1,1153,67,1151,44,269,1151,976,0,10,3조
250,2008-08-01:12:22:27,0,JS-SM490YB,C0,16.09,2512,37380,35580,3호기,1,1144,56,1150,59,273,1150,972,0,10,4조
290,2008-08-01:14:40:22,0,AB/A,C0,25.18,3379,32519,65160,3호기,2,1137,52,1167,66,260,1167,989,0,8,1조
303,2008-08-01:14:43:57,0,AB/B,C0,18.11,3599,34418,35220,2호기,2,1115,57,1160,71,236,1160,964,0,10,3조
327,2008-08-01:15:53:06,0,LR-A,C0,21.94,3813,31962,20990,2호기,1,1166,71,1165,61,242,1165,951,0,8,4조


In [25]:
# Keep only the plates with HSB applied, then list the ones that were still
# defective (SCALE == 0).
data_HSB = data.loc[data['HSB'].eq(1)]
data_HSB.loc[data_HSB['SCALE'].eq(0)]

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
11,2008-08-01:00:04:15,0,COMMON,T8,38.43,3129,16187,30560,3호기,2,1131,120,1125,68,299,1125,1057,1,8,2조
15,2008-08-01:00:05:16,0,GL-E32-TM,T1,50.46,2185,21767,37680,3호기,1,1127,127,1123,71,312,1123,929,1,5,2조
60,2008-08-01:03:33:52,0,COMMON,T8,65.69,2498,11978,15430,1호기,1,1142,110,1134,73,343,1134,851,1,5,4조
107,2008-08-01:06:62:20,0,JS-SM490YB,C0,14.09,3094,41815,85860,3호기,2,1151,104,1155,75,349,1155,930,1,9,4조
114,2008-08-01:06:64:58,0,JS-SM490YB,C0,18.10,3094,41786,91850,1호기,1,1158,130,1162,54,345,1162,1004,1,10,2조
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,2008-08-02:11:14:33,0,NV-A32,C0,17.10,2346,42741,13460,1호기,1,1171,55,1159,69,252,1159,1013,1,10,2조
671,2008-08-02:11:15:52,0,A283-C,C0,19.18,3125,28607,40380,1호기,1,1161,62,1156,69,250,1156,988,1,9,4조
677,2008-08-02:12:20:20,0,SA283-C,C0,33.67,3127,19298,47850,1호기,1,1181,72,1166,60,248,1166,1037,1,8,3조
685,2008-08-02:12:23:11,0,LR-AH32,C0,19.63,3097,47041,22450,3호기,1,1177,72,1164,60,253,1164,1004,1,10,4조


In [26]:
# FUR_NO_ROW: production-facility ROW. Unclear whether this means row 1 vs
# row 2 inside the furnace — TODO confirm against the data dictionary.

data['FUR_NO_ROW'].unique()

array([1, 2], dtype=int64)

In [27]:
# FUR_NO identifies the production unit ('1호기', '2호기', ...).
data['FUR_NO'].unique()

array(['1호기', '2호기', '3호기'], dtype=object)

In [28]:
# Rows produced on unit '1호기'.
data[data['FUR_NO'] == "1호기"]

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,1,1144,116,1133,59,282,1133,934,1,8,2조
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,1144,122,1135,53,283,1135,937,1,8,2조
6,2008-08-01:00:02:28,1,BV-EH36-TM,T8,38.33,3099,16719,15590,1호기,1,1138,126,1130,50,289,1130,878,1,8,2조
7,2008-08-01:00:02:21,1,BV-EH36-TM,T8,38.33,3099,16719,15590,1호기,2,1139,126,1131,52,294,1131,870,1,8,4조
12,2008-08-01:00:04:20,1,COMMON,T8,38.43,3129,16187,15280,1호기,1,1132,125,1127,62,290,1127,820,1,8,3조
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,2008-08-02:13:31:03,1,LR-AH32,C0,12.05,2855,52321,28260,1호기,2,1159,54,1156,74,252,1156,894,1,10,3조
707,2008-08-02:13:33:26,1,NV-A32,C0,12.05,3088,50530,14760,1호기,1,1183,66,1166,68,257,1166,896,1,10,4조
708,2008-08-02:13:33:52,1,NV-A32,C0,12.05,3018,50056,14290,1호기,2,1173,55,1164,78,259,1164,891,1,10,2조
713,2008-08-02:13:35:24,1,NV-A32,C0,18.11,3260,40586,37620,1호기,1,1174,66,1160,71,240,1160,965,1,10,3조


In [29]:
# Rows with FUR_NO_ROW equal to 2.
data[data['FUR_NO_ROW'] == 2]

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,1144,122,1135,53,283,1135,937,1,8,2조
3,2008-08-01:00:01:24,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,2,1152,125,1127,68,316,1127,885,1,8,3조
5,2008-08-01:00:02:06,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,2,1143,127,1128,57,314,1128,874,1,8,4조
7,2008-08-01:00:02:21,1,BV-EH36-TM,T8,38.33,3099,16719,15590,1호기,2,1139,126,1131,52,294,1131,870,1,8,4조
9,2008-08-01:00:03:15,1,COMMON,T8,38.43,3129,16187,15280,2호기,2,1135,119,1124,73,298,1124,881,1,8,4조
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
710,2008-08-02:13:34:44,1,LR-AH32,C0,12.05,3095,53763,47220,2호기,2,1174,74,1159,61,238,1159,919,1,10,2조
712,2008-08-02:13:34:32,1,LR-A,C0,16.60,3528,32323,14860,3호기,2,1156,65,1160,72,244,1160,938,1,10,1조
714,2008-08-02:14:40:33,1,LR-A,C0,18.92,3401,41455,20940,1호기,2,1142,55,1151,86,246,1151,948,1,10,4조
717,2008-08-02:14:40:00,1,NV-A32,C0,16.60,3441,43688,19590,2호기,2,1169,65,1163,77,247,1163,948,1,10,4조


In [30]:
# Display the frame in its current state.
data

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,FUR_HZ_TEMP,FUR_HZ_TIME,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR
0,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,1,1144,116,1133,59,282,1133,934,1,8,2조
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,1144,122,1135,53,283,1135,937,1,8,2조
2,2008-08-01:00:00:59,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,1,1129,116,1121,55,282,1121,889,1,8,3조
3,2008-08-01:00:01:24,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,2,1152,125,1127,68,316,1127,885,1,8,3조
4,2008-08-01:00:01:44,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,1,1140,134,1128,48,314,1128,873,1,8,1조
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2008-08-02:13:35:36,0,NK-KA,C0,20.14,3580,38639,21870,3호기,1,1172,72,1164,62,245,1164,1005,1,8,2조
716,2008-08-02:13:35:02,1,NV-A32,C0,15.08,3212,48233,18340,2호기,1,1150,61,1169,61,238,1169,947,1,10,1조
717,2008-08-02:14:40:00,1,NV-A32,C0,16.60,3441,43688,19590,2호기,2,1169,65,1163,77,247,1163,948,1,10,4조
718,2008-08-02:13:35:19,1,LR-A,C0,15.59,3363,48740,80240,3호기,2,1179,86,1163,45,243,1163,940,1,10,2조


In [31]:
# Open question from the author: the HSB == 0 rows look uniformly defective —
# should they be dropped?

# ROLLING_DATE is a colon-separated string such as '2008-08-01:00:00:15'.
# Split it once and reuse the pieces for both derived columns.
rolling_parts = data['ROLLING_DATE'].str.split(':')

# NOTE: despite its name, 'Year' holds the whole date part ('2008-08-01').
data['Year'] = rolling_parts.str[0]

# The second field is the hour; cast it to int so it can be compared
# numerically when bucketing into time-of-day labels.
data['Hour'] = rolling_parts.str[1].astype("int")

def hour_spilt(hours):
    """Map an hour value to a coarse time-of-day label.

    Returns "새벽" (dawn) for hours below 6, "오전" (morning) for hours
    below 12, "오후" (afternoon) for hours below 18, and "저녁" (evening)
    for everything else.
    """
    # (exclusive upper bound, label) pairs, checked in ascending order.
    buckets = ((6, "새벽"), (12, "오전"), (18, "오후"))
    for upper_bound, label in buckets:
        if hours < upper_bound:
            return label
    return "저녁"
    
# Overwrite the numeric hour with its time-of-day label, then show the frame.
data['Hour'] = data['Hour'].map(hour_spilt)
data

Unnamed: 0,ROLLING_DATE,SCALE,SPEC,STEEL_KIND,PT_THK,PT_WDTH,PT_LTH,PT_WGT,FUR_NO,FUR_NO_ROW,...,FUR_SZ_TEMP,FUR_SZ_TIME,FUR_TIME,FUR_EXTEMP,ROLLING_TEMP_T5,HSB,ROLLING_DESCALING,WORK_GR,Year,Hour
0,2008-08-01:00:00:15,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,1,...,1133,59,282,1133,934,1,8,2조,2008-08-01,새벽
1,2008-08-01:00:00:16,1,AB/EH32-TM,T1,32.25,3707,15109,14180,1호기,2,...,1135,53,283,1135,937,1,8,2조,2008-08-01,새벽
2,2008-08-01:00:00:59,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,1,...,1121,55,282,1121,889,1,8,3조,2008-08-01,새벽
3,2008-08-01:00:01:24,1,NV-E36-TM,T8,33.27,3619,19181,18130,2호기,2,...,1127,68,316,1127,885,1,8,3조,2008-08-01,새벽
4,2008-08-01:00:01:44,1,BV-EH36-TM,T8,38.33,3098,13334,12430,3호기,1,...,1128,48,314,1128,873,1,8,1조,2008-08-01,새벽
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2008-08-02:13:35:36,0,NK-KA,C0,20.14,3580,38639,21870,3호기,1,...,1164,62,245,1164,1005,1,8,2조,2008-08-02,오후
716,2008-08-02:13:35:02,1,NV-A32,C0,15.08,3212,48233,18340,2호기,1,...,1169,61,238,1169,947,1,10,1조,2008-08-02,오후
717,2008-08-02:14:40:00,1,NV-A32,C0,16.60,3441,43688,19590,2호기,2,...,1163,77,247,1163,948,1,10,4조,2008-08-02,오후
718,2008-08-02:13:35:19,1,LR-A,C0,15.59,3363,48740,80240,3호기,2,...,1163,45,243,1163,940,1,10,2조,2008-08-02,오후


In [None]:
# Keep only the part of SPEC before the first '-' (e.g. 'AB/EH32-TM' ->
# 'AB/EH32'); values without a '-' are left unchanged.
data['SPEC'] = data['SPEC'].map(lambda spec: spec.partition('-')[0])