In [1]:
# importing important library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import klib
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read data files

In [3]:
df = pd.read_csv('test.csv')

In [4]:
df_train= pd.read_csv('train.csv')
df_train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.0,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.35585,0,12.234987


In [5]:
df_test = pd.read_csv('test.csv')
df_test.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.0,0.0,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.23061,0
4,5,0,5,20,0.127644,26.320956,0


In [6]:
df_submission = pd.read_csv('sample_submission.csv')
df_submission.head()

Unnamed: 0,id,pressure
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


In [7]:
df_train.shape

(6036000, 8)

In [8]:
df_train.info()
df_train.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6036000 entries, 0 to 6035999
Data columns (total 8 columns):
 #   Column     Dtype  
---  ------     -----  
 0   id         int64  
 1   breath_id  int64  
 2   R          int64  
 3   C          int64  
 4   time_step  float64
 5   u_in       float64
 6   u_out      int64  
 7   pressure   float64
dtypes: float64(3), int64(5)
memory usage: 368.4 MB


id           0
breath_id    0
R            0
C            0
time_step    0
u_in         0
u_out        0
pressure     0
dtype: int64

#####  Train data sets

In [9]:
## Describe in exclude id columns
df_train[df_train.columns[1:]].describe(include='all').round(3)

Unnamed: 0,breath_id,R,C,time_step,u_in,u_out,pressure
count,6036000.0,6036000.0,6036000.0,6036000.0,6036000.0,6036000.0,6036000.0
mean,62838.859,27.036,26.081,1.307,7.322,0.62,11.22
std,36335.256,19.595,17.152,0.766,13.435,0.485,8.11
min,1.0,5.0,10.0,0.0,0.0,0.0,-1.896
25%,31377.0,5.0,10.0,0.643,0.394,0.0,6.33
50%,62765.5,20.0,20.0,1.308,4.386,1.0,7.033
75%,94301.0,50.0,50.0,1.966,4.984,1.0,13.641
max,125749.0,50.0,50.0,2.937,100.0,1.0,64.821


In [10]:
df_train['time_step'].diff()

0               NaN
1          0.033652
2          0.033862
3          0.034028
4          0.034213
             ...   
6035995    0.033412
6035996    0.033358
6035997    0.033447
6035998    0.033337
6035999    0.033273
Name: time_step, Length: 6036000, dtype: float64

In [11]:
df_train['R_C'] = [f'{r:02}_{c:02}' for r, c in zip(df_train['R'], df_train['C'])]
RCorder = list(np.sort(df_train['R_C'].unique()))

In [12]:
RCorder

['05_10',
 '05_20',
 '05_50',
 '20_10',
 '20_20',
 '20_50',
 '50_10',
 '50_20',
 '50_50']

In [13]:
df_train['time_delta'] = df_train['time_step'].diff()
df_train['time_delta'].fillna(0, inplace=True)
df_train['time_delta'].mask(df_train['time_delta'] < 0, 0, inplace=True)
df_train['tmp'] = df_train['time_delta'] * df_train['u_in']
df_train['area'] = df_train.groupby('breath_id')['tmp'].cumsum()

In [14]:
# u_in: max, min, mean, std 
u_in_max_dict = df_train.groupby('breath_id')['u_in'].max().to_dict()
df_train['u_in_max'] = df_train['breath_id'].map(u_in_max_dict)
u_in_min_dict = df_train.groupby('breath_id')['u_in'].min().to_dict()
df_train['u_in_min'] = df_train['breath_id'].map(u_in_min_dict)
u_in_mean_dict = df_train.groupby('breath_id')['u_in'].mean().to_dict()
df_train['u_in_mean'] = df_train['breath_id'].map(u_in_mean_dict)
u_in_std_dict = df_train.groupby('breath_id')['u_in'].std().to_dict()
df_train['u_in_std'] = df_train['breath_id'].map(u_in_std_dict)

In [15]:
# u_in_half is time:0 - time point of u_out:1 rise (almost 1.0s)
df_train['tmp'] = df_train['u_out']*(-1)+1 # inversion of u_out
df_train['u_in_half'] = df_train['tmp'] * df_train['u_in']

In [16]:
# u_in_half: max, min, mean, std
u_in_half_max_dict = df_train.groupby('breath_id')['u_in_half'].max().to_dict()
df_train['u_in_half_max'] = df_train['breath_id'].map(u_in_half_max_dict)
u_in_half_min_dict = df_train.groupby('breath_id')['u_in_half'].min().to_dict()
df_train['u_in_half_min'] = df_train['breath_id'].map(u_in_half_min_dict)
u_in_half_mean_dict = df_train.groupby('breath_id')['u_in_half'].mean().to_dict()
df_train['u_in_half_mean'] = df_train['breath_id'].map(u_in_half_mean_dict)
u_in_half_std_dict = df_train.groupby('breath_id')['u_in_half'].std().to_dict()
df_train['u_in_half_std'] = df_train['breath_id'].map(u_in_half_std_dict)

In [17]:
# All entries are first point of each breath_id
first_df = df_train.loc[0::80,:]
# All entries are first point of each breath_id
last_df = df_train.loc[79::80,:]

In [18]:
df_train['u_out_diff'] = df_train['u_out'].diff()
df_train['u_out_diff'].fillna(0, inplace=True)
df_train['u_out_diff'].replace(-1, 0, inplace=True)
uout1_df = df_train[df_train['u_out_diff']==1]

In [19]:
# Register Area when u_out becomes 1
uout1_area_dict = dict(zip(first_df['breath_id'], first_df['u_in']))
df_train['area_uout1'] = df_train['breath_id'].map(uout1_area_dict) 

In [20]:
# u_in: first point, last point
u_in_first_dict = dict(zip(first_df['breath_id'], first_df['u_in']))
df_train['u_in_first'] = df_train['breath_id'].map(u_in_first_dict)
u_in_last_dict = dict(zip(first_df['breath_id'], last_df['u_in']))
df_train['u_in_last'] = df_train['breath_id'].map(u_in_last_dict)
# time(sec) of end point
time_end_dict = dict(zip(last_df['breath_id'], last_df['time_step']))     
df_train['time_end'] = df_train['breath_id'].map(time_end_dict)

In [21]:
# time(sec) when u_out becomes 1
uout1_dict = dict(zip(uout1_df['breath_id'], uout1_df['time_step']))
df_train['time_uout1'] = df_train['breath_id'].map(uout1_dict)

In [22]:
# u_in when u_out becomes1
u_in_uout1_dict = dict(zip(uout1_df['breath_id'], uout1_df['u_in']))
df_train['u_in_uout1'] = df_train['breath_id'].map(u_in_uout1_dict)

In [23]:
# Dict that puts 0 at the beginning of the 80row cycle
first_0_dict = dict(zip(first_df['id'], [0]*len(uout1_df)))

# Faster version u_in_diff creation, faster than groupby
df_train['u_in_diff'] = df_train['u_in'].diff()
df_train['tmp'] = df_train['id'].map(first_0_dict) # put 0, the 80row cycle
df_train.iloc[0::80, df_train.columns.get_loc('u_in_diff')] = df_train.iloc[0::80, df_train.columns.get_loc('tmp')]

In [24]:
# Create u_in vibration
df_train['diff_sign'] = np.sign(df_train['u_in_diff'])
df_train['sign_diff'] = df_train['diff_sign'].diff()
df_train['tmp'] = df_train['id'].map(first_0_dict) # put 0, the 80row cycle
df_train.iloc[0::80, df_train.columns.get_loc('sign_diff')] = df_train.iloc[0::80, df_train.columns.get_loc('tmp')]

# Count the number of inversions, so take the absolute value and sum
df_train['sign_diff'] = abs(df_train['sign_diff']) 
sign_diff_dict = df_train.groupby('breath_id')['sign_diff'].sum().to_dict()
df_train['diff_vib'] = df_train['breath_id'].map(sign_diff_dict)

In [25]:
if 'diff_sign' in df_train.columns:
    df_train.drop(['diff_sign', 'sign_diff'], axis=1, inplace=True)

#####  Test data sets

In [26]:
df_test['time_step'].diff()

0               NaN
1          0.031904
2          0.031924
3          0.031924
4          0.031893
             ...   
4023995    0.033753
4023996    0.033736
4023997    0.033622
4023998    0.033659
4023999    0.034167
Name: time_step, Length: 4024000, dtype: float64

In [27]:
df_test['R_C'] = [f'{r:02}_{c:02}' for r, c in zip(df_test['R'], df_test['C'])]
Rcorder = list(np.sort(df_test['R_C'].unique()))

In [28]:
Rcorder

['05_10',
 '05_20',
 '05_50',
 '20_10',
 '20_20',
 '20_50',
 '50_10',
 '50_20',
 '50_50']

In [29]:
df_test['time_delta'] = df_test['time_step'].diff()
df_test['time_delta'].fillna(0, inplace=True)
df_test['time_delta'].mask(df_test['time_delta'] < 0, 0, inplace=True)
df_test['tmp'] = df_test['time_delta'] * df_test['u_in']
df_test['area'] = df_test.groupby('breath_id')['tmp'].cumsum()

In [30]:
# u_in: max, min, mean, std 
u_in_max_dict = df_test.groupby('breath_id')['u_in'].max().to_dict()
df_test['u_in_max'] = df_test['breath_id'].map(u_in_max_dict)
u_in_min_dict = df_test.groupby('breath_id')['u_in'].min().to_dict()
df_test['u_in_min'] = df_test['breath_id'].map(u_in_min_dict)
u_in_mean_dict = df_test.groupby('breath_id')['u_in'].mean().to_dict()
df_test['u_in_mean'] = df_test['breath_id'].map(u_in_mean_dict)
u_in_std_dict = df_test.groupby('breath_id')['u_in'].std().to_dict()
df_test['u_in_std'] = df_test['breath_id'].map(u_in_std_dict)

In [31]:
# u_in_half is time:0 - time point of u_out:1 rise (almost 1.0s)
df_test['tmp'] = df_test['u_out']*(-1)+1 # inversion of u_out
df_test['u_in_half'] = df_test['tmp'] * df_test['u_in']

In [32]:
# u_in_half: max, min, mean, std
u_in_half_max_dict = df_test.groupby('breath_id')['u_in_half'].max().to_dict()
df_test['u_in_half_max'] = df_test['breath_id'].map(u_in_half_max_dict)
u_in_half_min_dict = df_test.groupby('breath_id')['u_in_half'].min().to_dict()
df_test['u_in_half_min'] = df_test['breath_id'].map(u_in_half_min_dict)
u_in_half_mean_dict = df_test.groupby('breath_id')['u_in_half'].mean().to_dict()
df_test['u_in_half_mean'] = df_test['breath_id'].map(u_in_half_mean_dict)
u_in_half_std_dict = df_test.groupby('breath_id')['u_in_half'].std().to_dict()
df_test['u_in_half_std'] = df_test['breath_id'].map(u_in_half_std_dict)

In [33]:
# All entries are first point of each breath_id
first_df = df_test.loc[0::80,:]
# All entries are first point of each breath_id
last_df = df_test.loc[79::80,:]

In [34]:
df_test['u_out_diff'] = df_test['u_out'].diff()
df_test['u_out_diff'].fillna(0, inplace=True)
df_test['u_out_diff'].replace(-1, 0, inplace=True)
uout1_df = df_test[df_test['u_out_diff']==1]

In [35]:
# Register Area when u_out becomes 1
uout1_area_dict = dict(zip(first_df['breath_id'], first_df['u_in']))
df_test['area_uout1'] = df_test['breath_id'].map(uout1_area_dict)

In [36]:
# u_in: first point, last point
u_in_first_dict = dict(zip(first_df['breath_id'], first_df['u_in']))
df_test['u_in_first'] = df_test['breath_id'].map(u_in_first_dict)
u_in_last_dict = dict(zip(first_df['breath_id'], last_df['u_in']))
df_test['u_in_last'] = df_test['breath_id'].map(u_in_last_dict)
# time(sec) of end point
time_end_dict = dict(zip(last_df['breath_id'], last_df['time_step']))     
df_test['time_end'] = df_test['breath_id'].map(time_end_dict)

In [37]:
# time(sec) when u_out becomes 1
uout1_dict = dict(zip(uout1_df['breath_id'], uout1_df['time_step']))
df_test['time_uout1'] = df_test['breath_id'].map(uout1_dict)

In [38]:
# u_in when u_out becomes1
u_in_uout1_dict = dict(zip(uout1_df['breath_id'], uout1_df['u_in']))
df_test['u_in_uout1'] = df_test['breath_id'].map(u_in_uout1_dict)

In [39]:
# Dict that puts 0 at the beginning of the 80row cycle
first_0_dict = dict(zip(first_df['id'], [0]*len(uout1_df)))

# Faster version u_in_diff creation, faster than groupby
df_test['u_in_diff'] = df_test['u_in'].diff()
df_test['tmp'] = df_test['id'].map(first_0_dict) # put 0, the 80row cycle
df_test.iloc[0::80, df_test.columns.get_loc('u_in_diff')] = df_test.iloc[0::80, df_test.columns.get_loc('tmp')]

In [40]:
# Create u_in vibration
df_test['diff_sign'] = np.sign(df_test['u_in_diff'])
df_test['sign_diff'] = df_test['diff_sign'].diff()
df_test['tmp'] = df_test['id'].map(first_0_dict) # put 0, the 80row cycle
df_test.iloc[0::80, df_test.columns.get_loc('sign_diff')] = df_test.iloc[0::80, df_test.columns.get_loc('tmp')]

# Count the number of inversions, so take the absolute value and sum
df_test['sign_diff'] = abs(df_test['sign_diff']) 
sign_diff_dict = df_test.groupby('breath_id')['sign_diff'].sum().to_dict()
df_test['diff_vib'] = df_test['breath_id'].map(sign_diff_dict)

In [41]:
if 'diff_sign' in df_test.columns:
    df_test.drop(['diff_sign', 'sign_diff'], axis=1, inplace=True)

In [42]:
print(len(df_train.columns))
print(len(df_test.columns))

30
29


In [43]:
#Training and test data
X_train = df_train.drop(['id','breath_id','pressure','R_C','tmp'], axis = 1)
Y_train = df_train['pressure']
X_test = df_test.drop(['id','breath_id','R_C','tmp'], axis = 1).copy()
print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("X_test shape: ", X_test.shape)

X_train shape:  (6036000, 25)
Y_train shape:  (6036000,)
X_test shape:  (4024000, 25)


In [44]:
len(X_train.columns)

25

In [45]:
len(X_test.columns)

25

In [46]:
X_test.isnull().sum()

R                 0
C                 0
time_step         0
u_in              0
u_out             0
time_delta        0
area              0
u_in_max          0
u_in_min          0
u_in_mean         0
u_in_std          0
u_in_half         0
u_in_half_max     0
u_in_half_min     0
u_in_half_mean    0
u_in_half_std     0
u_out_diff        0
area_uout1        0
u_in_first        0
u_in_last         0
time_end          0
time_uout1        0
u_in_uout1        0
u_in_diff         0
diff_vib          0
dtype: int64

In [47]:
from sklearn.preprocessing import RobustScaler

# Create the object
sc=RobustScaler()
sc.fit(X_train,X_test ) 
X_train_scaled =sc.transform(X_train)
X_test_scaled  =sc.transform(X_test)

# Linear Regression

In [48]:
from sklearn.linear_model import LinearRegression
logreg = LinearRegression()
logreg.fit(X_train_scaled,Y_train)
Y_pred = logreg.predict(X_test_scaled)
acclog = round(logreg.score(X_train_scaled,Y_train)*100,2)
acclog

59.37

# Decision Tree Regressor

In [49]:
from sklearn.tree import DecisionTreeRegressor
decision_tree = DecisionTreeRegressor()
decision_tree.fit(X_train_scaled, Y_train)
Y_pred = decision_tree.predict(X_test_scaled)
acc_decision_tree = round(decision_tree.score(X_train_scaled, Y_train) * 100, 2)
acc_decision_tree

100.0

# Catboost Regressor

In [50]:
from catboost import CatBoostRegressor
catboost = CatBoostRegressor()
catboost.fit(X_train_scaled, Y_train)
Y_pred = catboost.predict(X_test_scaled)
acc_catboost = round(catboost.score(X_train_scaled, Y_train) * 100, 2)

Learning rate set to 0.183812
0:	learn: 6.9371400	total: 1.52s	remaining: 25m 15s
1:	learn: 6.0170305	total: 2.88s	remaining: 23m 56s
2:	learn: 5.3002851	total: 4.3s	remaining: 23m 50s
3:	learn: 4.7162979	total: 5.75s	remaining: 23m 50s
4:	learn: 4.2577423	total: 7.49s	remaining: 24m 50s
5:	learn: 3.9052548	total: 8.95s	remaining: 24m 42s
6:	learn: 3.6230092	total: 10.4s	remaining: 24m 34s
7:	learn: 3.4144605	total: 11.7s	remaining: 24m 15s
8:	learn: 3.2420088	total: 13.4s	remaining: 24m 39s
9:	learn: 3.0927610	total: 15.1s	remaining: 24m 59s
10:	learn: 2.9555576	total: 17.9s	remaining: 26m 52s
11:	learn: 2.8520728	total: 20.7s	remaining: 28m 20s
12:	learn: 2.7718692	total: 23.3s	remaining: 29m 28s
13:	learn: 2.6882738	total: 26.1s	remaining: 30m 34s
14:	learn: 2.6256707	total: 28.8s	remaining: 31m 28s
15:	learn: 2.5640909	total: 31.7s	remaining: 32m 30s
16:	learn: 2.5085874	total: 34.4s	remaining: 33m 11s
17:	learn: 2.4634217	total: 37.3s	remaining: 33m 54s
18:	learn: 2.4331057	total:

153:	learn: 1.4018848	total: 5m 5s	remaining: 27m 56s
154:	learn: 1.3996557	total: 5m 7s	remaining: 27m 55s
155:	learn: 1.3977605	total: 5m 9s	remaining: 27m 53s
156:	learn: 1.3948073	total: 5m 11s	remaining: 27m 52s
157:	learn: 1.3918468	total: 5m 13s	remaining: 27m 53s
158:	learn: 1.3902911	total: 5m 15s	remaining: 27m 51s
159:	learn: 1.3885742	total: 5m 18s	remaining: 27m 50s
160:	learn: 1.3866298	total: 5m 20s	remaining: 27m 51s
161:	learn: 1.3832706	total: 5m 23s	remaining: 27m 53s
162:	learn: 1.3812608	total: 5m 25s	remaining: 27m 51s
163:	learn: 1.3797121	total: 5m 26s	remaining: 27m 45s
164:	learn: 1.3771939	total: 5m 28s	remaining: 27m 39s
165:	learn: 1.3759049	total: 5m 29s	remaining: 27m 33s
166:	learn: 1.3741309	total: 5m 30s	remaining: 27m 28s
167:	learn: 1.3728222	total: 5m 31s	remaining: 27m 23s
168:	learn: 1.3710199	total: 5m 32s	remaining: 27m 17s
169:	learn: 1.3679399	total: 5m 34s	remaining: 27m 10s
170:	learn: 1.3654774	total: 5m 36s	remaining: 27m 9s
171:	learn: 1.

304:	learn: 1.2003954	total: 8m 41s	remaining: 19m 48s
305:	learn: 1.1995803	total: 8m 42s	remaining: 19m 45s
306:	learn: 1.1988442	total: 8m 43s	remaining: 19m 42s
307:	learn: 1.1978460	total: 8m 45s	remaining: 19m 39s
308:	learn: 1.1974824	total: 8m 46s	remaining: 19m 36s
309:	learn: 1.1963263	total: 8m 47s	remaining: 19m 33s
310:	learn: 1.1954664	total: 8m 48s	remaining: 19m 31s
311:	learn: 1.1943988	total: 8m 49s	remaining: 19m 28s
312:	learn: 1.1923753	total: 8m 51s	remaining: 19m 26s
313:	learn: 1.1916586	total: 8m 52s	remaining: 19m 23s
314:	learn: 1.1903977	total: 8m 53s	remaining: 19m 21s
315:	learn: 1.1897497	total: 8m 55s	remaining: 19m 18s
316:	learn: 1.1888758	total: 8m 56s	remaining: 19m 15s
317:	learn: 1.1884025	total: 8m 57s	remaining: 19m 12s
318:	learn: 1.1869829	total: 8m 58s	remaining: 19m 10s
319:	learn: 1.1863129	total: 9m	remaining: 19m 7s
320:	learn: 1.1858097	total: 9m 1s	remaining: 19m 4s
321:	learn: 1.1849804	total: 9m 2s	remaining: 19m 2s
322:	learn: 1.18414

453:	learn: 1.1056520	total: 12m 11s	remaining: 14m 39s
454:	learn: 1.1049747	total: 12m 12s	remaining: 14m 37s
455:	learn: 1.1045355	total: 12m 13s	remaining: 14m 34s
456:	learn: 1.1043648	total: 12m 14s	remaining: 14m 32s
457:	learn: 1.1038376	total: 12m 15s	remaining: 14m 30s
458:	learn: 1.1028597	total: 12m 16s	remaining: 14m 28s
459:	learn: 1.1019415	total: 12m 18s	remaining: 14m 26s
460:	learn: 1.1016269	total: 12m 19s	remaining: 14m 24s
461:	learn: 1.1010851	total: 12m 20s	remaining: 14m 22s
462:	learn: 1.1007873	total: 12m 22s	remaining: 14m 20s
463:	learn: 1.1004174	total: 12m 23s	remaining: 14m 18s
464:	learn: 1.0998373	total: 12m 24s	remaining: 14m 16s
465:	learn: 1.0996382	total: 12m 25s	remaining: 14m 14s
466:	learn: 1.0994042	total: 12m 26s	remaining: 14m 12s
467:	learn: 1.0987185	total: 12m 27s	remaining: 14m 10s
468:	learn: 1.0978867	total: 12m 28s	remaining: 14m 7s
469:	learn: 1.0975752	total: 12m 29s	remaining: 14m 5s
470:	learn: 1.0969495	total: 12m 31s	remaining: 14

601:	learn: 1.0450918	total: 15m 30s	remaining: 10m 15s
602:	learn: 1.0447931	total: 15m 32s	remaining: 10m 13s
603:	learn: 1.0446413	total: 15m 33s	remaining: 10m 12s
604:	learn: 1.0442215	total: 15m 35s	remaining: 10m 10s
605:	learn: 1.0438048	total: 15m 37s	remaining: 10m 9s
606:	learn: 1.0433598	total: 15m 39s	remaining: 10m 8s
607:	learn: 1.0427534	total: 15m 41s	remaining: 10m 6s
608:	learn: 1.0424961	total: 15m 42s	remaining: 10m 5s
609:	learn: 1.0421741	total: 15m 44s	remaining: 10m 3s
610:	learn: 1.0418053	total: 15m 46s	remaining: 10m 2s
611:	learn: 1.0414478	total: 15m 47s	remaining: 10m
612:	learn: 1.0411215	total: 15m 49s	remaining: 9m 59s
613:	learn: 1.0405320	total: 15m 51s	remaining: 9m 57s
614:	learn: 1.0400214	total: 15m 52s	remaining: 9m 56s
615:	learn: 1.0397841	total: 15m 54s	remaining: 9m 54s
616:	learn: 1.0392459	total: 15m 56s	remaining: 9m 53s
617:	learn: 1.0388865	total: 15m 58s	remaining: 9m 52s
618:	learn: 1.0385256	total: 16m	remaining: 9m 50s
619:	learn: 1

752:	learn: 0.9979476	total: 19m 11s	remaining: 6m 17s
753:	learn: 0.9977400	total: 19m 12s	remaining: 6m 16s
754:	learn: 0.9974319	total: 19m 14s	remaining: 6m 14s
755:	learn: 0.9971227	total: 19m 15s	remaining: 6m 12s
756:	learn: 0.9967307	total: 19m 16s	remaining: 6m 11s
757:	learn: 0.9965690	total: 19m 17s	remaining: 6m 9s
758:	learn: 0.9964901	total: 19m 18s	remaining: 6m 7s
759:	learn: 0.9963586	total: 19m 20s	remaining: 6m 6s
760:	learn: 0.9956220	total: 19m 21s	remaining: 6m 4s
761:	learn: 0.9954566	total: 19m 22s	remaining: 6m 3s
762:	learn: 0.9951119	total: 19m 23s	remaining: 6m 1s
763:	learn: 0.9950086	total: 19m 25s	remaining: 5m 59s
764:	learn: 0.9948270	total: 19m 26s	remaining: 5m 58s
765:	learn: 0.9945011	total: 19m 27s	remaining: 5m 56s
766:	learn: 0.9943329	total: 19m 29s	remaining: 5m 55s
767:	learn: 0.9942259	total: 19m 30s	remaining: 5m 53s
768:	learn: 0.9939594	total: 19m 31s	remaining: 5m 51s
769:	learn: 0.9937927	total: 19m 32s	remaining: 5m 50s
770:	learn: 0.99

903:	learn: 0.9623994	total: 22m 33s	remaining: 2m 23s
904:	learn: 0.9622773	total: 22m 35s	remaining: 2m 22s
905:	learn: 0.9620398	total: 22m 36s	remaining: 2m 20s
906:	learn: 0.9618770	total: 22m 37s	remaining: 2m 19s
907:	learn: 0.9617612	total: 22m 39s	remaining: 2m 17s
908:	learn: 0.9614424	total: 22m 40s	remaining: 2m 16s
909:	learn: 0.9608689	total: 22m 42s	remaining: 2m 14s
910:	learn: 0.9605880	total: 22m 43s	remaining: 2m 13s
911:	learn: 0.9604559	total: 22m 44s	remaining: 2m 11s
912:	learn: 0.9601701	total: 22m 46s	remaining: 2m 10s
913:	learn: 0.9600304	total: 22m 47s	remaining: 2m 8s
914:	learn: 0.9598878	total: 22m 48s	remaining: 2m 7s
915:	learn: 0.9596369	total: 22m 50s	remaining: 2m 5s
916:	learn: 0.9595010	total: 22m 51s	remaining: 2m 4s
917:	learn: 0.9593485	total: 22m 53s	remaining: 2m 2s
918:	learn: 0.9592260	total: 22m 54s	remaining: 2m 1s
919:	learn: 0.9590489	total: 22m 55s	remaining: 1m 59s
920:	learn: 0.9588752	total: 22m 57s	remaining: 1m 58s
921:	learn: 0.95

In [51]:
acc_catboost

98.65

# LightGbm

In [52]:
from lightgbm import LGBMRegressor
lgb = LGBMRegressor()
lgb.fit(X_train, Y_train)
Y_pred = lgb.predict(X_test_scaled)
acc_lgb = round(lgb.score(X_train, Y_train) * 100, 2)
acc_lgb

97.19

# XG Boost

In [53]:
from xgboost import XGBRegressor
xgb = XGBRegressor()
xgb.fit(X_train_scaled,Y_train)
Y_pred = xgb.predict(X_test_scaled)
acc_xgb = round(xgb.score(X_train_scaled,Y_train)*100,2)
acc_xgb

97.84

# Neural Networks

In [54]:
import tensorflow as tf 
from tensorflow import keras

In [66]:
#syntax =  keras.layers.Dense(output_dimension, input_shape, activation)

model = keras.Sequential([
  
    keras.layers.Dense(128, input_shape=(25,), activation='relu'),#Dense means all the neurons of present are connected to all the neurons in next layer
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(15,activation='relu'),
    keras.layers.Dense(1)
])

# opt = keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer='adam',
              loss='mean_squared_error', 
              metrics=['mae','mse'])

model.fit(X_train_scaled, Y_train, epochs=10)#start slow with epochs and then increase the value

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x240f1659a30>

In [67]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 128)               3328      
_________________________________________________________________
dense_5 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_6 (Dense)              (None, 15)                1935      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 16        
Total params: 21,791
Trainable params: 21,791
Non-trainable params: 0
_________________________________________________________________


In [68]:
model.evaluate(X_test_scaled)



[0.0, 0.0, 0.0]

In [69]:
model.evaluate_generator(X_test_scaled)

[0.0, 0.0, 0.0]

In [70]:
pred = model.predict(X_test_scaled)

In [71]:
pred

array([[6.4515696],
       [5.5990343],
       [6.8677263],
       ...,
       [6.4515696],
       [6.4515696],
       [6.4515696]], dtype=float32)

In [72]:
pred = np.argmax(pred,axis = 1)

In [73]:
pred = pd.Series(pred,name="pressure")

In [74]:
submission = pd.concat([pd.Series(range(1,4024000),name = "id"),pred],axis = 1)

In [77]:
submission['id']=submission['id'].astype('Int32')
submission.head()

Unnamed: 0,id,pressure
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


In [76]:
submission.to_csv("Submission.csv",index=False)