In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import glob
import pandas as pd
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from keras.optimizers import Adam

training_path = 'D:\\GDG DS\\dataset\\train\\*.csv'
train_files = glob.glob(training_path)
t_data = pd.concat([pd.read_csv(file) for file in train_files], ignore_index=True)

valid_path = 'D:\\GDG DS\\dataset\\valid\\*.csv'
valid_files = glob.glob(valid_path)
v_data = pd.concat([pd.read_csv(vfile) for vfile in valid_files], ignore_index=True)

In [2]:
t_data.columns = t_data.columns.str.strip()
v_data.columns = v_data.columns.str.strip()

train_data = t_data.drop(['ID', 'nx'], axis=1)
valid_data = v_data.drop(['ID', 'nx'], axis=1)

X_train = train_data.iloc[:, :51].values  
y_train = train_data.iloc[:, 51:].values   
X_valid = valid_data.iloc[:, :51].values    
y_valid = valid_data.iloc[:, 51:].values     

X_train_sensors = X_train[:, 1:] 
X_valid_sensors = X_valid[:, 1:] 

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_sensors)
X_valid_scaled = scaler.transform(X_valid_sensors)

X_train_reshaped = X_train_scaled.reshape(-1, 50, 1)
X_valid_reshaped = X_valid_scaled.reshape(-1, 50, 1)

In [12]:
model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(50, 1), return_sequences=False))
# model.add(Dropout(0.2))
# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.2))
model.add(Dense(2500))

optimizer = Adam(learning_rate=0.0008, clipnorm=1.0)
model.compile(optimizer=optimizer, loss='mse')

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
model.fit(X_train_reshaped, y_train, epochs=200, batch_size=20, validation_data=(X_valid_reshaped, y_valid), callbacks=[early_stopping])

predictions = model.predict(X_valid_reshaped)

mse = mean_squared_error(y_valid, predictions)
mae = mean_absolute_error(y_valid, predictions)
r2 = r2_score(y_valid, predictions)

print("Validation MSE:", mse)
print("Validation MAE:", mae)
print("Validation R² Score:", r2)

  super().__init__(**kwargs)


Epoch 1/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - loss: 11362326.0000 - val_loss: 66222.8984
Epoch 2/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 133680.0781 - val_loss: 251.4088
Epoch 3/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 358.7164 - val_loss: 228832.1562
Epoch 4/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 30826.2793 - val_loss: 203.3438
Epoch 5/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 199.2595 - val_loss: 169.4037
Epoch 6/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 155.3317 - val_loss: 119.0750
Epoch 7/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 120.6319 - val_loss: 112.9503
Epoch 8/200
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - loss: 111.326

In [13]:
test_data = pd.read_csv("D:\\GDG DS\\dataset\\test\\dataset_0.csv")
test_data.columns = test_data.columns.str.strip()

X_test_features = test_data.iloc[:, 3:53].values
X_test_scaled = scaler.transform(X_test_features)

X_test_reshaped = X_test_scaled.reshape(-1, 50, 1)
test_predictions = model.predict(X_test_reshaped)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [14]:
predictions_df = pd.DataFrame(test_predictions, columns=[f'val_{i}' for i in range(50, 2550)])


columns_to_keep = ['ID', 'L', 'nx'] + [f'val_{i}' for i in range(50)]
final_test_data = pd.concat([test_data[columns_to_keep], predictions_df], axis=1)

In [15]:
final_test_data.to_csv("D:\\GDG DS\\dataset\\submission\\submission7.csv", index=False)

print(final_test_data.head())
print(final_test_data.shape)

   ID     L  nx  val_0  val_1   val_2   val_3  val_4   val_5  val_6  ...  \
0   0  0.42  50  29.74  52.66   73.03   88.64  97.89  100.00  95.08  ...   
1   1  1.11  50  31.43  85.09  100.00   70.25  25.09    0.00   5.55  ...   
2   2  0.67  50  32.52  66.48   91.06  100.00  92.17   71.73  46.49  ...   
3   3  0.87  50  27.84  72.27   99.23  100.00  78.38   47.69  22.80  ...   
4   4  0.84  50  34.57  71.48   95.72  100.00  85.19   59.27  33.18  ...   

    val_2540   val_2541   val_2542   val_2543   val_2544   val_2545  \
0  29.513666  29.800240  29.119366  30.215857  31.552290  34.119270   
1  32.207390  33.556477  36.431229  37.096989  34.247814  30.950804   
2  29.722403  29.608234  28.639227  27.668055  24.560699  21.292730   
3  27.637581  26.657835  24.499636  23.413424  22.740475  24.812342   
4  30.932081  27.876802  24.161255  24.119654  24.729183  29.783863   

    val_2546   val_2547   val_2548   val_2549  
0  37.077007  42.583729  47.242226  52.060482  
1  28.744024  29.924