In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, MaxPooling1D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# 1. Load the dataset
file_path = 'Merged_HKKT_with_all_features.csv'
data = pd.read_csv(file_path)

# 2. Separate the features and the target variable (Pseudorange Residual (m))
X = data.drop(columns=['Pseudorange Residual (m)'])  # Features
y = data['Pseudorange Residual (m)']  # Target

# 3. Split the data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [2]:
# 4. Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Reshape the data for CNN input (CNN expects 3D input: [samples, timesteps, features])
X_train_cnn = np.expand_dims(X_train_scaled, axis=2)
X_test_cnn = np.expand_dims(X_test_scaled, axis=2)

ValueError: could not convert string to float: '2021-01-02 11:43:00'

In [None]:
# 6. Build the CNN model
def create_cnn(input_shape):
    model = Sequential()
    
    # First Convolutional Layer
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(BatchNormalization())
    
    # Second Convolutional Layer
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(BatchNormalization())
    
    # Third Convolutional Layer
    model.add(Conv1D(filters=256, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(BatchNormalization())
    
    # Flattening and Fully Connected Layers
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))  # Dropout to prevent overfitting
    model.add(Dense(1))  # Output layer for regression
    
    # Compile the model
    optimizer = Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    
    return model

# 7. Create the model
input_shape = (X_train_cnn.shape[1], X_train_cnn.shape[2])
cnn_model = create_cnn(input_shape)

# 8. Train the CNN model
cnn_model.fit(X_train_cnn, y_train, epochs=50, batch_size=32, validation_data=(X_test_cnn, y_test))

# 9. Predict the Pseudorange Residual on the test set
y_pred_cnn = cnn_model.predict(X_test_cnn)

# 10. Calculate RMS for CNN and improvement rate
rms_original = np.sqrt(mean_squared_error(y_test, y_test))  # This is essentially 0, but keeping for consistency
rms_cnn = np.sqrt(mean_squared_error(y_test, y_pred_cnn))
improvement_rate = (rms_original - rms_cnn) / rms_original * 100

print(f'RMS of CNN model: {rms_cnn:.4f} meters')
print(f'Improvement Rate: {improvement_rate:.2f}%')