In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import StandardScaler
import datetime
from tensorflow.keras import backend as K
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from keras.optimizers import SGD



In [2]:
def list_subdirectories(directory_path):
    subdirectories =  [subdir for subdir in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, subdir))]
    return subdirectories

train_doc_list = list_subdirectories('train')

test_doc_list = list_subdirectories('test')


total_gnss_df = pd.DataFrame({})
total_imu_df = pd.DataFrame({})
total_gt_df = pd.DataFrame({})

def utc_to_unix_millis(utc_millis_array):
    # Unix epoch start time in milliseconds
    unix_epoch_start = np.datetime64('1970-01-01T00:00:00', 'ms').astype('datetime64[ms]').astype(int)
    # Convert UTC time array to Unix time array
    unix_millis_array = np.array(utc_millis_array) - unix_epoch_start
    return unix_millis_array

def unix_to_utc_millis(unix_millis_array):
    # Unix epoch start time in milliseconds
    unix_epoch_start = np.datetime64('1970-01-01T00:00:00', 'ms').astype('datetime64[ms]').astype(int)
    # Convert Unix time array to UTC time array
    utc_millis_array = np.array(unix_millis_array) + unix_epoch_start
    return utc_millis_array

scaler = StandardScaler()


Models:

In [10]:
# Model architecture
model_rnn = Sequential()
model_rnn.add(SimpleRNN(units=128, return_sequences=True, input_shape=(20,1 )))  
model_rnn.add(SimpleRNN(units=64, return_sequences=True, activation='tanh'))
model_rnn.add(SimpleRNN(units=32,activation='tanh'))  
model_rnn.add(Dense(2))

# Compile the model
optimizer = SGD(learning_rate=0.001)
model_rnn.compile(loss='mse', optimizer=optimizer)

In [4]:
# Define the combined CNN-GRU model
def create_cnn_gru_model(input_shape, l2_reg=0.01):
    model = tf.keras.Sequential([
        # CNN layers
        tf.keras.layers.Reshape((-1, 1), input_shape=input_shape),  # Reshape for Conv1D input
        tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        
        # GRU layer
        tf.keras.layers.Reshape((-1, 64)),  # Reshape for GRU input
        tf.keras.layers.GRU(48),
        
        # Dense output layer
        tf.keras.layers.Dense(2)  # Output layer for predicting 2 new features
    ])
    return model

# Define the model
input_shape = (None, 21)  # Adjust according to your data dimensions (21 features)
model_cnn_gru = create_cnn_gru_model(input_shape)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)  # Set the learning rate to 0.001

# Compile the model with the customized optimizer
model_cnn_gru.compile(optimizer=optimizer, loss='mae')


# Function to train the model on a single file with gradient clipping
def train_model_on_file(model, x_train, y_train, clip_norm=1.0):
    model.fit(x_train, y_train, epochs=10, batch_size=16)  # Train the model


In [5]:
# Create a linear regression model
model = LinearRegression()

## TRAIN

Reading the trainin data and preprocessing:

!!!!!!!! DO NOT RUN THIS SECTIONS. IT IS ONLY RECREATING THE TRAINING FILES IN THE FORMAT THAT I WILL USE LATER TO TRAIN THE MODELS. THE RECREATED FILES ALREADY INCLUDED AT THE GIVEN .ZIP FILE, SINCE THE TRAIN DATA GIVEN IS CONSUMES TOO MUCH STORAGE I DID NOT INCLUDE THEM FOR YOU TO DOWNLOAD THEM AGAIN. !!!!!!!!!!!!!!

In [5]:
counter = 0
for main_doc_name in train_doc_list:
    sub_sub_list = list_subdirectories('train'+'/'+main_doc_name)
    
    for doc_name_list in sub_sub_list:
        
        #GNSS Data Part:

        #Reading the present gnss csv file
        df_gnss = pd.read_csv('train'+'/'+main_doc_name+'/'+doc_name_list+'/'+'device_gnss.csv', low_memory=False)

        data_gnss = pd.DataFrame()
        #Taking only the necessary columns
        data_gnss['utcTimeMillis'] =  df_gnss["utcTimeMillis"]
        data_gnss['SvVelocityYEcefMetersPerSecond'] = df_gnss["SvVelocityYEcefMetersPerSecond"]
        data_gnss['SvVelocityZEcefMetersPerSecond'] = df_gnss['SvVelocityZEcefMetersPerSecond']
        data_gnss['SvVelocityXEcefMetersPerSecond'] = df_gnss['SvVelocityXEcefMetersPerSecond']
        data_gnss['RawPseudorangeMeters'] = df_gnss['RawPseudorangeMeters']
        data_gnss['SvClockBiasMeters'] = df_gnss['SvClockBiasMeters']
        data_gnss['IsrbMeters'] = df_gnss['IsrbMeters']
        data_gnss['TroposphericDelayMeters'] = df_gnss['TroposphericDelayMeters']
        data_gnss['IonosphericDelayMeters'] = df_gnss['IonosphericDelayMeters']
        data_gnss['WlsPositionXEcefMeters'] = df_gnss['WlsPositionXEcefMeters']
        data_gnss['WlsPositionYEcefMeters'] = df_gnss['WlsPositionYEcefMeters']
        data_gnss['WlsPositionZEcefMeters'] = df_gnss['WlsPositionZEcefMeters']
               

        #If more than one measurement is taken in the same second, average the measurements taken for the same time period and create one line of data for each second
        ten_digit_gnss = data_gnss.copy()
        ten_digit_gnss['utcTimeMillis'] = data_gnss['utcTimeMillis'].astype(str).str[:10].astype(int)
        gnss_data = ten_digit_gnss.groupby('utcTimeMillis').mean().reset_index()
        gnss_data = gnss_data.fillna(method='ffill')


        #IMU Data Part:
        #Reading the present imu csv file
        df_imu = pd.read_csv('train'+'/'+main_doc_name+'/'+doc_name_list+'/'+'device_imu.csv', low_memory=False)

        # Transforming it to Pandas DataFrame to be able to make moves easily
        data_imu = pd.DataFrame()

        #Taking only the necessary columns
        data_imu['MessageType'] =  df_imu["MessageType"]
        data_imu['utcTimeMillis'] =  df_imu["utcTimeMillis"]
        data_imu['MeasurementX'] =  df_imu["MeasurementX"]
        data_imu['MeasurementY'] = df_imu["MeasurementY"]
        data_imu['MeasurementZ'] = df_imu['MeasurementZ']
        
        #Since the accelerometer, gyro and magnetometer are combined in the same sensor in the IMU, it can be seen that the data received from these sensors 
        #are stored in a mixed manner. To fix this and evaluate each sensor data on its own, three sensor data are transferred to three different dataframes
        accel_imu_data = data_imu.loc[data_imu['MessageType'] == 'UncalAccel']
        mag_imu_data = data_imu.loc[data_imu['MessageType'] == 'UncalMag']
        gyro_imu_data = data_imu.loc[data_imu['MessageType'] == 'UncalGyro']
        
        #For the next step empty DataFrames are created
        ten_digit_accel = accel_imu_data.copy()
        ten_digit_mag = mag_imu_data.copy()
        ten_digit_gyro = gyro_imu_data.copy()

        #Data is currently processed and stored with millisecond precision, removing the last three digits to convert it to seconds precision
        ten_digit_accel['utcTimeMillis'] = accel_imu_data['utcTimeMillis'].astype(str).str[:10].astype(int)
        ten_digit_mag['utcTimeMillis'] = mag_imu_data['utcTimeMillis'].astype(str).str[:10].astype(int)
        ten_digit_gyro['utcTimeMillis'] = gyro_imu_data['utcTimeMillis'].astype(str).str[:10].astype(int)
        
        #If more than one measurement is taken in the same second, average the measurements taken for the same time period and create one line of data for each second
        imu_accel_df = ten_digit_accel.groupby('utcTimeMillis').mean().reset_index()
        imu_mag_df = ten_digit_mag.groupby('utcTimeMillis').mean().reset_index()
        imu_gyro_df = ten_digit_gyro.groupby('utcTimeMillis').mean().reset_index()

        imu_accel_df = imu_accel_df.fillna(method='ffill')
        imu_mag_df = imu_mag_df.fillna(method='ffill')
        imu_gyro_df = imu_gyro_df.fillna(method='ffill')


        #Since measurements for each sensor are recorded with the same column name, column names are customized for each sensor type to avoid confusion when combining data
        imu_accel_df = imu_accel_df.rename(columns={'MeasurementX': 'Accel_MeasurementX'})
        imu_accel_df = imu_accel_df.rename(columns={'MeasurementY': 'Accel_MeasurementY'})
        imu_accel_df = imu_accel_df.rename(columns={'MeasurementZ': 'Accel_MeasurementZ'})
        imu_mag_df = imu_mag_df.rename(columns={'MeasurementX': 'Mag_MeasurementX'})
        imu_mag_df = imu_mag_df.rename(columns={'MeasurementY': 'Mag_MeasurementY'})
        imu_mag_df = imu_mag_df.rename(columns={'MeasurementZ': 'Mag_MeasurementZ'})
        imu_gyro_df = imu_gyro_df.rename(columns={'MeasurementX': 'Gyro_MeasurementX'})
        imu_gyro_df = imu_gyro_df.rename(columns={'MeasurementY': 'Gyro_MeasurementY'})
        imu_gyro_df = imu_gyro_df.rename(columns={'MeasurementZ': 'Gyro_MeasurementZ'})

        #Recombining the processed imu data. Here, the same rows are determined by time, rows with the same time value are combined and the columns of 
        #accel, mag, and gyro measurements are stored separately. 
        total_imu_df = pd.merge(imu_accel_df, imu_mag_df, on='utcTimeMillis', how='inner')
        total_imu_df = pd.merge(total_imu_df, imu_gyro_df, on='utcTimeMillis', how='inner')

        total_imu_df = total_imu_df.fillna(method='ffill')
        
        #Finally, gnss and imu data are paired to take time into account and a single data is created. Thus, the 'X' part of the 
        #data set that will be used later when training the model is created
        x_part = pd.merge(gnss_data, total_imu_df, on='utcTimeMillis', how='inner')
        x_part = x_part.fillna(method='ffill')

        
        #Ground_Truth Data Part:
        df_truth = pd.read_csv('train'+'/'+main_doc_name+'/'+doc_name_list+'/'+'ground_truth.csv')
        data_gt = {'UnixTimeMillis': df_truth["UnixTimeMillis"],
                'LatitudeDegrees': df_truth["LatitudeDegrees"],
                'LongitudeDegrees':df_truth["LongitudeDegrees"],
                'AltitudeMeters':df_truth['AltitudeMeters']
                }
        data_gt = pd.DataFrame(data_gt)
        
        #The time values are arranged according to the data that will be used as 'X' data when training the model. The values UnixTimeMillis column are arranged as to be 10 
        #digit numbers. 
        ten_digit_gt = data_gt.copy()
        ten_digit_gt['UnixTimeMillis'] = unix_to_utc_millis(ten_digit_gt['UnixTimeMillis'] )
        ten_digit_gt['UnixTimeMillis'] = data_gt['UnixTimeMillis'].astype(str).str[:10].astype(int)

        #The column names are set to be the same with the previous data
        ten_digit_gt = ten_digit_gt.rename(columns={'UnixTimeMillis': 'utcTimeMillis'})
        
        #The y part of the training data for training the model
        y_part = ten_digit_gt
                
        #Finally matching the 'x', and the 'y' parts of the data set according to time.
        merged_training_data = pd.merge(x_part, y_part, on='utcTimeMillis', how='inner')
        merged_training_data = merged_training_data.fillna(method='ffill')
                
        
        y_train = merged_training_data[['LatitudeDegrees', 'LongitudeDegrees']]
        x_train = merged_training_data.drop(columns=['LatitudeDegrees', 'LongitudeDegrees', 'AltitudeMeters'])
        x_train_wt = merged_training_data.drop(columns=['utcTimeMillis','LatitudeDegrees', 'LongitudeDegrees', 'AltitudeMeters'])

        
        # Fit the scaler to the data and transform the dataframe
        x_train_normilized = pd.DataFrame(scaler.fit_transform(x_train), columns=x_train.columns)
        x_train_wt_normilized = pd.DataFrame(scaler.fit_transform(x_train_wt), columns=x_train_wt.columns)
        
        #Last Check
        x_train = x_train.fillna(method='ffill')
        x_train_wt = x_train_wt.fillna(method='ffill')
        y_train = y_train.fillna(method='ffill')
                
        #saving the file
        file_path = main_doc_name
        merged_training_data.to_csv(f'train_data2/{file_path}_{counter}.csv', index=False)
        counter = counter + 1
        print('counter:', counter)

counter: 1
counter: 2
counter: 3
counter: 4
counter: 5
counter: 6
counter: 7
counter: 8
counter: 9
counter: 10
counter: 11
counter: 12
counter: 13
counter: 14
counter: 15
counter: 16
counter: 17
counter: 18
counter: 19
counter: 20
counter: 21
counter: 22
counter: 23
counter: 24
counter: 25
counter: 26
counter: 27
counter: 28
counter: 29
counter: 30
counter: 31
counter: 32
counter: 33
counter: 34
counter: 35
counter: 36
counter: 37
counter: 38
counter: 39
counter: 40
counter: 41
counter: 42
counter: 43
counter: 44
counter: 45
counter: 46
counter: 47
counter: 48
counter: 49
counter: 50
counter: 51
counter: 52
counter: 53
counter: 54
counter: 55
counter: 56
counter: 57
counter: 58
counter: 59
counter: 60
counter: 61
counter: 62
counter: 63
counter: 64
counter: 65
counter: 66
counter: 67
counter: 68
counter: 69
counter: 70
counter: 71
counter: 72
counter: 73
counter: 74
counter: 75
counter: 76
counter: 77
counter: 78
counter: 79
counter: 80
counter: 81
counter: 82
counter: 83
counter: 84
c

In [6]:
# Directory containing the CSV files
folder_path = 'train_data'

# List all files in the directory
file_names = os.listdir(folder_path)

# Filter CSV files
csv_files = [file for file in file_names if file.endswith('.csv')]


- Train RNN model:

In [11]:
for doc_name in csv_files:
    #Reading the csv files
    data = pd.read_csv('train_data'+'/'+ doc_name, low_memory=False)
    #print('data', data)
    x_data_to_train_wt = data.copy() 
    y_data_to_train = data.copy()

    x_data_to_train_wt = x_data_to_train_wt.drop(columns=['utcTimeMillis','LatitudeDegrees','LongitudeDegrees','AltitudeMeters'])
    
    #print('x_data_to_train_wt', x_data_to_train_wt)
    y_data_to_train = y_data_to_train.drop(columns=['utcTimeMillis','SvVelocityYEcefMetersPerSecond','SvVelocityZEcefMetersPerSecond','SvVelocityXEcefMetersPerSecond',
                                                    'RawPseudorangeMeters','SvClockBiasMeters','IsrbMeters','TroposphericDelayMeters','IonosphericDelayMeters',
                                                    'WlsPositionXEcefMeters','WlsPositionYEcefMeters','WlsPositionZEcefMeters','Accel_MeasurementX','Accel_MeasurementY',
                                                    'Accel_MeasurementZ','Mag_MeasurementX','Mag_MeasurementY','Mag_MeasurementZ','Gyro_MeasurementX','Gyro_MeasurementY',
                                                    'Gyro_MeasurementZ','AltitudeMeters'])
    
    #print('y_data_to_train',y_data_to_train)
    #Train the RNN model
    los=model_rnn.train_on_batch(x_data_to_train_wt, y_data_to_train)
    print(los)

8215.587890625
7570.44384765625
7249.79443359375
6661.26220703125
7105.26904296875
7570.7021484375
7380.84033203125
8246.0634765625
8649.3798828125
7891.85986328125
7551.3115234375
6294.05615234375
5801.63330078125
10709.4619140625
9314.25
6061.09814453125
6624.478515625
6762.5927734375
9789.892578125
5755.07421875
6430.46630859375
9921.712890625
5410.4033203125
6254.15576171875
10018.3037109375
5261.07763671875
9490.9384765625
4750.3583984375
8381.099609375
7757.2431640625
8248.1943359375
7683.681640625
8196.466796875
7645.25732421875
6792.833984375
6171.34423828125
9839.8681640625
6185.35888671875
9565.27734375
7589.3134765625
8324.5771484375
6716.98828125
6494.54443359375
6708.26611328125
6475.62158203125
5783.1640625
9491.1728515625
5745.80078125
9459.87890625
9970.37109375
5944.66796875
5329.4306640625
5293.16064453125
10930.48828125
4741.927734375
4384.11865234375
4098.64794921875
11849.802734375
4259.634765625
3609.265869140625
11258.302734375
3714.97802734375
3414.330810546875


KeyboardInterrupt: 

- Train LR model:

In [44]:
for doc_name in csv_files:
    #Reading the csv files
    data = pd.read_csv('train_data'+'/'+ doc_name, low_memory=False)
    
    x_data_to_train = data.copy() 
    y_data_to_train = data.copy()

    x_data_to_train_wt = x_data_to_train.drop(columns=['utcTimeMillis','LatitudeDegrees','LongitudeDegrees','AltitudeMeters'], inplace=True)
    y_data_to_train = y_data_to_train.drop(columns=['utcTimeMillis','SvVelocityYEcefMetersPerSecond','SvVelocityZEcefMetersPerSecond','SvVelocityXEcefMetersPerSecond',
                                                    'RawPseudorangeMeters','SvClockBiasMeters','IsrbMeters','TroposphericDelayMeters','IonosphericDelayMeters',
                                                    'WlsPositionXEcefMeters','WlsPositionYEcefMeters','WlsPositionZEcefMeters','Accel_MeasurementX','Accel_MeasurementY',
                                                    'Accel_MeasurementZ','Mag_MeasurementX','Mag_MeasurementY','Mag_MeasurementZ','Gyro_MeasurementX','Gyro_MeasurementY',
                                                    'Gyro_MeasurementZ','AltitudeMeters'], inplace=True)
    #Train the LR model
    model.fit(x_train_wt, y_train)
    

- Train CNN-GRU model:

In [12]:
for doc_name in csv_files:
    #Reading the csv files
    data = pd.read_csv('train_data'+'/'+ doc_name, low_memory=False)
    
    x_data_to_train = data.copy() 
    y_data_to_train = data.copy()

    x_data_to_train_wt = x_data_to_train.drop(columns=['utcTimeMillis','LatitudeDegrees','LongitudeDegrees','AltitudeMeters'], inplace=True)
    y_data_to_train = y_data_to_train.drop(columns=['utcTimeMillis','SvVelocityYEcefMetersPerSecond','SvVelocityZEcefMetersPerSecond','SvVelocityXEcefMetersPerSecond',
                                                    'RawPseudorangeMeters','SvClockBiasMeters','IsrbMeters','TroposphericDelayMeters','IonosphericDelayMeters',
                                                    'WlsPositionXEcefMeters','WlsPositionYEcefMeters','WlsPositionZEcefMeters','Accel_MeasurementX','Accel_MeasurementY',
                                                    'Accel_MeasurementZ','Mag_MeasurementX','Mag_MeasurementY','Mag_MeasurementZ','Gyro_MeasurementX','Gyro_MeasurementY',
                                                    'Gyro_MeasurementZ','AltitudeMeters'], inplace=True)
    # Train the CNN-GRU MODEL:
    train_model_on_file(model_cnn_gru, x_train_wt, y_train)
    

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

## TEST

!!!!! TEST AND SAMPLE_SUBMISSION DATA FILES SHOULD BE ADDED WHERE THIS DOCUMENT IS LOCATED BEFORE RUNNING THAT SECTION !!!!!

In [13]:
samp_sum = pd.read_csv('sample_submission.csv')
ten_digit_reference_df = pd.DataFrame(columns=samp_sum.columns)
ten_digit_reference_df['tripId'] =  samp_sum['tripId']
ten_digit_reference_df['UnixTimeMillis'] =  samp_sum['UnixTimeMillis']
ten_digit_reference_df['UnixTimeMillis'] = ten_digit_reference_df['UnixTimeMillis'].astype(str).str[:10].astype(int)

In [14]:
ten_digit_reference_df.shape

(71936, 4)

In [15]:
total_x_train = pd.DataFrame(index=range(71936), columns=['tripId', 'UnixTimeMillis', 'SvVelocityYEcefMetersPerSecond', 'SvVelocityZEcefMetersPerSecond',
                                                   'SvVelocityXEcefMetersPerSecond', 'RawPseudorangeMeters', 'SvClockBiasMeters', 'IsrbMeters', 'TroposphericDelayMeters',
                                                   'IonosphericDelayMeters', 'WlsPositionXEcefMeters', 'WlsPositionYEcefMeters', 'WlsPositionZEcefMeters','Accel_MeasurementX',
                                                     'Accel_MeasurementY', 'Accel_MeasurementZ','Mag_MeasurementX', 'Mag_MeasurementY','Mag_MeasurementZ',
                                                       'Gyro_MeasurementX', 'Gyro_MeasurementY', 'Gyro_MeasurementZ'])
total_x_train['tripId'] = ten_digit_reference_df['tripId'] 
total_x_train['UnixTimeMillis'] = ten_digit_reference_df['UnixTimeMillis'] 

In [16]:
# Initialize an empty DataFrame
data = {'tripId': [], 'UnixTimeMillis': [], 'LatitudeDegrees': [], 'LongitudeDegrees': []}
df = pd.DataFrame(data)
for main_doc_name in test_doc_list:
    sub_sub_list = list_subdirectories('test'+'/'+main_doc_name)
    for doc_name_list in sub_sub_list:

        #GNSS Data Part:

        #Reading the present gnss csv file
        df_gnss = pd.read_csv('test'+'/'+main_doc_name+'/'+doc_name_list+'/'+'device_gnss.csv', low_memory=False)

        data_gnss = pd.DataFrame()
        #Taking only the necessary columns
        data_gnss['utcTimeMillis'] = df_gnss["utcTimeMillis"]
        data_gnss['SvVelocityYEcefMetersPerSecond']= df_gnss["SvVelocityYEcefMetersPerSecond"]
        data_gnss['SvVelocityZEcefMetersPerSecond'] = df_gnss['SvVelocityZEcefMetersPerSecond']
        data_gnss['SvVelocityXEcefMetersPerSecond'] = df_gnss['SvVelocityXEcefMetersPerSecond']
        data_gnss['RawPseudorangeMeters'] = df_gnss['RawPseudorangeMeters']
        data_gnss['SvClockBiasMeters'] = df_gnss['SvClockBiasMeters']
        data_gnss['IsrbMeters'] = df_gnss['IsrbMeters']
        data_gnss['TroposphericDelayMeters'] = df_gnss['TroposphericDelayMeters']
        data_gnss['IonosphericDelayMeters'] = df_gnss['IonosphericDelayMeters']
        data_gnss['WlsPositionXEcefMeters'] = df_gnss['WlsPositionXEcefMeters']
        data_gnss['WlsPositionYEcefMeters'] = df_gnss['WlsPositionYEcefMeters']
        data_gnss['WlsPositionZEcefMeters'] = df_gnss['WlsPositionZEcefMeters']
                    

        #If more than one measurement is taken in the same second, average the measurements taken for the same time period and create one line of data for each second
        ten_digit_gnss = data_gnss.copy()
        ten_digit_gnss['utcTimeMillis'] = utc_to_unix_millis(ten_digit_gnss['utcTimeMillis'])
        ten_digit_gnss['utcTimeMillis'] = data_gnss['utcTimeMillis'].astype(str).str[:10].astype(int)
        gnss_data = ten_digit_gnss.groupby('utcTimeMillis').mean().reset_index()
        gnss_data = gnss_data.fillna(method='ffill')


        #IMU Data Part:
        #Reading the present imu csv file
        df_imu = pd.read_csv('test'+'/'+main_doc_name+'/'+doc_name_list+'/'+'device_imu.csv', low_memory=False)

        #Taking only the necessary columns
        data_imu = {'MessageType': df_imu["MessageType"],
                    'utcTimeMillis': df_imu["utcTimeMillis"],
                    'MeasurementX': df_imu["MeasurementX"],
                    'MeasurementY':df_imu["MeasurementY"],
                    'MeasurementZ':df_imu['MeasurementZ']
                    }
        # Transforming it to Pandas DataFrame to be able to make moves easily
        data_imu = pd.DataFrame(data_imu)
        
        #Since the accelerometer, gyro and magnetometer are combined in the same sensor in the IMU, it can be seen that the data received from these sensors 
        #are stored in a mixed manner. To fix this and evaluate each sensor data on its own, three sensor data are transferred to three different dataframes
        accel_imu_data = data_imu.loc[data_imu['MessageType'] == 'UncalAccel']
        mag_imu_data = data_imu.loc[data_imu['MessageType'] == 'UncalMag']
        gyro_imu_data = data_imu.loc[data_imu['MessageType'] == 'UncalGyro']
        
        #For the next step empty DataFrames are created
        ten_digit_accel = accel_imu_data.copy()
        ten_digit_accel['utcTimeMillis'] = utc_to_unix_millis(ten_digit_accel['utcTimeMillis'])

        ten_digit_mag = mag_imu_data.copy()
        ten_digit_mag['utcTimeMillis'] = utc_to_unix_millis(ten_digit_mag['utcTimeMillis'])

        ten_digit_gyro = gyro_imu_data.copy()
        ten_digit_gyro['utcTimeMillis'] = utc_to_unix_millis(ten_digit_gyro['utcTimeMillis'])

        #Data is currently processed and stored with millisecond precision, removing the last three digits to convert it to seconds precision
        ten_digit_accel['utcTimeMillis'] = accel_imu_data['utcTimeMillis'].astype(str).str[:10].astype(int)
        ten_digit_mag['utcTimeMillis'] = mag_imu_data['utcTimeMillis'].astype(str).str[:10].astype(int)
        ten_digit_gyro['utcTimeMillis'] = gyro_imu_data['utcTimeMillis'].astype(str).str[:10].astype(int)
        
        #If more than one measurement is taken in the same second, average the measurements taken for the same time period and create one line of data for each second
        imu_accel_df = ten_digit_accel.groupby('utcTimeMillis').mean().reset_index()
        imu_mag_df = ten_digit_mag.groupby('utcTimeMillis').mean().reset_index()
        imu_gyro_df = ten_digit_gyro.groupby('utcTimeMillis').mean().reset_index()

        imu_accel_df = imu_accel_df.fillna(method='ffill')
        imu_mag_df = imu_mag_df.fillna(method='ffill')
        imu_gyro_df = imu_gyro_df.fillna(method='ffill')


        #Since measurements for each sensor are recorded with the same column name, column names are customized for each sensor type to avoid confusion when combining data
        imu_accel_df = imu_accel_df.rename(columns={'MeasurementX': 'Accel_MeasurementX'})
        imu_accel_df = imu_accel_df.rename(columns={'MeasurementY': 'Accel_MeasurementY'})
        imu_accel_df = imu_accel_df.rename(columns={'MeasurementZ': 'Accel_MeasurementZ'})
        imu_mag_df = imu_mag_df.rename(columns={'MeasurementX': 'Mag_MeasurementX'})
        imu_mag_df = imu_mag_df.rename(columns={'MeasurementY': 'Mag_MeasurementY'})
        imu_mag_df = imu_mag_df.rename(columns={'MeasurementZ': 'Mag_MeasurementZ'})
        imu_gyro_df = imu_gyro_df.rename(columns={'MeasurementX': 'Gyro_MeasurementX'})
        imu_gyro_df = imu_gyro_df.rename(columns={'MeasurementY': 'Gyro_MeasurementY'})
        imu_gyro_df = imu_gyro_df.rename(columns={'MeasurementZ': 'Gyro_MeasurementZ'})

        

        #Recombining the processed imu data. Here, the same rows are determined by time, rows with the same time value are combined and the columns of 
        #accel, mag, and gyro measurements are stored separately. 
        total_imu_df = pd.merge(imu_accel_df, imu_mag_df, on='utcTimeMillis', how='inner')
        total_imu_df = pd.merge(total_imu_df, imu_gyro_df, on='utcTimeMillis', how='inner')

        total_imu_df = total_imu_df.fillna(method='ffill')
        
        #Finally, gnss and imu data are paired to take time into account and a single data is created. Thus, the 'X' part of the 
        #data set that will be used later when training the model is created
        x_part = pd.merge(gnss_data, total_imu_df, on='utcTimeMillis', how='inner')
        x_part = x_part.rename(columns={'utcTimeMillis': 'UnixTimeMillis'})
        x_part_wt = x_part.drop(columns=['UnixTimeMillis'])

        #Last check for NAN values
        x_part = x_part.fillna(method='ffill')
        x_part_wt = x_part_wt.fillna(method='ffill')

        # Predict with the CNN-GRU MODEL:
        predicted_cnn_gru = model_rnn.predict(x_part_wt)
        
        # Create a dictionary with empty lists for each column
        new_data = {
            'tripId': [],
            'UnixTimeMillis': [],
            'LatitudeDegrees': [],
            'LongitudeDegrees': []
        }

        # Create the new_data DataFrame
        new_data = pd.DataFrame()

        # Assign values to columns
        new_data['tripId'] = []
        new_data['UnixTimeMillis'] = x_part['UnixTimeMillis']
        new_data['tripId'] = main_doc_name + '/' + doc_name_list
        new_data['LatitudeDegrees'] = predicted_cnn_gru[:, 0]  # Assuming the first column contains LatitudeDegrees
        new_data['LongitudeDegrees'] = predicted_cnn_gru[:, 1]
        
        df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)
        




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)




  df_cnn_gru = df.append(pd.DataFrame(new_data), ignore_index=True)


In [17]:
for index, row in ten_digit_reference_df.iterrows():
    trip_id = row['tripId']
    unix_time_millis = row['UnixTimeMillis']
    
    matched_row = df_cnn_gru[(df_cnn_gru['tripId'] == trip_id) & (df_cnn_gru['UnixTimeMillis'] == unix_time_millis)]
    
    # Eğer eşleşen bir satır bulunduysa
    if not matched_row.empty:
        # LatitudeDegrees ve LongitudeDegrees sütunlarını ilgili hücrelere atayın
        ten_digit_reference_df.at[index, 'LatitudeDegrees'] = matched_row['LatitudeDegrees'].iloc[0]
        ten_digit_reference_df.at[index, 'LongitudeDegrees'] = matched_row['LongitudeDegrees'].iloc[0]
    else:
        # Eşleşen bir satır bulunamazsa, ilgili hücrelere NaN (veya başka bir varsayılan değer) atayın
        ten_digit_reference_df.at[index, 'LatitudeDegrees'] = None
        ten_digit_reference_df.at[index, 'LongitudeDegrees'] = None


In [18]:
# Eksik değerleri bir üstündeki değerlerle dolduralım
df_filled = ten_digit_reference_df.fillna(method='ffill')

print(df_filled)

                                         tripId  UnixTimeMillis  \
0         2020-12-11-19-30-us-ca-mtv-e/pixel4xl      1607715055   
1         2020-12-11-19-30-us-ca-mtv-e/pixel4xl      1607715056   
2         2020-12-11-19-30-us-ca-mtv-e/pixel4xl      1607715057   
3         2020-12-11-19-30-us-ca-mtv-e/pixel4xl      1607715058   
4         2020-12-11-19-30-us-ca-mtv-e/pixel4xl      1607715059   
...                                         ...             ...   
71931  2023-06-15-18-49-us-ca-sjc-ce1/pixel7pro      1686856468   
71932  2023-06-15-18-49-us-ca-sjc-ce1/pixel7pro      1686856469   
71933  2023-06-15-18-49-us-ca-sjc-ce1/pixel7pro      1686856470   
71934  2023-06-15-18-49-us-ca-sjc-ce1/pixel7pro      1686856471   
71935  2023-06-15-18-49-us-ca-sjc-ce1/pixel7pro      1686856472   

      LatitudeDegrees LongitudeDegrees  
0                None             None  
1                None             None  
2                None             None  
3                None          

In [53]:
df_filled['UnixTimeMillis'] = samp_sum['UnixTimeMillis'] 
# Export the final DataFrame to a CSV file
df_filled.to_csv('deneme.csv', index=False)