In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Read the preprocessed GNSS samples from a semicolon-separated CSV and
# preview the first rows before any column is removed.
df = pd.read_csv('../data/preprocessed_gnss_data.csv', sep=';')
print(df.head())

# Remove every column that is not fed to the model: the timestamp, the
# satellite-count / height / position fields, and the constellation_*
# indicator columns. Whatever remains (plus 'class') is kept in its
# original order.
df = df.drop(
    columns=[
        'timestamp',
        'num_satellites',
        'height',
        'latitude',
        'longitude',
        'constellation_GPS',
        'constellation_Galileo',
        'constellation_QZSS',
    ]
)

# The 'class' column is the prediction target; all remaining columns are
# the model inputs.
y = df['class']
X = df.drop(columns='class')
print(X.head())

# Hold out 30% of the rows for testing. The split is stratified on the
# target and uses a fixed seed so that it is repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Build a 100-tree random forest (seeded for repeatability) and fit it on
# the training partition; fit() returns the estimator itself, so the
# fitted model is bound directly to rf_classifier.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict a class label for every row of the held-out test partition.
y_pred = rf_classifier.predict(X_test)

# Evaluate the model on the held-out test partition.

# Per-class metrics as a preformatted, multi-line text table.
report = classification_report(y_test, y_pred)
# The table is embedded in the f-string instead of being passed as a second
# print() argument: print() joins its arguments with a space, which would
# indent only the first line of the table and shift its header out of
# alignment with the rows below it.
print(f'Classification Report:\n{report}')

# Pin the label order to the classifier's own class order so that every
# row and column of the matrix can be named explicitly.
class_labels = rf_classifier.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# cm itself stays a plain array; the labelled frame is only a display view.
# Rows are the true classes, columns are the predicted classes.
cm_labelled = pd.DataFrame(cm, index=class_labels, columns=class_labels)
cm_labelled = cm_labelled.rename_axis(index='actual', columns='predicted')
print(f'Confusion Matrix:\n{cm_labelled}')

# Rank the input columns by the importance score reported by the fitted
# forest, highest first. sort_values() returns the sorted Series, so the
# result is bound in a single expression rather than mutated in place.
feature_importances = pd.Series(
    rf_classifier.feature_importances_,
    index=X.columns,
).sort_values(ascending=False)
# Embedded in the f-string (not passed as a second print() argument) so the
# first row is not indented by print()'s separator space and stays aligned
# with the rows below it.
print(f'Feature Importances:\n{feature_importances}')


      timestamp       AGC       SNR  latitude  longitude   height  \
0  1.725971e+12  1.396530  0.621419 -0.790538   0.331466  59.9833   
1  1.725971e+12 -0.428091  0.992556 -0.790538   0.331466  59.9833   
2  1.725971e+12 -0.428091 -0.120853 -0.790538   0.331466  59.9833   
3  1.725971e+12  1.437940  0.621419 -0.803930   0.356746  56.3828   
4  1.725971e+12 -0.599573  0.992556 -0.803930   0.356746  56.3828   

   num_satellites       class  constellation_GPS  constellation_Galileo  \
0            13.0  legitimate                0.0                    0.0   
1            13.0  legitimate                1.0                    0.0   
2            13.0  legitimate                0.0                    1.0   
3            16.0  legitimate                0.0                    0.0   
4            16.0  legitimate                1.0                    0.0   

   constellation_QZSS  
0                 0.0  
1                 0.0  
2                 0.0  
3                 0.0  
4             