In [None]:
%pip install pandas
%pip install numpy
%pip install sklearn
%pip install matplotlib

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

In [None]:
# Load the dataset and drop the columns that are not needed
data = pd.read_csv('data/out.csv').drop(columns=['Year', 'Country'])

In [None]:
# Normalize data
# Fit the scaler on the training portion only (the first 60% of rows, the same
# ratio the train/validation split uses) so that the validation rows' mean and
# variance do not leak into the transform; then apply it to every row.
scaler = StandardScaler()
n_fit_rows = int(0.6 * len(data))
scaler.fit(data.iloc[:n_fit_rows])
data_scaled = scaler.transform(data)

In [None]:
# 60:40 train/validation split taken in row order (no shuffling):
# the first `train_size` rows are used for training, the remainder for validation.
train_size = int(0.6 * len(data_scaled))
train_data, val_data = data_scaled[:train_size], data_scaled[train_size:]

In [None]:
# Build the one-class SVM and fit it on the training split.
# The hyperparameters below were found by trial and error.
svm_params = {'kernel': 'rbf', 'gamma': 'scale', 'nu': 0.05}
svm = OneClassSVM(**svm_params)
svm.fit(train_data)

In [None]:
# Predict anomalies on validation data
# OneClassSVM.predict returns +1 for inliers and -1 for outliers.
val_predictions = svm.predict(val_data)

# Reference labels use the same {+1, -1} encoding as the predictions. Mixing
# encodings (0 vs -1 for anomalies) makes scikit-learn treat them as different
# classes, which silently reduces every score to the inlier fraction.
# NOTE(review): no ground-truth anomaly labels are visible in this notebook, so
# these labels are derived from the model's own predictions and the scores below
# are trivially perfect. TODO: replace `val_labels` with real labels.
val_labels = np.where(val_predictions == -1, -1, 1)  # Anomalies are labeled as -1

# Calculate precision, recall and F1-score
precision = precision_score(val_labels, val_predictions, average='weighted')
recall = recall_score(val_labels, val_predictions, average='weighted')
f1 = f1_score(val_labels, val_predictions, average='weighted')

# Share of validation rows the model flags; without ground truth this is the
# only informative figure reported here.
anomaly_fraction = float(np.mean(val_predictions == -1))

# Print results
print(f'Precision: {precision:.3f}, Recall: {recall:.3f}, F1-score: {f1:.3f}')
print(f'Flagged as anomalies: {anomaly_fraction:.1%} of {len(val_predictions)} validation rows')

### Display Anomalies From Different Columns

In [None]:
# Anomalies in homicide per capita
column = 'Homicide per capita'
# Predictions are positional within the validation split; shifting by
# train_size gives the corresponding row positions in `data`.
anomaly_rows = np.flatnonzero(val_predictions == -1) + train_size

plt.figure(figsize=(12, 6))
plt.scatter(data.index, data[column], label='Normal', s=10)
# Take the y-values from the raw column being plotted, not from the standardized
# validation array, so the markers sit on the points they flag and share the
# axis units.
plt.scatter(data.index[anomaly_rows], data[column].iloc[anomaly_rows], label='Anomaly', s=50, color='red')
plt.title('Anomaly Detection - Homicide per Capita')
plt.xlabel('Sample Index')
plt.ylabel('Homicide per Capita')
plt.legend()
plt.show()

In [None]:
# Anomalies in total emissions
column = 'Total Emissions'
# Predictions are positional within the validation split; shifting by
# train_size gives the corresponding row positions in `data`.
anomaly_rows = np.flatnonzero(val_predictions == -1) + train_size

plt.figure(figsize=(12, 6))
plt.scatter(data.index, data[column], label='Normal', s=10)
# Take the y-values from the raw column being plotted. Column 0 of the
# standardized validation array is a different feature and in different units.
plt.scatter(data.index[anomaly_rows], data[column].iloc[anomaly_rows], label='Anomaly', s=50, color='red')
plt.title('Anomaly Detection - Total Emissions')
plt.xlabel('Sample Index')
plt.ylabel('Total Emissions')
plt.legend()
plt.show()

In [None]:
# Anomalies in energy production to consumption
column = 'Energy Production to Consumption'
# Predictions are positional within the validation split; shifting by
# train_size gives the corresponding row positions in `data`.
anomaly_rows = np.flatnonzero(val_predictions == -1) + train_size

plt.figure(figsize=(12, 6))
plt.scatter(data.index, data[column], label='Normal', s=10)
# Take the y-values from the raw column being plotted. Column 0 of the
# standardized validation array is a different feature and in different units.
plt.scatter(data.index[anomaly_rows], data[column].iloc[anomaly_rows], label='Anomaly', s=50, color='red')
plt.title('Anomaly Detection - Energy Production to Consumption')
plt.xlabel('Sample Index')
plt.ylabel('Energy Production to Consumption')
plt.legend()
plt.show()