In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score


# Read the comma-separated Winnipeg data file into a DataFrame, then print
# a preview of the first rows followed by per-column summary statistics.
dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/WinnipegDataset.txt')
for summarize in (dataset.head, dataset.describe):
    print(summarize())

# Correlation-based feature pruning.
#
# Column 0 is used as the target further down (y = dataset.iloc[:, 0]), so it
# is excluded from the correlation analysis: a feature that tracks the label
# closely is informative, not redundant, and must not be dropped for that
# reason. Only feature-vs-feature correlations are inspected.
feature_columns = dataset.iloc[:, 1:]
correlation_matrix = feature_columns.corr()

# Find columns that exceed the threshold.
# Restricting the boolean mask to the strict upper triangle (k=1) skips the
# diagonal and yields each unordered pair exactly once, in row-major order.
# NaN correlations (e.g. constant columns) compare False and are ignored.
exceeds_threshold = np.triu(correlation_matrix.abs().to_numpy() > 0.95, k=1)
high_corr = np.nonzero(exceeds_threshold)
high_corr_pairs = [
    (correlation_matrix.columns[row], correlation_matrix.columns[col])
    for row, col in zip(*high_corr)
]

# Print the highly correlated pairs
print("Highly correlated pairs:")
for pair in high_corr_pairs:
    print(pair)

# Choose one feature from each pair to remove: always the later column of the
# pair, so the earliest member of each correlated group is the one retained.
features_to_remove = {later for _, later in high_corr_pairs}
print("Features to remove:")
print(features_to_remove)

# Drop the highly correlated features
dataset = dataset.drop(columns=list(features_to_remove))

# Separate the target (first column) from the predictors (remaining columns)
# and hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible.
y = dataset.iloc[:, 0].values
X = dataset.iloc[:, 1:].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a polynomial-kernel support vector classifier on the scaled data.
# NOTE(review): the captured cell output ends in KeyboardInterrupt; if that
# happened during this fit, training time on this dataset may be the cause --
# confirm.
svm_classifier = SVC(kernel='poly', random_state=0)
svm_classifier.fit(X_train_scaled, y_train)

# Score the held-out split: overall accuracy first, then the confusion matrix.
y_pred = svm_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

   label      f1      f2       f3      f4      f5      f6      f7      f8  \
0      1 -13.559 -21.407 -11.4040 -15.248 -11.923 -15.291 -2.1548 -7.8474   
1      1 -12.802 -20.335 -10.3990 -14.132 -11.096 -14.361 -2.4039 -7.5330   
2      1 -12.431 -19.902 -10.0740 -13.598 -10.829 -14.048 -2.3566 -7.4717   
3      1 -12.689 -19.529 -10.0280 -13.350 -11.056 -14.014 -2.6611 -6.8396   
4      1 -12.686 -19.278  -9.8185 -13.108 -10.932 -13.939 -2.8675 -6.5919   

        f9  ...     f165     f166    f167      f168     f169     f170  \
0 -10.0020  ...  0.18519  0.72602  5.3333  6.000000  0.29489  9.77780   
1  -9.9369  ...  0.33333 -0.48751  2.1111  0.098765  0.83333  0.33333   
2  -9.8283  ...  0.25926  0.25298  2.2222  0.172840  0.68889  0.88889   
3  -9.5006  ...  0.16049  0.43750  4.1111  0.320990  0.83333  0.33333   
4  -9.4594  ...  0.18519  0.35000  4.0000  0.444440  0.68889  0.88889   

      f171     f172     f173     f174  
0  2.44440  1.67700  0.20988  0.65422  
1  0.33333  0.8486

KeyboardInterrupt: 