<a href="https://colab.research.google.com/github/dionysusshan/ml/blob/main/SVM_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: python code for data prediction in csv file using the SVM

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the data from the CSV file
data = pd.read_csv('/content/sample_data/imputed_dataset.csv')

# Separate the features (every column except the last) from the labels (last column)
features = data.iloc[:, :-1]
labels = data.iloc[:, -1]

# SVC is a classifier, so a numeric target is collapsed into two classes:
# 1 when the value is above the column mean, 0 otherwise.
# pd.api.types.is_numeric_dtype is used rather than np.issubdtype because the
# latter raises TypeError on pandas extension dtypes (categorical, string,
# nullable integers) instead of simply answering "not numeric". Booleans are
# excluded explicitly so they are still treated as ready-made class labels.
# NOTE(review): integer-coded class labels are numeric too and will be
# binarized by this branch -- confirm that is intended for this dataset.
if pd.api.types.is_numeric_dtype(labels) and not pd.api.types.is_bool_dtype(labels):
    threshold = labels.mean()
    labels = (labels > threshold).astype(int)  # 1 if above threshold, 0 otherwise
    print("Labels converted to binary categories.")
else:
    print("Labels are not continuous.")

# Split the data into training and testing sets.
# random_state is fixed so the split -- and therefore the reported accuracy --
# is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=42
)

# Create an SVM classifier
clf = SVC(kernel='linear')

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions on the held-out test set
predictions = clf.predict(X_test)

# Calculate the accuracy of the predictions
# NOTE(review): a perfect score on a thresholded target would be suspicious;
# check that no feature column duplicates or is derived from the label column.
accuracy = accuracy_score(y_test, predictions)

# Print the accuracy
print("Accuracy:", accuracy)

Labels converted to binary categories.
Accuracy: 1.0


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# joblib is only needed for the final persistence step of this cell.
import joblib

# Step 1: Load Data
# Path of the CSV file to model; point this at a different file if needed.
file_path = '/content/sample_data/imputed_datasetknn.csv'
df = pd.read_csv(file_path)

# Preview the first few rows of the dataset
print("Original Data:")
print(df.head())

# Step 2: Preprocess Data
# The last column is taken as the target; every other column is a feature.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Hold out 20% of the rows for testing (seeded for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: statistics are learned from the training rows
# only, then the same transformation is applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Steps 3 and 4: Define and train the model
# SVR (Support Vector Regression) with an RBF kernel; fit() returns the estimator.
svm_model = SVR(kernel='rbf').fit(X_train, y_train)

# Step 5: Predict on the held-out rows
y_pred = svm_model.predict(X_test)

# Step 6: Evaluate the Model
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Optional: persist the trained model to disk with joblib
joblib.dump(svm_model, '/content/svm_model.pkl')
print("Model saved as '/content/svm_model.pkl'")


Original Data:
    AQI-IN    PM25    PM10     PM1  Temp(cel)     Hum   Noise  TVOC(ppm)  \
0  114.358  64.170  85.939  60.400     22.607  98.277  48.345      0.009   
1   95.474  57.260  75.377  53.974     22.373  95.771  48.370      0.010   
2   78.380  47.045  59.341  44.598     24.292  85.416  48.078      0.008   
3   65.078  39.044  47.500  37.322     28.122  71.982  50.844      0.008   
4   59.369  35.598  42.358  34.196     30.972  63.498  50.855      0.010   

   CO(ppm)  CO2(ppm)  SO2(ppm)  NO2(ppm)  O3(ppm)  AQI-IN(F)     CI    VI  \
0    0.392   482.552     0.002     0.008    0.021    114.358  9.873  10.0   
1    0.454   486.747     0.002     0.008    0.023     95.474  9.006  10.0   
2    0.667   482.067     0.003     0.009    0.025     82.128  9.000  10.0   
3    0.680   462.433     0.002     0.009    0.026     77.250  9.000  10.0   
4    0.697   455.927     0.002     0.009    0.024     76.173  9.000  10.0   

   particle count(0 3)  particle count(0 5)  particle count(1 0) 