<a href="https://colab.research.google.com/github/dionysusshan/ml/blob/main/SVM_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: python code for data prediction in csv file using the SVM

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the data from the CSV file
data = pd.read_csv('/content/sample_data/imputed_dataset.csv')

# Separate the features (every column except the last) from the labels (last column)
features = data.iloc[:, :-1]
labels = data.iloc[:, -1]

# SVC is a classifier, so a numeric target is turned into two classes:
# 1 when the value is above the column mean, 0 otherwise.
# NOTE(review): any numeric dtype is binarised, including integer columns that
# may already hold class ids -- confirm this is intended for your target.
# NOTE(review): the threshold is the mean of ALL labels (train and test rows),
# which lets a little test-set information into the label definition.
if np.issubdtype(labels.dtype, np.number):
    threshold = labels.mean()
    labels = (labels > threshold).astype(int)
    print("Labels converted to binary categories.")
else:
    print("Labels are not continuous.")

# Split the data into training and testing sets.
# A fixed random_state makes the split, and therefore the reported accuracy,
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=42
)

# Standardise the features: SVMs are sensitive to feature scale, and unscaled
# columns with large magnitudes dominate the linear kernel. The scaler is
# fitted on the training split only so no test-set statistics reach the model.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the SVM classifier
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Make predictions on the held-out test set
predictions = clf.predict(X_test)

# Fraction of test rows whose predicted class equals the true class.
# NOTE(review): a perfect score is suspicious -- check that no feature column
# is a copy or near-copy of the target column.
accuracy = accuracy_score(y_test, predictions)

# Print the accuracy
print("Accuracy:", accuracy)

Labels converted to binary categories.
Accuracy: 1.0


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load Data
# Path of the CSV file to model; change it to point at your own dataset
file_path = '/content/imputed_dataset.csv'
df = pd.read_csv(file_path)

# Display the first few rows of the dataset
print("Original Data:")
print(df.head())

# Step 2: Preprocess Data
# Assume the last column is the target variable
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Split the data into training and testing sets (seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features; the scaler is fitted on the training split only so that
# no statistics from the test split leak into training
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Define the SVM Model
# For regression, we'll use SVR (Support Vector Regression)
svm_model = SVR(kernel='rbf')

# Step 4: Train the Model
svm_model.fit(X_train, y_train)

# Step 5: Make Predictions
y_pred = svm_model.predict(X_test)

# Step 6: Evaluate the Model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Optional: Save the model using joblib
import joblib
joblib.dump(svm_model, '/content/svm_model.pkl')
print("Model saved as '/content/svm_model.pkl'")

# The model was trained on standardised features, so the fitted scaler has to
# be persisted as well: after reloading, raw inputs must go through
# scaler.transform() before svm_model.predict(), otherwise predictions are wrong.
joblib.dump(scaler, '/content/svm_scaler.pkl')
print("Scaler saved as '/content/svm_scaler.pkl'")


Original Data:
     AQI-IN     PM25    PM10      PM1  Temp(cel)      Hum    Noise  TVOC(ppm)  \
0  114.3580  64.1700  85.939  60.4000    22.6070  98.2770  48.3450     0.0090   
1   95.4740  57.2600  75.377  53.9740    22.3730  95.7710  48.3700     0.0100   
2   78.3800  47.0450  59.341  44.5980    30.2404  85.4160  48.0780     0.0104   
3   65.0780  38.7242  47.500  37.3220    28.1220  65.4868  50.8440     0.0080   
4   57.6288  35.5980  42.358  33.3266    30.9720  63.4980  50.1188     0.0100   

   CO(ppm)  CO2(ppm)  ...  O3(ppm)  AQI-IN(F)  AQI-IN(s)      CI    VI  \
0    0.392   482.552  ...   0.0210   114.3580   114.3580  9.8730  10.0   
1    0.454   486.747  ...   0.0230    95.4740    95.4740  9.0060  10.0   
2    0.667   482.067  ...   0.0234    82.1280    78.3800  9.0000  10.0   
3    0.680   458.754  ...   0.0260    79.4324    65.0780  9.0272  10.0   
4    0.750   455.927  ...   0.0240    76.1730    57.6288  9.0000  10.0   

   particle count(0.3)  particle count(0.5)  particle