In [24]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# ======= PARAMETERS =======
image_size = (64, 64)  # Resize all images to 64x64
data_dir = "C:\\Users\\fayc3\\OneDrive\\Desktop\\Semestre 2\\Machine Learning\\3 Machine learning algorithm\\DATASETS\\Apple vs Orange"
predict_dir = "C:\\Users\\fayc3\\OneDrive\\Desktop\\Semestre 2\\Machine Learning\\3 Machine learning algorithm\\DATASETS\\Apple vs Orange\\predict"

# ======= LOAD AND PREPROCESS DATA =======
X = []
y = []

for label in os.listdir(data_dir):
    class_path = os.path.join(data_dir, label)
    if os.path.isdir(class_path):
        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)
            try:
                img = cv2.imread(img_path)
                img = cv2.resize(img, image_size)
                X.append(img.flatten())  # Flatten the image into 1D array
                y.append(label)
            except Exception as e:
                print(f"⚠️ Skipped {img_path}: {e}")

X = np.array(X)
le = LabelEncoder()
y = le.fit_transform(y)

# ======= TRAIN / TEST SPLIT =======
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ======= TRAIN SVM MODEL =======
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# ======= EVALUATE ON TEST SET =======
y_pred = model.predict(X_test)
print("📊 Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    target_names=le.classes_,
    labels=np.unique(y_test)
))


# ======= PREDICT NEW IMAGE =======
print("\n🔮 Predictions for images in 'predict' folder:")
for filename in os.listdir(predict_dir):
    file_path = os.path.join(predict_dir, filename)
    img = cv2.imread(file_path)
    if img is not None:
        img_resized = cv2.resize(img, image_size).flatten().reshape(1, -1)
        pred = model.predict(img_resized)
        label = le.inverse_transform(pred)[0]
        print(f"🖼️ '{filename}' is predicted to be: 🍎 Apple" if label.lower() == 'apple' else f"🖼️ '{filename}' is predicted to be: 🍊 Orange")
    else:
        print(f"⚠️ Could not read image: {filename}")


📊 Classification Report:
              precision    recall  f1-score   support

       Apple       0.65      0.70      0.68        37
      Orange       0.59      0.53      0.56        30

    accuracy                           0.63        67
   macro avg       0.62      0.62      0.62        67
weighted avg       0.62      0.63      0.62        67


🔮 Predictions for images in 'predict' folder:
🖼️ 'images.jpg' is predicted to be: 🍊 Orange




Training SVM on text data


In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
import pandas as pd
import nltk
nltk.download('stopwords')

# Load the IMDB dataset from CSV
# Replace the file paths with the correct paths to your CSV files
train_data = pd.read_csv("C:\\Users\\fayc3\\OneDrive\\Desktop\\Semestre 2\\Machine Learning\\3 Machine learning algorithm\\DATASETS\\IMDB Dataset.csv")
test_data = pd.read_csv("C:\\Users\\fayc3\OneDrive\\Desktop\\Semestre 2\\Machine Learning\\3 Machine learning algorithm\\DATASETS\\aciimdb_test.csv")

train_data = train_data.iloc[ :300, :]
train_data = train_data.dropna()
test_data = test_data.dropna()
# Separate features and labels
X_train, y_train = train_data['review'], train_data['sentiment']
X_test, y_test = test_data['review'], test_data['sentiment']

# Convert sentiment labels to numerical values (e.g., positive -> 1, negative -> 0)
y_train = y_train.map({'positive': 1, 'negative': 0})
y_test = y_test.map({'positive': 1, 'negative': 0})

# Create the pipeline
text_clf = make_pipeline(
    TfidfVectorizer(stop_words='english', max_df=0.7),
    LinearSVC()
)

# Train the model
text_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = text_clf.predict(X_test)

# Print the classification report
print("Text Classification Report:")
print(classification_report(y_test, y_pred))

  test_data = pd.read_csv("C:\\Users\\fayc3\OneDrive\\Desktop\\Semestre 2\\Machine Learning\\3 Machine learning algorithm\\DATASETS\\aciimdb_test.csv")
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\fayc3\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Text Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.80      0.89         5
           1       0.83      1.00      0.91         5

    accuracy                           0.90        10
   macro avg       0.92      0.90      0.90        10
weighted avg       0.92      0.90      0.90        10



Numerical data 


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load the dataset
data = pd.read_csv('C:\\Users\\fayc3\\OneDrive\\Desktop\\Semestre 2\\Machine Learning\\3 Machine learning algorithm\\DATASETS\\all_stocks_5yr.csv')

# Step 2: Preprocess the data
# Drop unnecessary columns like 'date' and 'Name', and drop rows with missing values
data_dropped = data.drop(['date', 'Name', 'volume'], axis=1)
data_dropped = data_dropped.dropna()

# Select a subset of rows and columns for training (adjust as necessary)
data_dropped = data_dropped.iloc[:3000, :]  # First 3000 rows

# Separate features and target variable
X = data_dropped.drop('open', axis=1)  # Features
y = data_dropped['open']  # Target variable

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Step 3: Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the SVR model
svr_model = SVR(kernel='linear', C=1.0)  # You can change the kernel to 'rbf', 'poly', etc.
svr_model.fit(X_train, y_train)

# Step 5: Make predictions
y_pred = svr_model.predict(X_test)

# Step 6: Evaluate the model
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R^2 Score:", r2_score(y_test, y_pred))

Mean Squared Error: 0.43774479588073517
R^2 Score: 0.9997502253696101
