# **Ahmed Alshafeay**

# **Vegetable Image**

In [None]:
import os
import numpy as np
import cv2
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA

### **Install and set up Kaggle API**

In [None]:
# Install the Kaggle command-line client.
!pip install kaggle
# Put the API token where the Kaggle client looks for it.
# NOTE: kaggle.json must already be present in the current working directory;
# if it is not, the cp and chmod steps below fail with "No such file or directory".
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json
# Download the dataset archive (vegetable-image-dataset.zip) into the working directory.
!kaggle datasets download -d misrakahmed/vegetable-image-dataset

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset
License(s): CC-BY-SA-4.0
vegetable-image-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


### **Extract dataset**

In [None]:
# Unpack the downloaded Kaggle archive into a folder next to the notebook.
with zipfile.ZipFile(file="vegetable-image-dataset.zip", mode='r') as archive:
    archive.extractall(path="./vegetable_dataset")

### **Define dataset path**

In [None]:
# Root folder that is scanned for images: each sub-directory is treated as one
# class and its name becomes the label. Only this "train" folder is read; the
# evaluation split is later carved out of it with train_test_split.
data_dir = "./vegetable_dataset/Vegetable Images/train"

## **Image Preprocessing**

### Reduce image size to speed up processing

In [None]:
# Target size handed to cv2.resize for every image. Keeping it at 32x32 keeps
# the flattened feature vector short (3072 values if the images load as
# 3-channel colour, which is cv2.imread's default).
IMG_SIZE = (32, 32)

In [None]:
# Accumulators for the loading loop: one flattened pixel vector per image in
# `data`, and the matching class-folder name at the same index in `labels`.
data, labels = [], []

In [None]:
# Start from empty containers so that re-running this cell never appends a
# second copy of the dataset (and does not fail if `data` has already been
# converted to a NumPy array by a later cell).
data = []
labels = []

# os.listdir() returns entries in arbitrary, filesystem-dependent order, so
# both levels are sorted: samples are then loaded in the same order on every
# machine, which the seeded train/test split needs in order to be reproducible.
for category in sorted(os.listdir(data_dir)):
    category_path = os.path.join(data_dir, category)
    if not os.path.isdir(category_path):
        continue  # ignore stray files sitting next to the class folders
    for img_name in sorted(os.listdir(category_path)):
        img_path = os.path.join(category_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            continue  # cv2.imread returns None when the file cannot be decoded
        # Shrink to IMG_SIZE and flatten the pixel grid into a 1-D feature vector.
        data.append(cv2.resize(img, IMG_SIZE).flatten())
        labels.append(category)


### Convert lists to NumPy arrays

In [None]:
# Stack the per-image vectors into one 2-D array (one row per image) and turn
# the label list into a 1-D array aligned with those rows.
data, labels = np.array(data), np.array(labels)

### Encode labels to numerical values

In [None]:
# Replace each class-folder name with an integer id. The fitted encoder is
# kept in `le` so the name <-> id mapping stays available afterwards.
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

### Train-Test Split (80% train, 20% test)

In [None]:
# Hold out 20% of the samples for evaluation. Stratifying on the labels keeps
# the class proportions (nearly) the same in both subsets, so no class ends up
# under-represented in the test set by chance; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

### Standardize features (mean=0, variance=1)

In [None]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so that no
# test-set statistics leak into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### Apply PCA for dimensionality reduction

In [None]:
# Project the standardized pixels onto 100 principal components. The
# projection is fitted on the training split only and then reused for the
# test split. random_state is pinned because scikit-learn may select a
# randomized SVD solver for an input of this size, which would otherwise make
# the components (and the downstream accuracy) vary from run to run.
pca = PCA(n_components=100, random_state=42)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

### Train Machine Learning Model (Random Forest)

In [None]:
# Fit a 100-tree random forest on the PCA-reduced training data; the fixed
# seed makes the forest itself reproducible.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
model.fit(X=X_train, y=y_train)

### Predictions and Accuracy Calculation

In [None]:
# Score the fitted model on the held-out split and report the fraction of
# correctly classified images as a percentage.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 85.23%
