# ML Overview
The Iris dataset contains sepal and petal measurements (in cm) of iris flowers + a target variable indicating the species class (0 = setosa, 1 = versicolor, 2 = virginica).

1) Collect Data
2) Split Data --> training + testing
3) Train the model on training dataset
4) Evaluate the model on test dataset

In [1]:
import pandas as pd
from sklearn.datasets import load_iris

# 1. LOAD DATASET
dataset = load_iris()

# Features go into a DataFrame with named columns; labels are kept exactly as
# the loader returns them.
X, y = pd.DataFrame(data=dataset.data, columns=dataset.feature_names), dataset.target

print("Dataset shape:", X.shape)
print("Classes:", dataset.target_names)

Dataset shape: (150, 4)
Classes: ['setosa' 'versicolor' 'virginica']


In [10]:
# Preview the first ten feature rows. Leaving the DataFrame as the cell's last
# expression lets the notebook render it as a rich table instead of plain text.
X.head(10)

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
140                6.7               3.1                5.6               2.4
141                6.9               3.1                5.1               2.3
142                5.8               2.7                5.1               1.9
143                6.8               3.2                5.9               2.3
144                6.7               3.3                5.7               2.5
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8


In [3]:
# Peek at the first ten target labels.
first_labels = y[:10]
print(first_labels)

[0 0 0 0 0 0 0 0 0 0]


In [4]:
# 2. TRAIN–TEST SPLIT

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The shuffle is seeded so the split
# (and every metric computed from it) is identical on each Restart & Run All;
# 42 matches the random_state used for the model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("shape of X_train:", X_train.shape)
print("shape of X_test:", X_test.shape)

shape of X_train: (120, 4)
shape of X_test: (30, 4)


In [5]:
# 3. TRAIN MODEL
from sklearn.ensemble import RandomForestClassifier

# Hyperparameters gathered in one place: number of trees and a fixed seed.
forest_params = {"n_estimators": 200, "random_state": 42}
model = RandomForestClassifier(**forest_params)

# Fit on the training portion only; the fitted estimator is the cell's output.
model.fit(X_train, y_train)

In [13]:
# Predict the class of a single hand-entered flower.
# Column order: sepal length, sepal width, petal length, petal width (cm).
# NOTE: these values are identical to row 149 of the dataset, so the model may
# have seen this exact sample during training; treat this as a usage demo,
# not as an independent check of accuracy.
sample = [[5.9, 3.0, 5.1, 1.8]]
# Reuse the training column names so the model receives the same feature layout.
sample_df = pd.DataFrame(sample, columns=dataset.feature_names)

prediction = model.predict(sample_df)
print("Prediction:", prediction)
print("Classes:", dataset.target_names)
# Translate the numeric class index into its name so the reader does not have
# to look it up in the list above.
print("Predicted class:", dataset.target_names[prediction[0]])

Prediction: [2]
Classes: ['setosa' 'versicolor' 'virginica']


In [6]:
# 4. EVALUATE MODEL

# Class predictions for the held-out test rows.
y_pred = model.predict(X=X_test)

In [7]:
# 5. Check accuracy
from sklearn.metrics import accuracy_score

# Compare the test-set predictions against the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Model Accuracy:", accuracy)

Model Accuracy: 1.0
