In [1]:
import numpy as np  # Fundamental package for numerical computing
import pandas as pd  # Library for data manipulation and analysis
from sklearn.preprocessing import MinMaxScaler

In [30]:
# Load the dataset. The slice below treats every column except the first and
# the last as a feature; those two are re-attached further down as 'Id' and 'Species'.
file_path = 'Iris.csv'  # Adjust the path if necessary
data = pd.read_csv(file_path)
features = data.columns[1:-1]  # Feature columns (exclude the first and the last column)
x = data.loc[:, features].values

# Min-max scale every feature column: (value - column min) / (column max - column min).
min_vals = x.min(axis=0)  # Min of each feature
max_vals = x.max(axis=0)  # Max of each feature
feature_ranges = max_vals - min_vals
# A constant column has a zero range, and dividing by it yields 0/0 = NaN.
# Substitute 1 so that such a column is mapped to 0.0 instead.
feature_ranges = np.where(feature_ranges == 0, 1, feature_ranges)
x_normalized = (x - min_vals) / feature_ranges
x_normalized_df = pd.DataFrame(x_normalized, columns=features)

# Add back the 'Id' column and the label/species column
x_normalized_df.insert(0, 'Id', data['Id'])  # Insert 'Id' as the first column
x_normalized_df['Species'] = data['Species']  # Add 'Species' column

# Split the rows into records with a species label (known) and without one (unknown).
# Both lists come from the same mask, so every row lands in exactly one of them.
has_label = x_normalized_df['Species'].notna()
KNOWN_SAMPLES = x_normalized_df[has_label].to_dict(orient='records')  # Known samples
UNKNOWN_SAMPLES = x_normalized_df[~has_label].to_dict(orient='records')  # Unknown samples



In [33]:
# Preview only the first few records; echoing the whole list floods the cell output.
KNOWN_SAMPLES[:5]

[{'Id': 1,
  'SepalLengthCm': 0.5277777777777778,
  'SepalWidthCm': 0.0833333333333334,
  'PetalLengthCm': 0.5932203389830508,
  'PetalWidthCm': 0.5833333333333334,
  'Species': 'Iris-versicolor'},
 {'Id': 2,
  'SepalLengthCm': 0.19444444444444448,
  'SepalWidthCm': 0.6249999999999999,
  'PetalLengthCm': 0.05084745762711865,
  'PetalWidthCm': 0.08333333333333333,
  'Species': 'Iris-setosa'},
 {'Id': 3,
  'SepalLengthCm': 0.5555555555555555,
  'SepalWidthCm': 0.5416666666666665,
  'PetalLengthCm': 0.6271186440677966,
  'PetalWidthCm': 0.625,
  'Species': 'Iris-versicolor'},
 {'Id': 4,
  'SepalLengthCm': 0.4999999999999999,
  'SepalWidthCm': 0.3749999999999999,
  'PetalLengthCm': 0.6271186440677966,
  'PetalWidthCm': 0.5416666666666666,
  'Species': 'Iris-versicolor'},
 {'Id': 5,
  'SepalLengthCm': 0.8055555555555556,
  'SepalWidthCm': 0.41666666666666663,
  'PetalLengthCm': 0.8135593220338982,
  'PetalWidthCm': 0.625,
  'Species': 'Iris-virginica'},
 {'Id': 6,
  'SepalLengthCm': 0.66666

In [32]:
# Show the feature values of the first known record (nothing is printed if there is none).
if KNOWN_SAMPLES:
    known_sample = KNOWN_SAMPLES[0]
    # Look the values up by column name so the order follows `features`.
    known_features = [known_sample[column] for column in features]
    print(known_features)

[0.5277777777777778, 0.0833333333333334, 0.5932203389830508, 0.5833333333333334]


In [None]:
# Keep only the rows whose 'Species' value is present, i.e. the labeled samples.
labeled = data[data['Species'].notna()]

In [34]:
# Number of feature columns selected for normalization.
features.size

4

In [None]:


# Normalize with MinMaxScaler.
# Only the numeric feature columns are passed in: `labeled` also carries the string
# 'Species' column (which the scaler cannot convert to float) and the 'Id' column
# (an identifier, not a measurement), so scaling the whole frame is not an option.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(labeled[features])

print(normalized_data)