In [5]:
import pandas as pd
import os

# Locations of the two input CSV files
synthetic_path = r"C:\Users\ashna\Test\data\Synthetic_Flower_2ndDataset.csv"
iris_path = r"C:\Users\ashna\Test\data\iris_.csv"

# Report, for each input, whether the file is present on disk.
# This only prints a status line; a missing file does not stop execution here.
for label, csv_path in (("Synthetic", synthetic_path), ("Original", iris_path)):
    if not os.path.exists(csv_path):
        print(f"❌ {label} dataset NOT found at: {csv_path}")
    else:
        print(f"✅ {label} dataset found: {csv_path}")


✅ Synthetic dataset found: C:\Users\ashna\Test\data\Synthetic_Flower_2ndDataset.csv
✅ Original dataset found: C:\Users\ashna\Test\data\iris_.csv


In [7]:
# Read each CSV file into its own DataFrame
iris_df = pd.read_csv(iris_path)
synthetic_df = pd.read_csv(synthetic_path)

print("✅ Both datasets loaded successfully!")

# Show the first rows of each frame, under its heading, as a quick sanity check
for heading, frame in (
    ("🔹 Original Iris Dataset Preview:", iris_df),
    ("🔹 Synthetic Flower Dataset Preview:", synthetic_df),
):
    print(heading)
    display(frame.head())


✅ Both datasets loaded successfully!
🔹 Original Iris Dataset Preview:


Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


🔹 Synthetic Flower Dataset Preview:


Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species,Flower_ID
0,5.059971,2.835939,4.737343,2.284797,Lily,1
1,8.228929,4.724798,1.512085,1.191153,Orchid,2
2,7.025967,2.718686,1.31009,0.342152,Lily,3
3,6.292622,2.434685,4.188128,2.353726,Orchid,4
4,3.858103,3.468358,4.243811,1.719372,Rose,5


In [9]:
# Make sure both frames carry a 'Flower_ID' key column.
# A frame that lacks it gets sequential IDs starting at 1, in its current row
# order; a frame that already has the column is left untouched.
for frame in (synthetic_df, iris_df):
    if "Flower_ID" in frame.columns:
        continue
    frame["Flower_ID"] = range(1, len(frame) + 1)

print("✅ 'Flower_ID' column is ready for merging!")


✅ 'Flower_ID' column is ready for merging!


In [13]:
# Left-join the synthetic rows onto the iris rows using 'Flower_ID' as the key.
# - how="left" keeps every iris row, whether or not a synthetic row shares its ID.
# - Columns present in both frames receive pandas' default '_x' (iris) and
#   '_y' (synthetic) suffixes.
# - validate="one_to_one" makes pandas raise MergeError when 'Flower_ID' is
#   repeated in either frame; without it a repeated ID would silently
#   duplicate iris rows in the result.
merged_df = pd.merge(
    iris_df,
    synthetic_df,
    on="Flower_ID",
    how="left",
    validate="one_to_one",
)

print("✅ Datasets merged successfully!")
display(merged_df.head())


✅ Datasets merged successfully!


Unnamed: 0,Sepal_Length_x,Sepal_Width_x,Petal_Length_x,Petal_Width_x,Species_x,Flower_ID,Sepal_Length_y,Sepal_Width_y,Petal_Length_y,Petal_Width_y,Species_y
0,5.1,3.5,1.4,0.2,Iris-setosa,1,5.059971,2.835939,4.737343,2.284797,Lily
1,4.9,3.0,1.4,0.2,Iris-setosa,2,8.228929,4.724798,1.512085,1.191153,Orchid
2,4.7,3.2,1.3,0.2,Iris-setosa,3,7.025967,2.718686,1.31009,0.342152,Lily
3,4.6,3.1,1.5,0.2,Iris-setosa,4,6.292622,2.434685,4.188128,2.353726,Orchid
4,5.0,3.6,1.4,0.2,Iris-setosa,5,3.858103,3.468358,4.243811,1.719372,Rose


In [15]:
# Measurement/label columns shared by both source frames
base_columns = ('Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Species')

# The '_y' columns are the right-hand (synthetic) side of the merge.
# NOTE(review): removing all of them leaves only the iris values plus
# 'Flower_ID' in the result — confirm this is the intended "merged" output.
columns_to_drop = [f"{base}_y" for base in base_columns]

# errors="ignore" skips labels that are not present, so re-running this cell
# after the columns are already gone does not fail. The rename then strips the
# '_x' suffix from the remaining (iris) columns.
merged_df = (
    merged_df
    .drop(columns=columns_to_drop, errors="ignore")
    .rename(columns={f"{base}_x": base for base in base_columns})
)

print("✅ Cleaned Merged Dataset:")
display(merged_df.head())


✅ Cleaned Merged Dataset:


Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species,Flower_ID
0,5.1,3.5,1.4,0.2,Iris-setosa,1
1,4.9,3.0,1.4,0.2,Iris-setosa,2
2,4.7,3.2,1.3,0.2,Iris-setosa,3
3,4.6,3.1,1.5,0.2,Iris-setosa,4
4,5.0,3.6,1.4,0.2,Iris-setosa,5


In [17]:
# Destination file for the merged dataset
merged_dataset_path = r"C:\Users\ashna\Test\results\Final_Merged_Dataset.csv"

# Create the output folder first: to_csv does not create missing parent
# directories and fails with an OSError when the folder is absent.
# The emptiness guard avoids calling makedirs("") when the path has no
# directory component.
output_dir = os.path.dirname(merged_dataset_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# index=False keeps the DataFrame's row index out of the written file
merged_df.to_csv(merged_dataset_path, index=False)

print(f"✅ Merged dataset saved successfully at: {merged_dataset_path}")


✅ Merged dataset saved successfully at: C:\Users\ashna\Test\results\Final_Merged_Dataset.csv
