In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib
import ast


In [2]:
# Load the feature dataset
data_path = r"C:\Users\User\Desktop\project\handcrafted_features\output.csv"
if not os.path.exists(data_path):
    raise FileNotFoundError(f"Feature dataset not found at {data_path}")
df = pd.read_csv(data_path)


In [3]:
import pandas as pd
import ast

def safe_literal_eval(val):
    try:
        return ast.literal_eval(val)
    except (ValueError, SyntaxError):
        return None  # or any other appropriate default value or error handling

def process_list_column(df, col_name):
    # Convert string representation of lists to actual lists
    df[col_name] = df[col_name].apply(safe_literal_eval)
    
    # Flatten the lists in the feature column
    filtered_series = df[col_name].apply(lambda x: x if isinstance(x, list) and len(x) > 0 else [0])
    
    # Expand the lists into separate columns
    expanded_cols = pd.DataFrame(filtered_series.tolist(), index=filtered_series.index,
                                  columns=[f"{col_name}_{i+1}" for i in range(len(filtered_series.iloc[0]))])
    
    # Concatenate the expanded columns to the original DataFrame
    df = pd.concat([df, expanded_cols], axis=1)
    
    # Drop the original column
    df.drop(col_name, axis=1, inplace=True)
    
    return df

# Example usage
cols_to_process = ['Hue_Histogram', 'Saturation_Histogram', 'Value_Histogram','Moments_R', 'Moments_G', 'Moments_B', 
                   'LBP_Features', 'Zernike_Moments', 'Hu_Moments','Gabor_Features']



for col in cols_to_process:
    df = process_list_column(df, col)

print(df.dtypes)


Class                object
Filename             object
Contrast            float64
Dissimilarity       float64
Homogeneity         float64
                     ...   
Gabor_Features_4    float64
Gabor_Features_5    float64
Gabor_Features_6    float64
Gabor_Features_7    float64
Gabor_Features_8    float64
Length: 840, dtype: object


In [4]:
print(df.head)

<bound method NDFrame.head of                     Class                      Filename    Contrast  \
0      Alstonia scholaris  adjusted_image_0003_0001.JPG  109.993934   
1      Alstonia scholaris  adjusted_image_0003_0002.JPG  163.963365   
2      Alstonia scholaris  adjusted_image_0003_0003.JPG  123.790098   
3      Alstonia scholaris  adjusted_image_0003_0004.JPG  188.154168   
4      Alstonia scholaris  adjusted_image_0003_0005.JPG  135.426970   
...                   ...                           ...         ...   
15395   Terminalia arjuna    zoomed_image_0002_0193.JPG   80.025765   
15396   Terminalia arjuna    zoomed_image_0002_0217.JPG   86.074852   
15397   Terminalia arjuna    zoomed_image_0002_0218.JPG   77.640875   
15398   Terminalia arjuna    zoomed_image_0002_0219.JPG   89.022061   
15399   Terminalia arjuna    zoomed_image_0002_0220.JPG   73.734906   

       Dissimilarity  Homogeneity    Energy  Correlation     Area  \
0           6.079416     0.255005  0.091430     

In [5]:
# Save the DataFrame to a CSV file
output_dir=r"C:\Users\User\Desktop\project\handcrafted_features"
df.to_csv(os.path.join(output_dir, 'extracted.csv'), index=False)



In [10]:
df.isnull().sum()

Class               0
Filename            0
Contrast            0
Dissimilarity       0
Homogeneity         0
                   ..
Gabor_Features_4    0
Gabor_Features_5    0
Gabor_Features_6    0
Gabor_Features_7    0
Gabor_Features_8    0
Length: 840, dtype: int64