In [4]:
import pickle
import numpy as np
import os
from sklearn.manifold import Isomap
from sklearn.cluster import KMeans

def _describe_value(value):
    """Print a short, type-specific summary of one value stored in the pickle."""
    if isinstance(value, np.ndarray):
        print(f"Shape: {value.shape}")
        print(f"Dtype: {value.dtype}")
        # Only preview a row when one exists: value[0] on a (0, n) array
        # raises IndexError.
        if value.ndim == 2 and value.shape[0] > 0:
            print(f"Sample values (first row): {value[0][:5]}...")
    elif isinstance(value, list):
        print(f"List length: {len(value)}")
        if value:
            first_item = value[0]
            print(f"First item type: {type(first_item)}")
            if isinstance(first_item, np.ndarray):
                print(f"First item shape: {first_item.shape}")
    elif hasattr(value, 'cluster_centers_'):
        centers = value.cluster_centers_
        print(f"Cluster model with {len(centers)} clusters")
        print(f"Cluster centers shape: {centers.shape}")
    elif hasattr(value, '__dict__'):
        attrs = [attr for attr in dir(value) if not attr.startswith('_')]
        print(f"Object attributes: {attrs[:10]}...")  # Show first 10
    else:
        print(f"Value preview: {str(value)[:100]}...")


def inspect_pickle_file(pickle_path):
    """Load a pickle file, print a summary of its contents, and return them.

    If the unpickled object is a dict, every key is listed together with a
    type-specific preview of its value. Preview problems are reported per key
    and never discard the data that was loaded.

    Args:
        pickle_path: Path of the pickle file to open.

    Returns:
        The unpickled object, or None if the file could not be opened or
        unpickled.
    """
    print(f"\n{'='*50}")
    print(f"INSPECTING: {pickle_path}")
    print(f"{'='*50}")

    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at pickle files from a trusted source.
    try:
        with open(pickle_path, 'rb') as f:
            data = pickle.load(f)
    except Exception as e:
        # Unpickling can fail in many ways (missing file, truncated data,
        # missing classes), so this diagnostic helper reports and gives up.
        print(f"Error loading {pickle_path}: {e}")
        return None

    print(f"Type: {type(data)}")

    if isinstance(data, dict):
        print(f"Keys: {list(data.keys())}")

        for key, value in data.items():
            print(f"\n--- {key} ---")
            print(f"Type: {type(value)}")
            try:
                _describe_value(value)
            except Exception as e:
                # The summary is best-effort: one odd value must not be
                # reported as a load failure or hide the remaining keys.
                print(f"Could not summarize {key!r}: {e}")

    return data

def create_compatible_pickle(original_data, output_path, original_embeddings_512d=None):
    """Write a new pickle file whose top level is a dict with a 'final_model' entry.

    For dict input, the model is taken from 'final_model' or, failing that,
    from the first key whose string form contains "model". All other entries
    are copied except string keys starting with an underscore. For non-dict
    input, the object itself is stored under 'final_model'.

    Args:
        original_data: Object loaded from the original pickle file.
        output_path: Where to save the new pickle file.
        original_embeddings_512d: Optional original embeddings. When given
            they are stored under 'original_face_encodings' and take
            precedence over an entry of the same name in original_data.

    Returns:
        True if the file was written, False if any step failed (the error is
        printed rather than raised).
    """
    try:
        if isinstance(original_data, dict):
            new_data = {}

            # Prefer the canonical key, otherwise fall back to the first key
            # that looks like a model. None is a safe sentinel here because
            # str(None) does not contain "model".
            if 'final_model' in original_data:
                new_data['final_model'] = original_data['final_model']
            else:
                model_key = next(
                    (key for key in original_data if 'model' in str(key).lower()),
                    None,
                )
                if model_key is not None:
                    new_data['final_model'] = original_data[model_key]

            if original_embeddings_512d is not None:
                embeddings = np.array(original_embeddings_512d)
                new_data['original_face_encodings'] = embeddings
                print(f"Added original embeddings with shape: {embeddings.shape}")

            for key, value in original_data.items():
                # Keys are not guaranteed to be strings; only string keys can
                # be underscore-private, so test the type before startswith.
                if isinstance(key, str) and key.startswith('_'):
                    continue
                # setdefault keeps the entries set above ('final_model' and
                # the caller-supplied embeddings) from being overwritten.
                new_data.setdefault(key, value)

            message = f"Created compatible pickle file: {output_path}"
        else:
            print("Original data is not a dictionary, creating new structure...")
            new_data = {
                'final_model': original_data,
                'original_face_encodings': original_embeddings_512d,
            }
            message = f"Created new pickle structure: {output_path}"

        with open(output_path, 'wb') as f:
            pickle.dump(new_data, f)

        print(message)
        return True

    except Exception as e:
        print(f"Error creating compatible pickle: {e}")
        return False

# Usage example:
def fix_your_pickle_files():
    """Inspect the male/female cluster pickles and write "_fixed" copies.

    Each source file is loaded and summarized with inspect_pickle_file; every
    file that loaded is then passed to create_compatible_pickle without
    original embeddings (the "quick fix" variant). A list of recommendations
    is printed at the end.
    """
    # (source pickle, destination pickle) pairs; the destinations are relative
    # to the current working directory.
    model_files = [
        (
            'C:/Users/krexw/Documents/GitHub/uas_2/models/male_cluster_model.pkl',
            'models/male_cluster_model_fixed.pkl',
        ),
        (
            'C:/Users/krexw/Documents/GitHub/uas_2/models/female_cluster_model.pkl',
            'models/female_cluster_model_fixed.pkl',
        ),
    ]

    # Inspect every file before converting any, so both summaries are printed
    # ahead of the conversion messages.
    inspected = [
        (inspect_pickle_file(source_path), fixed_path)
        for source_path, fixed_path in model_files
    ]

    for data, fixed_path in inspected:
        # inspect_pickle_file returns None on failure. Compare against None
        # rather than testing truthiness: a numpy array payload raises on
        # bool(), and an empty container would be skipped by mistake.
        if data is not None:
            create_compatible_pickle(data, fixed_path)

    print("\n" + "="*50)
    print("RECOMMENDATIONS:")
    print("="*50)
    print("1. Use the quick fix in your service (Option 1 above)")
    print("2. If possible, go back to your Jupyter notebook and:")
    print("   - Extract the original 512D face embeddings")
    print("   - Save them along with your trained models")
    print("   - Recreate the pickle files with proper structure")
    print("3. Update your model loading to use the fixed files")

# Runs immediately when this cell/module is executed: inspects both cluster
# pickle files and, for each one that loads, also writes a "_fixed" copy.
fix_your_pickle_files()


INSPECTING: C:/Users/krexw/Documents/GitHub/uas_2/models/male_cluster_model.pkl
Type: <class 'dict'>
Keys: ['final_model', 'final_labels', 'best_features', 'best_configuration', 'feature_info', 'dimensionality_reduction', 'all_results', 'metadata', 'data_references', 'scaler']

--- final_model ---
Type: <class 'sklearn.cluster._kmeans.KMeans'>
Cluster model with 10 clusters
Cluster centers shape: (10, 512)

--- final_labels ---
Type: <class 'numpy.ndarray'>
Shape: (324,)
Dtype: int32

--- best_features ---
Type: <class 'numpy.ndarray'>
Shape: (324, 2)
Dtype: float64
Sample values (first row): [-21.79756512 -20.87894985]...

--- best_configuration ---
Type: <class 'dict'>
Value preview: {'method': 'K-Means', 'k_value': 10, 'parameters': {'k': 10, 'init': 'k-means++', 'random_state': 42...

--- feature_info ---
Type: <class 'dict'>
Value preview: {'original_face_encodings': array([[-0.32282448, -1.4099492 ,  0.6714958 , ...,  0.81440425,
       ...

--- dimensionality_reduction ---
Type