In [6]:
!pip install import_ipynb

import import_ipynb


import amorenet_alt as amorenet
import cliffGauss_alt as cliffGauss 
import choosePoints_alt as choosePoints
import testModel_alt as testModel





In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def _as_column(values):
    """Return *values* unchanged unless it is 1-D, in which case make it one column."""
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    return values


def _fit_model(model, modelpar, train, targ):
    """Call the model constructor with a copy of *modelpar* plus the x/y data."""
    params = modelpar.copy()  # never mutate the caller's dict
    params['x'] = train
    params['y'] = targ
    return model(**params)


def _evaluate_model(curmodel, dat, train, doplot, n_new=None):
    """Test *curmodel* with testModel.test_model, or simple_test_model if unavailable."""
    # NOTE(review): this looks up a module literally named `testModel`; the
    # notebook-level alias `import testModel_alt as testModel` is not what the
    # function-local import resolves. Confirm which module is intended.
    try:
        import testModel
    except ImportError:
        print("testModel module not found, using simple test")
        return simple_test_model(curmodel, dat, train, doplot=doplot)

    # The call sits outside the try so that an ImportError raised *inside*
    # test_model is not misreported as "module not found".
    extra = {} if n_new is None else {'new': n_new}
    return testModel.test_model(curmodel, dat, train, errmx=0.2,
                                doplot=doplot, **extra)


def _generation_metrics(generation, train, targ, n_new, test_result, new_targ=None):
    """Build the per-generation summary row stored in the results DataFrame."""
    metrics = {
        'generation': generation,
        'n_train_points': len(train),
        'n_new_points': n_new,
        'mse': test_result.get('mse', np.nan),
        'mean_error': test_result.get('meanerr', np.nan),
        'max_error': test_result.get('maxerr', np.nan),
        'min_target': float(np.min(targ)),
        'max_target': float(np.max(targ)),
        'mean_target': float(np.mean(targ)),
        'std_target': float(np.std(targ)),
    }
    if new_targ is not None:
        # Statistics restricted to the points added in this generation.
        metrics['new_points_min'] = float(np.min(new_targ))
        metrics['new_points_max'] = float(np.max(new_targ))
        metrics['new_points_mean'] = float(np.mean(new_targ))
        metrics['new_points_std'] = float(np.std(new_targ))
    return metrics


def dopdt(model,                # model constructor function
          modelpar,             # model parameters
          chooser,              # point chooser class
          chooserpar,           # point chooser parameters
          dat,                  # data generator object
          Npop=100,             # population size
          Ngen=10,              # number of generations
          pause=True,           # whether to pause between generations
          doplot=True,          # whether to create plots
          saveplot=False):      # whether to save plots
    """
    Python implementation of Evolutionary Design of Experiments (EDoE)
    Originally by N. Packard, ProtoLife, May 2008
    Enhanced with DataFrame result tracking

    Generation 1 trains a model on ``Npop`` uniformly random points in
    [0, 1)^d, where ``d`` is read from ``dat.d``. Every later generation
    asks a freshly built chooser for new points given the current model,
    appends them to the training set, and retrains on all data so far.

    Parameters:
    -----------
    model : callable
        Called as ``model(**modelpar, x=train, y=targ)``; returns the model.
    modelpar : dict
        Keyword arguments for ``model``; copied, never modified.
    chooser : callable
        Called as ``chooser(**chooserpar, N=Npop, d=d)``; the result must
        provide ``choose(model)`` returning the new points.
    chooserpar : dict
        Keyword arguments for ``chooser``; copied, never modified.
    dat : object
        Data generator; must expose ``d`` and be usable by ``sample_data``.
    Npop : int
        Number of random points in generation 1 and the ``N`` handed to
        the chooser.
    Ngen : int
        Total number of generations (generation 1 always runs).
    pause : bool
        If True, wait for Enter (via ``input``) before each generation > 1.
    doplot : bool
        Forwarded to the model test function for every generation.
    saveplot : bool
        If True, call ``plt.savefig('pdfplot<g>.pdf')`` after each generation.

    Returns:
    --------
    (list, pandas.DataFrame)
        The raw test result of every generation, and one summary row per
        generation.
    """
    res = []  # to accumulate results
    generation_data = []  # for DataFrame

    ## First generation:
    ## random sample of points
    print("Starting PDT - Generation 1")

    # Get dimension from data generator
    d = dat.d

    # Generate random training points for first generation
    train = np.random.uniform(0, 1, (Npop, d))  # random points to train on
    targ = sample_data(dat, train)              # sample data generator

    # Debug: Print initial shapes
    print(f"Initial train shape: {train.shape}")
    print(f"Initial targ shape: {targ.shape}")

    # Ensure targets are 2D so later generations can be stacked row-wise
    targ = _as_column(targ)
    print(f"Reshaped targ shape: {targ.shape}")

    # Train initial model
    curmodel = _fit_model(model, modelpar, train, targ)

    print(f"---------------------------------Finished generation {1}")

    # Test the model; doplot is forwarded here exactly as in later generations
    test_result = _evaluate_model(curmodel, dat, train, doplot)
    res.append(test_result)

    # Capture generation 1 metrics
    generation_data.append(
        _generation_metrics(1, train, targ, Npop, test_result))

    if saveplot:
        plt.savefig('pdfplot1.pdf')

    ## Main evolutionary loop
    for g in range(2, Ngen+1):
        print(f"Starting generation {g}")

        if pause:
            input("Enter to continue, Ctrl+C to exit.")

        # Create chooser instance with updated parameters
        chooser_params = chooserpar.copy()
        chooser_params['N'] = Npop
        chooser_params['d'] = d
        chooser_obj = chooser(**chooser_params)

        # Choose new points using the current model
        new = chooser_obj.choose(curmodel)
        # Count what the chooser actually returned rather than assuming Npop
        n_new = len(new)

        # Debug: Print shapes to understand the issue
        print(f"Current train shape: {train.shape}")
        print(f"Current targ shape: {targ.shape}")
        print(f"New points shape: {new.shape}")

        # Add new points to training set
        train = np.vstack((train, new))
        new_targ = sample_data(dat, new)
        print(f"New targets shape: {new_targ.shape}")

        # Ensure targets have consistent shape (targ is already 2D here)
        new_targ = _as_column(new_targ)

        targ = np.vstack((targ, new_targ))
        print(f"Combined train shape: {train.shape}")
        print(f"Combined targ shape: {targ.shape}")

        # Train updated model with all data
        curmodel = _fit_model(model, modelpar, train, targ)

        # Test the updated model
        print(f"---------------------------------Finished generation {g}")
        test_result = _evaluate_model(curmodel, dat, train, doplot, n_new=n_new)
        res.append(test_result)

        # Capture generation metrics
        generation_data.append(
            _generation_metrics(g, train, targ, n_new, test_result,
                                new_targ=new_targ))

        if saveplot:
            plt.savefig(f'pdfplot{g}.pdf')

    # Create DataFrame from collected metrics
    df_results = pd.DataFrame(generation_data)

    return res, df_results

def sample_data(dat, points):
    """
    Sample data from generator at specified points

    The sampling entry point is resolved in this order: a callable
    ``dat.sample_data`` attribute, a callable ``dat.sample`` attribute,
    then ``dat`` itself if it is callable. The chosen callable is invoked
    once with ``points`` and its return value is passed through unchanged.

    The callable is looked up *before* it is called, so any exception
    raised while sampling (including an AttributeError from inside the
    sampler) propagates to the caller instead of silently triggering a
    different sampling method or being reported as a missing method.

    Raises:
    -------
    AttributeError
        If ``dat`` offers none of the recognized sampling entry points.
    """
    for method_name in ('sample_data', 'sample'):
        method = getattr(dat, method_name, None)
        if callable(method):
            return method(points)

    if callable(dat):
        return dat(points)

    raise AttributeError(f"Data generator {type(dat)} doesn't have a recognized sampling method")

def get_targs(pdt):
    """
    Collect the trailing targets of every generation, sorted per generation

    The window length is the number of 'targs' in the first entry of
    ``pdt``. For each entry the last window-length targets are taken,
    sorted ascending, and the sorted runs are concatenated in entry order.
    If ``pdt`` is empty or its first entry has no 'targs' key, a warning
    is printed and an empty array is returned.
    """
    if pdt and 'targs' in pdt[0]:
        window = len(pdt[0]['targs'])
        # Slice every entry first, then sort each slice on its own.
        tails = [entry['targs'][-window:] for entry in pdt]
        return np.array([value for tail in tails for value in sorted(tail)])

    print("Warning: No 'targs' found in PDT results")
    return np.array([])

def get_meanerrs(pdt):
    """
    Return the 'meanerr' value of every PDT result as an array

    Entries without a 'meanerr' key contribute 0.
    """
    mean_errors = []
    for entry in pdt:
        mean_errors.append(entry.get('meanerr', 0))
    return np.array(mean_errors)

def simple_test_model(model, dat, train_points, errmx=0.2, new=None, doplot=False):
    """
    Simple model testing function
    Returns basic statistics about the model

    Draws 100 uniformly random points in [0, 1)^d (``d`` read from
    ``dat.d``), samples the data generator and ``model.predict`` at those
    points, and summarizes the absolute and squared differences.

    Parameters:
    -----------
    model : object
        Must provide ``predict(points)``.
    dat : object
        Data generator; must expose ``d`` and be usable by ``sample_data``.
    train_points : sequence
        Current training points; only its length is reported, and the
        points are re-sampled for the optional plot.
    errmx, new :
        Accepted but not used by this function.
    doplot : bool
        If True, show a predicted-vs-true scatter and the training targets.
        Plotting failures are reported with a message and do not abort.

    Returns:
    --------
    dict
        Keys 'mse', 'mae', 'meanerr' (same value as 'mae'), 'maxerr',
        'train_points' and 'test_points'.
    """
    # Generate test points
    test_points = np.random.uniform(0, 1, (100, dat.d))

    # Get true values and predictions; asarray lets list returns work too
    true_vals = np.asarray(sample_data(dat, test_points))
    pred_vals = np.asarray(model.predict(test_points))

    # Reconcile layouts before subtracting: mismatched shapes such as
    # (N,) vs (N, 1) or (N, 1) vs (1, N) would otherwise broadcast to an
    # N x N matrix and silently corrupt every statistic below.
    if true_vals.shape != pred_vals.shape:
        if true_vals.ndim == 1:
            true_vals = true_vals.reshape(-1, 1)
        if pred_vals.ndim == 1:
            pred_vals = pred_vals.reshape(-1, 1)
        if true_vals.shape != pred_vals.shape and true_vals.size == pred_vals.size:
            pred_vals = pred_vals.reshape(true_vals.shape)

    residual = true_vals - pred_vals
    errors = np.abs(residual)
    # Plain floats, consistent with 'maxerr'
    mse = float(np.mean(residual ** 2))
    mae = float(np.mean(errors))

    result = {
        'mse': mse,
        'mae': mae,
        'meanerr': mae,
        'maxerr': float(np.max(errors)),
        'train_points': len(train_points),
        'test_points': len(test_points)
    }

    if doplot:
        # Plotting is best-effort: a failure here must not lose the metrics.
        try:
            plt.figure(figsize=(10, 6))
            plt.subplot(1, 2, 1)
            plt.scatter(true_vals.flatten(), pred_vals.flatten(), alpha=0.6)
            # Identity line: perfect predictions would fall on it
            plt.plot([true_vals.min(), true_vals.max()], [true_vals.min(), true_vals.max()], 'r--')
            plt.xlabel('True Values')
            plt.ylabel('Predicted Values')
            plt.title('Predictions vs True Values')

            plt.subplot(1, 2, 2)
            train_targets = np.asarray(sample_data(dat, train_points)).flatten()
            plt.scatter(range(len(train_points)), train_targets,
                       c='blue', label='Training Data', alpha=0.7)
            plt.xlabel('Point Index')
            plt.ylabel('Target Value')
            plt.title('Training Data Distribution')
            plt.legend()
            plt.tight_layout()
            plt.show()
        except Exception as e:
            print(f"Plotting failed: {e}")

    return result

def plot_generation_progress(df_results):
    """
    Plot the progress across generations

    Draws a 2x2 grid: MSE per generation, training-set size per
    generation, mean target with a one-standard-deviation band, and
    (when the columns exist) mean and max error per generation.

    Parameters:
    -----------
    df_results : pandas.DataFrame
        DataFrame returned by dopdt function. Must contain the columns
        'generation', 'mse', 'n_train_points', 'mean_target' and
        'std_target'; 'mean_error' and 'max_error' are optional.

    Returns:
    --------
    matplotlib.figure.Figure
        The created figure; it is not shown or saved here.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    generations = df_results['generation']

    # Plot MSE over generations
    ax_mse = axes[0, 0]
    ax_mse.plot(generations, df_results['mse'], marker='o', linewidth=2)
    ax_mse.set_xlabel('Generation')
    ax_mse.set_ylabel('MSE')
    ax_mse.set_title('Model Error Over Generations')
    ax_mse.grid(True, alpha=0.3)

    # Plot number of training points
    ax_size = axes[0, 1]
    ax_size.plot(generations, df_results['n_train_points'], marker='s', linewidth=2, color='green')
    ax_size.set_xlabel('Generation')
    ax_size.set_ylabel('Number of Training Points')
    ax_size.set_title('Training Set Growth')
    ax_size.grid(True, alpha=0.3)

    # Plot target statistics
    ax_targ = axes[1, 0]
    mean_target = df_results['mean_target']
    std_target = df_results['std_target']
    ax_targ.plot(generations, mean_target, marker='o', label='Mean', linewidth=2)
    # The label uses an escape for the plus-minus sign so the legend cannot
    # be corrupted again by a file-encoding round trip.
    ax_targ.fill_between(generations,
                         mean_target - std_target,
                         mean_target + std_target,
                         alpha=0.3, label='\u00b11 Std Dev')
    ax_targ.set_xlabel('Generation')
    ax_targ.set_ylabel('Target Value')
    ax_targ.set_title('Target Distribution Over Generations')
    ax_targ.legend()
    ax_targ.grid(True, alpha=0.3)

    # Plot error metrics if available
    if 'mean_error' in df_results.columns:
        ax_err = axes[1, 1]
        ax_err.plot(generations, df_results['mean_error'], marker='o', label='Mean Error', linewidth=2)
        if 'max_error' in df_results.columns:
            ax_err.plot(generations, df_results['max_error'], marker='s', label='Max Error', linewidth=2)
        ax_err.set_xlabel('Generation')
        ax_err.set_ylabel('Error')
        ax_err.set_title('Error Metrics Over Generations')
        ax_err.legend()
        ax_err.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig

# Example usage:
"""
results, df_results = dopdt(model, modelpar, chooser, chooserpar, dat, 
                             Npop=100, Ngen=10, pause=False, doplot=False)

# Display results
print(df_results)

# Save to CSV
df_results.to_csv('generation_results.csv', index=False)

# Plot progress
plot_generation_progress(df_results)
plt.show()
"""

"\nresults, df_results = dopdt(model, modelpar, chooser, chooserpar, dat, \n                             Npop=100, Ngen=10, pause=False, doplot=False)\n\n# Display results\nprint(df_results)\n\n# Save to CSV\ndf_results.to_csv('generation_results.csv', index=False)\n\n# Plot progress\nplot_generation_progress(df_results)\nplt.show()\n"