In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import os

In [3]:
# Data generation function
def generate_synthetic_data(n_samples=1000, n_features=20):
    """Generate synthetic classification data"""
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=15,
        n_redundant=5,
        random_state=42
    )
    
    # Convert to DataFrame
    feature_names = [f'feature_{i}' for i in range(n_features)]
    df = pd.DataFrame(X, columns=feature_names)
    df['target'] = y
    
    return df

In [4]:
# Save data function
def save_data(df, path='data/raw/synthetic_data.csv'):
    """Save DataFrame to CSV"""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    df.to_csv(path, index=False)

# Generate and save data
df = generate_synthetic_data()
save_data(df)

In [5]:
# Model training function
def train_model(data_path='data/raw/synthetic_data.csv', model_path='models/model.pkl'):
    """Train a RandomForest model and save it"""
    df = pd.read_csv(data_path)
    X = df.drop('target', axis=1)
    y = df['target']
    
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X, y)
    
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)

# Train and save the model
train_model()

In [6]:
# Model validation function
def validate_model(data_path='data/raw/synthetic_data.csv', model_path='models/model.pkl'):
    """Validate the trained model"""
    model = joblib.load(model_path)
    df = pd.read_csv(data_path)
    X = df.drop('target', axis=1)
    y = df['target']
    
    y_pred = model.predict(X)
    accuracy = accuracy_score(y, y_pred)
    
    return accuracy

In [7]:
# Example usage
if __name__ == "__main__":
    # Generate and save data
    df = generate_synthetic_data()
    save_data(df)
    
    # Train and save model
    train_model()
    
    # Validate model
    accuracy = validate_model()
    print(f"Model validation accuracy: {accuracy:.4f}")

Model validation accuracy: 1.0000
