In [2]:
import pandas as pd
import numpy as np
import joblib
import logging
from sklearn.exceptions import NotFittedError

# Configure the root logger once at import time: INFO and above, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def load_model(model_path):
    """
    Deserialize a previously saved model object from disk using joblib.

    Args:
    model_path (str): Location of the saved model file.

    Returns:
    object: Whatever object ``joblib.load`` reconstructs from the file.

    Raises:
    FileNotFoundError: Logged and re-raised when the file does not exist.
    Exception: Any other failure while loading is logged and re-raised.
    """
    # NOTE: joblib.load unpickles the file, so only load model files from a
    # trusted source.
    try:
        loaded_model = joblib.load(model_path)
        success_message = f"Model loaded successfully from {model_path}"
        logging.info(success_message)
        return loaded_model
    except FileNotFoundError:
        # Dedicated branch so the log names the missing path explicitly.
        logging.error(f"Model file not found at {model_path}")
        raise
    except Exception as err:
        logging.error(f"Error loading model: {err}")
        raise

def read_excel(file_path):
    """
    Load an Excel workbook into a DataFrame via ``pandas.read_excel``.

    Args:
    file_path (str): Location of the Excel file to read.

    Returns:
    pandas.DataFrame: The data parsed from the file.

    Raises:
    FileNotFoundError: Logged and re-raised when the file does not exist.
    Exception: Any other failure while reading is logged and re-raised.
    """
    try:
        frame = pd.read_excel(file_path)
        success_message = f"Data loaded successfully from {file_path}"
        logging.info(success_message)
        return frame
    except FileNotFoundError:
        # Dedicated branch so the log names the missing path explicitly.
        logging.error(f"Excel file not found at {file_path}")
        raise
    except Exception as err:
        logging.error(f"Error reading Excel file: {err}")
        raise

def make_predictions(model, data):
    """
    Run the model on the given data and translate the output to labels.

    Args:
    model (object): Object exposing a ``predict(data)`` method.
    data (pandas.DataFrame): Feature rows to predict on.

    Returns:
    numpy.ndarray: One label per prediction: 'ja' where the model output
    equals 1, 'nein' for every other value.

    Raises:
    NotFittedError: Logged and re-raised when the model has not been fitted.
    Exception: Any other prediction failure is logged and re-raised.
    """
    try:
        raw_output = model.predict(data)
        # Only an output equal to 1 maps to 'ja'; everything else is 'nein'.
        is_positive = raw_output == 1
        labels = np.where(is_positive, 'ja', 'nein')
        logging.info("Predictions made successfully")
        return labels
    except NotFittedError:
        logging.error("The model is not fitted yet. It cannot be used for predictions.")
        raise
    except Exception as err:
        logging.error(f"Error making predictions: {err}")
        raise

def main(model_path, data_path, output_path='predictions_output.xlsx'):
    """
    Load the model, read the input data, predict, and save the results.

    The predictions are stored in a new 'Vorhersage_Immatrikulation' column,
    which is moved to index 11 (Excel column L) before the DataFrame is
    written out. Every failure is logged with its traceback and reported on
    stdout; no exception propagates to the caller.

    Args:
    model_path (str): Path to the saved model file.
    data_path (str): Path to the input Excel file.
    output_path (str): Path of the Excel file to write. Defaults to
        'predictions_output.xlsx'.
    """
    # Feature columns passed to the model (these must match the features
    # used during training).
    selected_features = ['InfoVeranstaltung', 'Distance_to_71034', 'Note Bachelor',
                         'Zweitbewerbung', 'Fachrichtung Bachelor', 'Note HZB']
    prediction_column = 'Vorhersage_Immatrikulation'
    # Target index of the prediction column (index 11 == Excel column L).
    prediction_position = 11

    try:
        # Load the model
        model = load_model(model_path)

        # Read the new data
        new_data = read_excel(data_path)

        # Fail early with a clear message instead of a bare pandas KeyError
        # when the input sheet lacks a column the model needs.
        missing_features = [name for name in selected_features
                            if name not in new_data.columns]
        if missing_features:
            raise ValueError(
                f"Input data is missing required feature columns: {missing_features}"
            )
        X = new_data[selected_features]

        # Make predictions
        predictions = make_predictions(model, X)

        # Add predictions to the DataFrame in a new column
        new_data[prediction_column] = predictions

        # Move the prediction column to its target position. With fewer
        # columns than that, list.insert places it at the end.
        columns = list(new_data.columns)
        columns.insert(prediction_position,
                       columns.pop(columns.index(prediction_column)))
        new_data = new_data[columns]

        # Save results
        new_data.to_excel(output_path, index=False)
        logging.info(f"Predictions saved to {output_path}")

        print(f"Predictions completed and saved to {output_path}")

    except Exception as e:
        # Top-level boundary: record the traceback so the log really
        # contains the details the user is pointed to.
        logging.exception(f"An error occurred: {e}")
        print("An error occurred. Please check the log for details.")

if __name__ == "__main__":
    # Default input files used when this file is executed as a script.
    model_path, data_path = (
        'random_forest_model.joblib',
        'dummy_data.xlsx',  # Replace with your new data file
    )
    main(model_path=model_path, data_path=data_path)

2024-10-05 22:50:15,982 - INFO - Model loaded successfully from random_forest_model.joblib
2024-10-05 22:50:15,998 - INFO - Data loaded successfully from dummy_data.xlsx
2024-10-05 22:50:16,013 - INFO - Predictions made successfully
2024-10-05 22:50:16,035 - INFO - Predictions saved to predictions_output.xlsx


Predictions completed and saved to predictions_output.xlsx
