In [7]:
import streamlit as st
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib
import threading

# Data files offered by the app. Each name is "<feature>_<feature>_<index>.txt";
# the index simply counts the files from 1 in the order listed here.
file_names = [
    f"{feature_pair}_{position}.txt"
    for position, feature_pair in enumerate(
        (
            "Aroma_Acidity",
            "Aroma_Body",
            "Aroma_Balance",
            "Acidity_Body",
            "Acidity_Balance",
            "Body_Balance",
        ),
        start=1,
    )
]

# Collects the numeric feature names of every file that gets evaluated
numeric_features_list = list()

# Page heading of the Streamlit app
st.title(body="Model Evaluation App")

# Drop-down from which the user picks the data file to work with
selected_file = st.selectbox(label="Select a file", options=file_names)

# Handle of the evaluation thread; stays None until a thread is created
evaluation_thread = None

# Function to run model evaluation
def evaluate_model(file_name):
    """Train, evaluate and save a linear-regression pipeline for one data file.

    The first two underscore-separated parts of ``file_name`` are used as the
    numeric feature columns, ``CountryOfOrigin`` as the categorical feature and
    ``Cupper.Points`` as the regression target. The data is split 80/20 with a
    fixed random state, the pipeline is fitted on the training part and the
    mean squared error on the test part is written to the Streamlit page.

    Side effects:
        * appends the numeric feature names to the module-level
          ``numeric_features_list``;
        * writes the MSE and the feature names with ``st.write``;
        * dumps the fitted pipeline to ``model_<name>.pkl`` in the current
          working directory, where ``<name>`` is ``file_name`` up to its
          first dot.

    Parameters
    ----------
    file_name : str
        Name of a tab-separated data file located in the current directory.
        Names that are not listed in the module-level ``file_names`` are
        ignored and the function returns without doing anything.

    Returns
    -------
    None
    """
    # The previous implementation looped over ``file_names`` with a loop
    # variable that shadowed this parameter and compared against the global
    # ``selected_file``, so the argument was never used. Work on the argument
    # directly and keep the "unknown names are a no-op" behaviour.
    if file_name not in file_names:
        return

    # Extract feature names from the file name, e.g. "Aroma_Acidity_1.txt"
    # yields ["Aroma", "Acidity"]
    features = file_name.split("_")[:2]
    numeric_features_list.append(features)

    # Read in the data from the file (read_table expects tab-separated text)
    file_path = os.path.join(".", file_name)
    df = pd.read_table(file_path)

    # Separate features and target
    X = df.drop(columns=['Cupper.Points'])
    y = df['Cupper.Points']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define preprocessing steps for numerical and categorical features
    categorical_features = ['CountryOfOrigin']

    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])

    # handle_unknown='ignore' keeps prediction from failing on categories
    # that only occur in the test split
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Combine the preprocessing steps for both types of features
    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, features),
        ('cat', categorical_transformer, categorical_features)
    ])

    # Create the regression model
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression())
    ])

    # Fit the model on the training data
    model.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Calculate the mean squared error to evaluate the model's performance
    mse = mean_squared_error(y_test, y_pred)
    st.write(f"Mean Squared Error for {file_name}: {mse:.2f}")

    # Save the fitted model as a pickle file
    model_pickle_filename = f"model_{file_name.split('.')[0]}.pkl"
    joblib.dump(model, model_pickle_filename)

    # Report the numeric features used for this file
    st.write(f"Numeric Features for {file_name}: {features}")

# Run the model evaluation when the button is pressed.
# The work is done synchronously on the script thread: Streamlit commands
# issued from a plain background thread (the st.write calls inside
# evaluate_model) have no script-run context, so their output never reaches
# the page, and an is_alive() check made right after start() races with the
# thread. st.spinner shows the "in progress" message while the call runs.
if st.button("Evaluate Model"):
    with st.spinner("Model evaluation is in progress. Please wait..."):
        evaluate_model(selected_file)

# Display details of the saved model for the selected file, if there is one.
model_pickle_filename = f"model_{selected_file.split('.')[0]}.pkl"
if os.path.exists(model_pickle_filename):
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files that were written by this app.
    model = joblib.load(model_pickle_filename)

    # Display model details
    st.subheader("Model Details")
    st.text("Model was trained with the following features:")
    # numeric_features_list is rebuilt on every script run and is empty unless
    # an evaluation happened in this run; fall back to the feature names
    # encoded in the selected file's name.
    st.write(numeric_features_list or [selected_file.split("_")[:2]])
else:
    # Without this guard the page raised FileNotFoundError until a model had
    # been trained for the selected file.
    st.info("No saved model found for this file yet. Click 'Evaluate Model' to train one.")

In [1]:
def extract_feature_names(filename):
    """Return the underscore-separated parts of a ``model_<name>.pkl`` file name.

    Every part of ``<name>`` is returned, including a trailing index if the
    name has one, e.g. ``"model_Aroma_Acidity_1.pkl"`` gives
    ``["Aroma", "Acidity", "1"]``.

    Parameters
    ----------
    filename : str
        File name starting with ``model_`` and containing ``.pkl``.

    Returns
    -------
    list of str
        The parts of the name between ``model_`` and ``.pkl``.

    Raises
    ------
    ValueError
        If ``filename`` does not match the ``model_<name>.pkl`` pattern.
    """
    # Imported locally because no visible cell of the notebook imports ``re``;
    # without it the function failed with NameError on every call.
    import re

    # Extract the portion of the filename between "model_" and ".pkl"
    match = re.match(r"model_(.+)\.pkl", filename)
    if match is None:
        # Previously this surfaced as an opaque AttributeError on None.group
        raise ValueError(
            f"Expected a file name of the form 'model_<name>.pkl', got {filename!r}"
        )

    # Split the name using underscores
    return match.group(1).split("_")

In [4]:
import sklearn

# Report the installed scikit-learn version
print(f"Package Version: {sklearn.__version__}")

Package Version: 1.0.2
