In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
import streamlit as st
import pickle

In [None]:
# Load the raw copper dataset from the Colab working directory.
csv_path = '/content/Copper_Set.csv'
data = pd.read_csv(csv_path)

print("Initial dataset shape:", data.shape)

# Treat the placeholder '00000' in material_ref as a missing value.
# NOTE(review): this matches only cells that are exactly '00000'; if the intent
# is "values starting with 00000", a prefix match is needed -- confirm against the data.
data['material_ref'] = data['material_ref'].replace({'00000': np.nan})
print("Shape after replacing '00000' with NaN in material_ref:", data.shape)

Initial dataset shape: (33540, 14)
Shape after replacing '00000' with NaN in material_ref: (33540, 14)


In [None]:
# Numeric columns: impute gaps with each column's own median.
median_by_column = {
    name: data[name].median()
    for name in ('thickness', 'width', 'selling_price')
}
for name, median_value in median_by_column.items():
    data[name] = data[name].fillna(median_value)
print("Shape after filling missing values for thickness, width, and selling_price:", data.shape)

# Text-like columns: mark gaps with the sentinel string '-1'.
missing_marker = '-1'
for name in ('material_ref', 'status', 'item type'):
    data[name] = data[name].fillna(missing_marker)
print("Shape after filling missing values for material_ref, status, and item type:", data.shape)

Shape after filling missing values for thickness, width, and selling_price: (33540, 14)
Shape after filling missing values for material_ref, status, and item type: (33540, 14)


In [None]:
# Columns fed to the models, in the order the scaler and estimators will see them.
regression_features = ['quantity tons', 'thickness', 'width', 'material_ref', 'item type']

# 'material_ref' and 'item type' hold categorical labels, not measurements.
# Coercing them with pd.to_numeric turned nearly every 'item type' value into
# NaN, so the dropna below discarded all but one row. Encode them as integer
# category codes instead and coerce only the genuinely numeric columns.
categorical_features = ['material_ref', 'item type']
numeric_features = [col for col in regression_features if col not in categorical_features]

data[numeric_features] = data[numeric_features].apply(pd.to_numeric, errors='coerce')

# label -> integer code per categorical column; kept so the same encoding can
# be applied to new inputs at prediction time.
category_mappings = {}
for col in categorical_features:
    labels = data[col].astype(str)
    # Sorted order makes the codes deterministic across runs on the same data.
    category_mappings[col] = {label: code for code, label in enumerate(sorted(labels.unique()))}
    data[col] = labels.map(category_mappings[col]).astype('int64')
print("Shape after converting regression features to numeric:", data.shape)

print("Number of NaN values per column before dropping:", data[regression_features].isna().sum())

# Only rows whose numeric columns failed to parse can still contain NaN here.
data = data.dropna(subset=regression_features)
print("Shape after dropping rows with NaN in regression features:", data.shape)

Shape after converting regression features to numeric: (33540, 14)
Number of NaN values per column before dropping: quantity tons        0
thickness            0
width                0
material_ref     14878
item type        33539
dtype: int64
Shape after dropping rows with NaN in regression features: (1, 14)


In [None]:

# Fail early with a clear message when cleaning left nothing to train on.
if data.empty:
    raise ValueError("Dataset is empty after cleaning. Please ensure the input data contains valid entries in all required columns.")
# A single surviving row passes the emptiness check, but train_test_split below
# cannot build a non-empty train set from it.
if len(data) < 2:
    raise ValueError(
        f"Only {len(data)} row(s) remain after cleaning; at least 2 are required "
        "to create a train/test split. Check how the feature columns are being converted."
    )

# LabelEncoder numbers the distinct status values in sorted order, so a given
# code's meaning depends on which values are present; label_encoder.classes_
# holds the actual code -> label mapping (index == code).
label_encoder = LabelEncoder()
data['status'] = label_encoder.fit_transform(data['status'])
print("Shape after encoding status:", data.shape)

# Standardise the model inputs in place; later cells read the scaled columns
# from `data` and reuse `scaler` for new inputs.
# NOTE(review): the scaler is fitted on all rows before the split, so test-set
# statistics influence the training features.
scaler = StandardScaler()
data[regression_features] = scaler.fit_transform(data[regression_features])
print("Shape after scaling numeric features:", data.shape)

# Regression task: predict selling_price from the scaled features.
X_reg = data[regression_features]
y_reg = data['selling_price']
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

In [None]:
# Regression model: fit on the training split, predict the held-out split.
regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train_reg, y_train_reg)
reg_preds = regressor.predict(X_test_reg)

# `classification_features` was never defined, so the next statement raised
# NameError. The classifier uses the same inputs as the regressor: the
# prediction form collects exactly these five fields and passes them through
# the scaler that was fitted on `regression_features`.
classification_features = list(regression_features)

# Classification task: predict the encoded status from the same scaled features.
X_clf = data[classification_features]
y_clf = data['status']
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X_clf, y_clf, test_size=0.2, random_state=42)

classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train_clf, y_train_clf)
clf_preds = classifier.predict(X_test_clf)

# Held-out metrics for both models.
print("Regression RMSE:", np.sqrt(mean_squared_error(y_test_reg, reg_preds)))
print("Classification Accuracy:", accuracy_score(y_test_clf, clf_preds))
print("Classification Report:\n", classification_report(y_test_clf, clf_preds))

In [None]:
# Persist the fitted estimators and scaler so they can be reloaded later.
artifacts = {
    'regressor.pkl': regressor,
    'classifier.pkl': classifier,
    'scaler.pkl': scaler,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as handle:
        pickle.dump(artifact, handle)

# Streamlit page header and the task selector that drives the form shown below.
st.title("Industrial Copper Modeling")

task_choices = ["Regression", "Classification"]
option = st.selectbox("Select Task:", task_choices)

In [None]:

# Regression form: collect the five model inputs and show the predicted price.
if option == "Regression":
    st.header("Predict Selling Price")
    input_data = {
        "quantity tons": st.number_input("Quantity (Tons)"),
        "thickness": st.number_input("Thickness"),
        "width": st.number_input("Width"),
        "material_ref": st.text_input("Material Reference"),
        "item type": st.text_input("Item Type"),
    }

    # st.text_input returns a string (empty until the user types), which made
    # scaler.transform raise on the first render. Coerce every field to a
    # number and ask for valid input instead of surfacing a traceback.
    input_df = pd.DataFrame([input_data])[regression_features].apply(pd.to_numeric, errors='coerce')
    invalid_fields = [col for col in regression_features if pd.isna(input_df.at[0, col])]
    if invalid_fields:
        st.warning("Please enter numeric values for: " + ", ".join(invalid_fields))
    else:
        # Apply the same scaling used in training before predicting.
        input_df[regression_features] = scaler.transform(input_df[regression_features])
        prediction = regressor.predict(input_df)
        st.write("Predicted Selling Price:", prediction[0])

In [None]:
# Classification form: collect the five model inputs and show the predicted status.
# A plain `if` is used because this cell is compiled on its own; a leading
# `elif` with no `if` in the same cell is a SyntaxError. `option` holds exactly
# one of the two choices, so at most one form is rendered either way.
if option == "Classification":
    st.header("Predict Status")
    input_data = {
        "quantity tons": st.number_input("Quantity (Tons)"),
        "thickness": st.number_input("Thickness"),
        "width": st.number_input("Width"),
        "material_ref": st.text_input("Material Reference"),
        "item type": st.text_input("Item Type"),
    }

    # st.text_input returns a string (empty until the user types), which made
    # scaler.transform raise on the first render. Coerce every field to a
    # number and ask for valid input instead of surfacing a traceback.
    input_df = pd.DataFrame([input_data])[classification_features].apply(pd.to_numeric, errors='coerce')
    invalid_fields = [col for col in classification_features if pd.isna(input_df.at[0, col])]
    if invalid_fields:
        st.warning("Please enter numeric values for: " + ", ".join(invalid_fields))
    else:
        # Apply the same scaling used in training before predicting.
        input_df[classification_features] = scaler.transform(input_df[classification_features])
        prediction = classifier.predict(input_df)
        # Decode through the fitted encoder instead of assuming code 1 means
        # "WON": LabelEncoder assigns codes by sorted label order, so the
        # meaning of each code depends on which status values were present.
        predicted_label = label_encoder.inverse_transform(prediction)[0]
        st.write("Predicted Status:", str(predicted_label).upper())