In [3]:
!pip install streamlit -q

In [4]:
%%writefile app.py
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import streamlit as st
st. set_page_config(layout="wide", page_icon=":FCD:")
st.set_option('deprecation.showPyplotGlobalUse', False)
import pandas as pd
import numpy as np
import seaborn as sns
import time
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
plt.style.use('Solarize_Light2')

from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.decomposition import PCA

#--------------------------------------------------------------------------------------------------------------------------------------------------------
start_time=time.time()  #Program Start time
#Titles
tit1,tit2 = st.columns((4, 1))
tit1.markdown("<h1 style='text-align: center;'><u>Fuel Classification for  Fire Calorimetry Database</u> </h1>",unsafe_allow_html=True)
tit2.image("/content/Fire.jpg")
st.sidebar.title("Dataset and Classifier")

dataset_name=st.sidebar.selectbox("Select Dataset: ",('Normal',"Augmented"))
classifier_name = st.sidebar.selectbox("Select Classifier: ",("Logistic Regression","Decision Trees","Bagging",
                                                              "Random Forest","AdaBoost Classifier"))

LE=LabelEncoder()
def get_dataset(dataset_name):
    if dataset_name=="Normal":
        data=pd.read_csv("/content/FCDorg.csv")
        st.header("Classification on Normal Data")
        return data

    else:
        data=pd.read_csv("/content/FCDaug.csv")
        st.header("Classification on Augmented Data")
        return data

data = get_dataset(dataset_name)

def selected_dataset(dataset_name):
    if dataset_name == "Normal":
        X=data.drop(["fuel_type"],axis=1)
        Y=data.fuel_type
        return X,Y

    elif dataset_name == "Augmented":
        X = data.drop(["fuel_type"], axis=1)
        Y = data.fuel_type
        return X,Y
lables = ["Cellulose", "Methane", "Natural Gas","Other Fuels", "Plastic", "Propane", "Wood (MDF)"]
X,Y=selected_dataset(dataset_name)

#Plot output variable
def plot_op(dataset_name):
    col1, col2 = st.columns((1, 5))
    plt.figure(figsize=(12, 3))
    plt.title("Classes in 'Y'")
    if dataset_name == "Normal":
        col1.write(Y)
        sns.countplot(Y)
        col2.pyplot()

    elif dataset_name == "Augmented":
        col1.write(Y)
        sns.countplot(Y)
        col2.pyplot()

st.write(data)
st.write("Shape of dataset: ",data.shape)
st.write("Number of classes: ",Y.nunique())
plot_op(dataset_name)


def add_parameter_ui(clf_name):
    params={}
    st.sidebar.write("Select values: ")

    if clf_name == "Logistic Regression":
        R = st.sidebar.slider("Regularization",0.1,10.0,step=0.1)
        MI = st.sidebar.slider("max_iter",50,400,step=50)
        params["R"] = R
        params["MI"] = MI


    elif clf_name == "Decision Trees":
        M = st.sidebar.slider("max_depth", 2, 20)
        C = st.sidebar.selectbox("Criterion", ("gini", "entropy"))
        SS = st.sidebar.slider("min_samples_split",2,10)
        params["M"] = M
        params["C"] = C
        params["SS"] = SS

    elif clf_name == "Bagging":
        N = st.sidebar.slider("n_estimators",1,50,step=1,value=10)
        params["N"] = N
        
    elif clf_name == "Random Forest":
        N = st.sidebar.slider("n_estimators",10,300,step=20,value=100)
        M = st.sidebar.slider("max_depth",2,20)
        C = st.sidebar.selectbox("Criterion",("gini","entropy"))
        F = st.sidebar.selectbox("max_features",("auto","sqrt", "log2"))
        params["N"] = N
        params["M"] = M
        params["C"] = C
        params["F"] = F

    elif clf_name == "AdaBoost Classifier":
        N = st.sidebar.slider("n_estimators", 10, 300, step=40, value=10)
        LR = st.sidebar.slider("Learning Rate", 0.01, 10.0,1.0)
        params["N"] = N
        params["LR"] = LR


    RS=st.sidebar.slider("Random State",0,100)
    params["RS"] = RS
    return params

params = add_parameter_ui(classifier_name)

def get_classifier(clf_name,params):
    global clf
    if clf_name == "Logistic Regression":
        clf = LogisticRegression(C=params["R"],max_iter=params["MI"])

    elif clf_name == "Decision Trees":
        clf = DecisionTreeClassifier(max_depth=params["M"],criterion=params["C"],min_samples_split= params["SS"])
        
    elif clf_name == "Bagging":
        clf = BaggingClassifier(n_estimators = params["N"])

    elif clf_name == "Random Forest":
        clf = RandomForestClassifier(n_estimators=params["N"],max_depth=params["M"],criterion=params["C"], max_features=params["F"])

    elif clf_name == "AdaBoost Classifier":
        clf = AdaBoostClassifier(n_estimators=params["N"],learning_rate=params["LR"])        

    return clf

clf = get_classifier(classifier_name,params)

#Build Model
def model():
    X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)
    
    if classifier_name == "Logistic Regression":
        #MinMax Scaling / Normalization of data
        Std_scaler = StandardScaler()
        X_train = Std_scaler.fit_transform(X_train)
        X_test = Std_scaler.transform(X_test)
    
        clf.fit(X_train,Y_train)
        Y_pred_train = clf.predict(X_train)
        Y_pred_test = clf.predict(X_test) 
        return X_train, X_test, Y_train, Y_pred_train, Y_pred_test ,Y_test
    else:
        clf.fit(X_train, Y_train)
        Y_pred_train = clf.predict(X_train)
        Y_pred_test = clf.predict(X_test)
        return X_train, X_test, Y_train, Y_pred_train, Y_pred_test ,Y_test

X_train, X_test, Y_train, Y_pred_train, Y_pred_test ,Y_test = model()

def conmatrix(X, y, model):
    return confusion_matrix(y, model.predict(X))

#Plot Output
def compute():
    #Plot PCA
    clf_result = permutation_importance(clf,X_train,Y_train, random_state = 0)
    cols = X.columns
    mean = clf_result.importances_mean
    std = clf_result.importances_std
    df_res = pd.DataFrame({'features':cols, 'importance':mean, 'importance_std': std}).sort_values('importance', ascending=False, ignore_index=True)
    plt.figure(figsize=(10,10))
    sns.barplot(x='importance',y='features', data = df_res, orient='h', xerr = df_res["importance_std"] )
    plt.title("Permutation Importance\n", fontsize=20);
    st.pyplot()

    #c1, c2 = st.columns((4,3))
    #Output plot
    plt.figure(figsize=(12,6))
    st.write("Permutation importance dataframe",df_res)
    var1 = df_res['features'][0]
    var2 = df_res['features'][1]
    st.write("Top 1 : ",var1)
    st.write("Top 2: ", var2)
    sns.scatterplot(x =data[var1] , y = data[var2] , hue = data['fuel_type'])
    plt.title("Distribution of Classes according to top 2 variables")
    st.pyplot()

    #Confusion Matrix
    c1, c2 = st.columns((4,4))
    cm = conmatrix(X_train, Y_train, clf)
    l = ["Cellulose", "Methane", "Natural Gas","Other Fuels", "Plastic", "Propane", "Wood (MDF)"]
    plt.figure(figsize=(12, 7.5))
    sns.heatmap(cm,annot=True,cmap='Blues',linewidths=2,fmt='g',xticklabels=l, yticklabels=l);
    plt.title("Confusion Matrix for Train",fontsize=15)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    c1.pyplot()
    
    
    cm = conmatrix(X_test, Y_test, clf)
    plt.figure(figsize=(12, 7.5))
    sns.heatmap(cm,annot=True,cmap='Blues',linewidths=2,fmt='g',xticklabels=l, yticklabels=l);
    plt.title("Confusion Matrix for test",fontsize=15)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    c2.pyplot()

    #Calculate Metrics
    acc = accuracy_score(Y_train,Y_pred_train)
    mse=mean_squared_error(Y_train,Y_pred_train)
    precision =  precision_score(Y_train, Y_pred_train, average='macro')
    recall = recall_score(Y_train, Y_pred_train,average='macro')
    fscore = f1_score(Y_train, Y_pred_train, average='macro')
    st.subheader("Training Metrics of the model: ")
    st.text('Precision: {} \nRecall: {} \nF1-Score: {} \nAccuracy: {} %\nMean Squared Error: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore,3), round((acc*100),3), round((mse),3)))
    
    
    acct = accuracy_score(Y_test,Y_pred_test)
    mset=mean_squared_error(Y_test,Y_pred_test)
    precisiont =  precision_score(Y_test, Y_pred_test, average='macro')
    recallt = recall_score(Y_test, Y_pred_test,average='macro')
    fscoret = f1_score(Y_test, Y_pred_test, average='macro')
    st.subheader("Testing Training Metrics of the model: ")
    st.text('Precision: {} \nRecall: {} \nF1-Score: {} \nAccuracy: {} %\nMean Squared Error: {}'.format(
        round(precisiont, 3), round(recallt, 3), round(fscoret,3), round((acct*100),3), round((mset),3)))

st.markdown("<hr>",unsafe_allow_html=True)
st.header(f"1) Model for Prediction of {dataset_name}")
st.subheader(f"Classifier Used: {classifier_name}")
compute()

#Execution Time
end_time=time.time()
st.info(f"Total execution time: {round((end_time - start_time),4)} seconds")


#Get user values
def user_inputs_ui(dataset_name,data):
    user_val = {}
    if dataset_name == "Normal":
        X = data.drop(["fuel_type"], axis=1)
        for col in X.columns:
            name = col
            col = st.number_input(col, min_value=(X[col].min()*1.00000), max_value=(X[col].max()*1.00000),format="%.5f")
            user_val[name] = round((col),4)

    elif dataset_name == "Augmented":
        X = data.drop(["fuel_type"], axis=1)
        for col in X.columns:
            name = col
            col = st.number_input(col, min_value=(X[col].min()*1.00000), max_value=(X[col].max()*1.00000),format="%.5f")
            user_val[name] = col

    return user_val

#User values
st.markdown("<hr>",unsafe_allow_html=True)
st.header("2) User Values")
with st.expander("See more"):
    st.markdown("""
    In this section you can use your own values to predict the target variable. 
    Input the required values below and you will get your status based on the values. <br>
    <p style='color: red;'> 0 - Cellulose </p>
    <p style='color: blue;'> 1 - Methane </p> 
    <p style='color: #FFD700;'> 2 - Natural Gas </p>
    <p style='color: green;'> 3 - Other Fuels </p>
    <p style='color: #FF00FF;'> 4 - Plastic</p> 
    <p style='color: #33F9FF;'> 5 - Propane </p>
    <p style='color: #FF3383;'> 6 - Wood (MDF) </p>
    """,unsafe_allow_html=True)

user_val = user_inputs_ui(dataset_name,data)

#@st.cache(suppress_st_warning=True)
def user_predict():
    global U_pred
    if dataset_name == "Normal":
        X = data.drop(["fuel_type"], axis=1)
        U_pred = clf.predict([[user_val[col] for col in X.columns]])

    elif dataset_name == "Augmented":
        X = data.drop(["fuel_type"], axis=1)
        U_pred = clf.predict([[user_val[col] for col in X.columns]])

    st.subheader("Your Status: ")
    if U_pred == 0:
        st.write(U_pred[0], " - The burnt material is Cellulose :)")
    if U_pred == 1:
        st.write(U_pred[0], " - The burnt material is Methane :)")
    if U_pred == 2:
        st.write(U_pred[0], " - The burnt material is Natural Gas :)")
    if U_pred == 3:
        st.write(U_pred[0], " - The burnt material is Other Fuels :)")
    if U_pred == 4:
        st.write(U_pred[0], " - The burnt material is Plastic :)")
    if U_pred == 5:
        st.write(U_pred[0], " - The burnt material is Propane :)")
    if U_pred == 6:
        st.write(U_pred[0], " - The burnt material is Wood (MDF) :)")
        
        
user_predict()  #Predict the status of user.


Writing app.py


In [None]:
!streamlit run app.py & npx localtunnel --port 8501

2022-08-17 05:14:43.889 INFO    numexpr.utils: NumExpr defaulting to 2 threads.
[K[?25hnpx: installed 22 in 3.819s
your url is: https://weak-trams-worry-104-199-139-78.loca.lt
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.2:8501[0m
[34m  External URL: [0m[1mhttp://104.199.139.78:8501[0m
[0m
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  "X does not have valid feature names, but"
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  "X does not have valid feature names, but"
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(av