# Creating Necessary Files

In [1]:
import pandas as pd
import numpy as np
import openpyxl
import matplotlib.pyplot as plt
import matplotlib.dates
from matplotlib.dates import date2num
import seaborn as sns 
import warnings
%matplotlib inline
import pickle
from sklearn.pipeline import Pipeline

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV
from sklearn.preprocessing import scale 

warnings.filterwarnings('ignore')

# Load and split the data
Aurubis = pd.read_csv('Arubis.csv')
Aurubis = Aurubis.rename(columns={"Mchine Sequence": "Machine Sequence", "Start date": "Start Date"})

Aurubis = Aurubis.drop(['Finish lbs', 'Finish date', 'Order Id', 'Coil Id'], axis=1)
Aurubis = Aurubis[Aurubis['Actual Yield'] <= 1]
Aurubis = Aurubis.dropna()
Aurubis = Aurubis.drop(['Machine Sequence', 'Machine Id', 'Alloy Type'], axis=1)

# --------------------------------------------
data = Aurubis.copy()
data['Start Date'] = pd.to_datetime(data['Start Date'], 
 format = '%Y-%m-%d', 
 errors = 'coerce')
data = data.reset_index(drop=True)

# --------------------------------------------

Aurubis['Start Date'] = pd.to_datetime(Aurubis['Start Date'], 
 format = '%Y-%m-%d', 
 errors = 'coerce')
Aurubis['Start Date_Year'] = Aurubis['Start Date'].dt.year
Aurubis['Start Date_Month'] = Aurubis['Start Date'].dt.month
Aurubis['Start Date_Week'] = Aurubis['Start Date'].dt.week
Aurubis['Start Date_Day'] = Aurubis['Start Date'].dt.day
Aurubis['Start Date_Dayofweek'] = Aurubis['Start Date'].dt.dayofweek
Aurubis['Weekend'] = Aurubis['Start Date_Dayofweek']>4
y = Aurubis['Actual Yield']
X = Aurubis.drop(['Actual Yield','Standard Yield','Start Date'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

#------------------------------------------
X_test.reset_index(drop=True)

X_train.to_csv(index=False)
X_test.to_csv(index=False)
y_train.to_csv(index=False)
y_test.to_csv(index=False)

rf = RandomForestRegressor(max_depth=10, random_state=0, max_features = 10)
rf.fit(X, y)

lasso = LassoCV(alphas = None, cv = 10, max_iter = 100000, normalize = True)
lasso.fit(X, y)


alphas = 10**np.linspace(10,-2,100)*0.5
ridge = RidgeCV(alphas = alphas, scoring = 'neg_mean_squared_error', normalize = True)
ridge.fit(X, y)

# Save the data and pipeline
pickle.dump(data, open('data.sav', 'wb'))
pickle.dump(X, open('X.sav', 'wb'))
pickle.dump(y, open('y.sav', 'wb'))
pickle.dump(X_train, open('X_train.sav', 'wb'))
pickle.dump(y_train, open('y_train.sav', 'wb'))
pickle.dump(lasso, open('lasso.sav', 'wb'))
pickle.dump(ridge, open('ridge.sav', 'wb'))
pickle.dump(rf, open('rf.sav', 'wb'))
pickle.dump(X_test, open('X_test.sav', 'wb'))
pickle.dump(y_test, open('y_test.sav', 'wb'))


# Streamlit File

In [2]:
import streamlit as st
import pickle
import numpy as np
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import pandas as pd
from statistics import mean
import seaborn as sns, numpy as np
import matplotlib.pyplot as plt

from PyInstaller.utils.hooks import copy_metadata

datas = copy_metadata('streamlit')


# Load the pipeline and data
X_test = pickle.load(open('X_test.sav', 'rb'))
y_test = pickle.load(open('y_test.sav', 'rb'))
X = pickle.load(open('X.sav', 'rb'))
y = pickle.load(open('y.sav', 'rb'))
data = pickle.load(open('data.sav', 'rb'))


import base64
import textwrap
def render_svg(svg):
    """Renders the given svg string."""
    b64 = base64.b64encode(svg.encode('utf-8')).decode("utf-8")
    html = r'<img src="data:image/svg+xml;base64,%s"/>' % b64
    st.write(html, unsafe_allow_html=True)


#Function to test certain index of dataset
def test_demo(index):
    values = data.iloc[index,]  # Input the value from dataset
    a = st.sidebar.slider('Metal Density', 0.0000, 1.0000, values[0], step = 0.001)
    b = st.sidebar.slider('Start Gauge', 0.00, 1.00, float(values[1]), 0.01)
    c = st.sidebar.slider('Start Width', 0.00, 60.00, float(values[2]), 0.01)
    d = st.sidebar.slider('Start Lbs', 0, 33000, int(values[3]), 1)
    e = st.sidebar.slider('Start Date_Year', 2000, 2100, values[6].year, 1)
    f = st.sidebar.slider('Start Date_Month', 1, 12, values[6].month, 1)
    g = values[6].week
    h = st.sidebar.slider('Start Date_Day', 1, 31, values[6].day, 1)
    i = values[6].dayofweek
    j = values[6].dayofweek > 4


    
    #Print the prediction result
    alg = ['Lasso', 'Ridge', 'Random Forest']
    classifier = st.selectbox('Which algorithm?', alg)

    pipe1 = pickle.load(open('lasso.sav', 'rb'))
    res1 = pipe1.predict(np.array([a, b, c, d, e, f, g, h, i, j]).reshape(1, -1))[0]
    if res1 > 1:
        res1 = 1
    pipe2 = pickle.load(open('ridge.sav', 'rb'))
    res2 = pipe2.predict(np.array([a, b, c, d, e, f, g, h, i, j]).reshape(1, -1))[0]
    if res2 > 1:
        res2 = 1
    pipe3 = pickle.load(open('rf.sav', 'rb'))
    res3 = pipe3.predict(np.array([a, b, c, d, e, f, g, h, i, j]).reshape(1, -1))[0]
    if res3 > 1:
        res3 = 1
    result = mean([res1, res2, res3])
    
    if classifier == 'Lasso':

        st.write('Yield Prediction:  ', round(res1, 4))
        st.write('Average Yield Prediction:  ', round(result, 4))
        pred = pipe1.predict(X)
        pred[pred > 1] = 1
        mse = mean_squared_error(y,pred)
        st.write('Mean Squared Error: ', round(mse, 5))
        
        x_data = pred
        ax = sns.distplot(x_data)
        plt.axvline(res1, color='red')
        ax.set(xlabel = 'Yeild Prediction', ylabel = "Frequency")
        st.pyplot()
        
        f = open("Arubis.svg","r")
        lines = f.readlines()
        line_string=''.join(lines)
        render_svg(line_string)

    elif classifier == 'Ridge':

        st.write('Yield Prediction:  ', round(res2, 4))
        st.write('Average Yield Prediction:  ', round(result, 4))
        pred = pipe2.predict(X)
        pred[pred > 1] = 1
        mse = mean_squared_error(y,pred)
        st.write('Mean Squared Error: ', round(mse, 5))

        x_data = pred
        ax = sns.distplot(x_data)
        plt.axvline(res2, color='red')
        ax.set(xlabel = 'Yeild Prediction', ylabel = "Frequency")
        st.pyplot()
        
        f = open("Arubis.svg","r")
        lines = f.readlines()
        line_string=''.join(lines)
        render_svg(line_string)
        

    else:

        st.write('Yield Prediction:  ', round(res3, 4))
        st.write('Average Yield Prediction:  ', round(result, 4))
        pred = pipe3.predict(X)
        pred[pred > 1] = 1
        mse = mean_squared_error(y,pred)
        st.write('Mean Squared Error: ', round(mse, 5))

        x_data = pred
        ax = sns.distplot(x_data)
        plt.axvline(res3, color='red')
        ax.set(xlabel = 'Yeild Prediction', ylabel = "Frequency")
        st.pyplot()
        
        f = open("Arubis.svg","r")
        lines = f.readlines()
        line_string=''.join(lines)
        render_svg(line_string)

        


# title
st.title('Aurubis Yield Prediction Model')
# show data
if st.checkbox('Show dataframe'):
    st.write(data)
# st.write(X_train) # Show the dataset

number = st.text_input('Choose a row of information in the dataset (0~90652):', 0)  # Input the index number

test_demo(int(number))  # Run the test function



FileNotFoundError: [Errno 2] No such file or directory: 'Arubis.svg'

<Figure size 432x288 with 0 Axes>