<a href="https://colab.research.google.com/github/amara929/amara929/blob/main/Streamlit_for_Project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's runtime dependencies: Streamlit (imported by app.py)
# and pyngrok (imported in the tunnel cell below). `-q` keeps pip's output quiet.
!pip install -q streamlit pyngrok

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Streamlit app: upload the Annual Enterprise Survey CSV, label-encode two
# categorical columns, fit a one-feature linear regression between them, and
# display metrics, a fit plot, and an interactive single-value prediction.

# Columns of the uploaded CSV used as the model's feature and target.
FEATURE_COLUMN = 'Industry_aggregation_NZSIOC'
TARGET_COLUMN = 'Industry_code_NZSIOC'

st.set_page_config(page_title="Industry Code Regression", layout="centered")

st.title("Linear Regression on Annual Enterprise Survey")

uploaded_file = st.file_uploader("Upload the Annual Enterprise Survey CSV", type=["csv"])
if uploaded_file is not None:
    # Read the file the user actually uploaded (a file-like object) instead of
    # a hard-coded path that only exists on the original author's Colab VM.
    df = pd.read_csv(uploaded_file)

    st.subheader("Raw Data")
    st.write(df.head())

    if FEATURE_COLUMN in df.columns and TARGET_COLUMN in df.columns:
        st.success("Columns found. Proceeding with visualization and modeling.")

        # One distribution plot per modelled column.
        for column in (FEATURE_COLUMN, TARGET_COLUMN):
            fig, ax = plt.subplots()
            sns.histplot(df[column], kde=True, ax=ax)
            st.pyplot(fig)
            # Streamlit reruns the whole script on every interaction; close the
            # figure so pyplot's global registry does not grow without bound.
            plt.close(fig)

        # LabelEncoder cannot sort a mix of strings and NaN, so model only the
        # rows where both columns are present.
        model_df = df[[FEATURE_COLUMN, TARGET_COLUMN]].dropna()

        if len(model_df) < 2:
            # train_test_split needs at least one train and one test sample.
            st.warning("Not enough complete rows to train and evaluate a model.")
        else:
            encoder_x = LabelEncoder()
            encoder_y = LabelEncoder()

            # Encode each categorical column to integer codes and reshape to
            # the (n_samples, 1) layout used for fitting below.
            X_encoded = encoder_x.fit_transform(model_df[FEATURE_COLUMN].to_numpy()).reshape(-1, 1)
            Y_encoded = encoder_y.fit_transform(model_df[TARGET_COLUMN].to_numpy()).reshape(-1, 1)

            X_train, X_test, Y_train, Y_test = train_test_split(
                X_encoded, Y_encoded, test_size=0.2, random_state=42
            )

            model = LinearRegression()
            model.fit(X_train, Y_train)
            y_pred = model.predict(X_test)

            st.subheader("Model Evaluation")
            st.write(f"**MAE:** {mean_absolute_error(Y_test, y_pred):.4f}")
            st.write(f"**MSE:** {mean_squared_error(Y_test, y_pred):.4f}")
            st.write(f"**R² Score:** {r2_score(Y_test, y_pred):.4f}")

            st.subheader("Regression Visualization")
            fig3, ax3 = plt.subplots()
            ax3.scatter(X_test, Y_test, color='blue', label='Actual')
            ax3.plot(X_test, y_pred, color='red', label='Prediction')
            ax3.set_xlabel('Industry Aggregation (Encoded)')
            ax3.set_ylabel('Industry Code (Encoded)')
            ax3.set_title('Linear Regression Fit')
            ax3.legend()
            st.pyplot(fig3)
            plt.close(fig3)

            st.subheader("Make a Prediction")
            # The input is the *encoded* feature value, bounded by the largest
            # code the encoder produced.
            user_input = st.number_input(
                "Enter Industry Aggregation (Encoded)",
                min_value=0,
                max_value=int(X_encoded.max()),
            )
            # The model was fit on a 2-D target, so predict returns shape (1, 1).
            predicted = model.predict(np.array([[user_input]]))[0][0]
            st.write(f"Predicted Industry Code (Encoded): {predicted:.2f}")
    else:
        st.warning("Required columns not found in the uploaded CSV.")
        st.write("Available columns:", df.columns.tolist())
else:
    st.info("Upload the Annual Enterprise Survey CSV file to get started.")

Overwriting app.py


In [None]:
from pyngrok import ngrok

ngrok.set_auth_token("2vTtEvvIaMrIm6QLW7nBhUarLkv_7tgLY7nDjay4jxE6idJRS")

!streamlit run app.py &>/content/log.txt &
# Pass the port as part of the addr argument
public_url = ngrok.connect(addr="8501")
print(f"Streamlit app is live at: {public_url}")

Streamlit app is live at: NgrokTunnel: "https://5ca6-34-106-149-13.ngrok-free.app" -> "http://localhost:8501"
