<a href="https://colab.research.google.com/github/axisSN01/Python-learning-path-ChatGPT/blob/main/Create_streamlit_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Run streamlit app from a Google Colab Notebook
> Credits: Created by [Manuel Romero](https://twitter.com/mrm8488),
                      [GITHUB](https://github.com/mrm8488/shared_colab_notebooks/blob/master/Create_streamlit_app.ipynb)

In [15]:
# check if pandas is installed and install if necessary
!pip show pandas || pip install pandas

# check if seaborn is installed and install if necessary
!pip show seaborn || pip install seaborn

# check if plotly is installed and install if necessary
!pip show plotly || pip install plotly


# check if streamlit is installed and install if necessary
!pip show streamlit || pip install streamlit


Name: pandas
Version: 1.5.3
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: The Pandas Development Team
Author-email: pandas-dev@python.org
License: BSD-3-Clause
Location: /usr/local/lib/python3.10/dist-packages
Requires: numpy, python-dateutil, pytz
Required-by: altair, arviz, cmdstanpy, cufflinks, datascience, db-dtypes, dopamine-rl, fastai, google-colab, gspread-dataframe, holoviews, mizani, mlxtend, pandas-datareader, pandas-gbq, plotnine, prophet, pymc, seaborn, sklearn-pandas, statsmodels, streamlit, vega-datasets, xarray, yfinance
Name: seaborn
Version: 0.12.2
Summary: Statistical data visualization
Home-page: 
Author: 
Author-email: Michael Waskom <mwaskom@gmail.com>
License: 
Location: /usr/local/lib/python3.10/dist-packages
Requires: matplotlib, numpy, pandas
Required-by: missingno
Name: plotly
Version: 5.13.1
Summary: An open-source, interactive data visualization library for Python
Home-page: http

## Create a streamlit app example


In [None]:
%%writefile app.py

import streamlit as st
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# First draft of the app, kept only as an inert string literal: nothing in the
# visible script references FIRST_OPTION, so the code inside the string never runs.
# The draft defines data_source / relations / three_d_plot / time_series, each with
# its own file uploader, and reads every upload with pd.read_excel regardless of
# the accepted "csv"/"txt" extensions.
FIRST_OPTION = """
# Title of the app
st.title("Data Analysis App")

# Creating the Data source tab
def data_source():
    # File uploader widget
    file = st.file_uploader("Upload file", type=["xlsx", "csv", "txt"])
    if file is not None:
        # Reading the file
        df = pd.read_excel(file)
        # Preview of the table
        st.write("Preview of the table")
        st.write(df.head())
        # Totalizer of blanks, nulls, and table size
        st.write("Totalizer of blanks, nulls, and table size")
        st.write(df.isnull().sum())
        st.write(df.shape)

# Creating the Relations tab
def relations():
    # File uploader widget
    file = st.file_uploader("Upload file", type=["xlsx", "csv", "txt"])
    if file is not None:
        # Reading the file
        df = pd.read_excel(file)
        # Pairplot
        st.write("Pairplot")
        sns.pairplot(df)
        st.pyplot()
        # Correlation heatmap
        st.write("Correlation heatmap")
        corr = df.corr()
        fig = px.imshow(corr)
        st.plotly_chart(fig)

# Creating the 3D plot tab
def three_d_plot():
    # File uploader widget
    file = st.file_uploader("Upload file", type=["xlsx", "csv", "txt"])
    if file is not None:
        # Reading the file
        df = pd.read_excel(file)
        # Choosing the columns for the 3D scatter plot
        st.write("Choose columns for 3D scatter plot")
        x_col = st.selectbox("X axis", df.columns)
        y_col = st.selectbox("Y axis", df.columns)
        z_col = st.selectbox("Z axis", df.columns)
        color_col = st.selectbox("Color", df.columns)
        size_col = st.selectbox("Size", df.columns)
        # 3D scatter plot
        st.write("3D scatter plot")
        fig = px.scatter_3d(df, x=x_col, y=y_col, z=z_col, color=color_col, size=size_col)
        st.plotly_chart(fig)

# Creating the time series tab
def time_series():
    # File uploader widget
    file = st.file_uploader("Upload file", type=["xlsx", "csv", "txt"])
    if file is not None:
        # Reading the file
        df = pd.read_excel(file)
        # Checking if the file has a datetime column
        datetime_cols = [col for col in df.columns if pd.api.types.is_datetime64_any_dtype(df[col])]
        if len(datetime_cols) == 0:
            st.write("File doesn't have a datetime column")
        else:
            # Time series plot
            st.write("Time series plot")
            datetime_col = datetime_cols[0]
            fig = px.line(df, x=datetime_col, y=df.columns[0])
            st.plotly_chart(fig)

"""

#########################################################################################################

# Second draft of the app, also kept only as an inert string literal: nothing in
# the visible script references SECOND_OPTION, so none of this code executes.
# In this draft the "Data source" page stores the loaded frame in
# st.session_state.data_source_df and the other pages read it back from there;
# the time_series draft ends right after the datetime-column check (no plot).
SECOND_OPTION = """
# Set page configuration
st.set_page_config(page_title="Data Analysis App", page_icon=":bar_chart:", layout="wide")

# Load data function
@st.cache_data
def load_data(file):
    df = pd.read_excel(file, engine='openpyxl')
    return df

# Data source tab function
def data_source():
    st.header("Data source")
    file = st.file_uploader("Upload XLSX, CSV, or TXT file", type=["xlsx", "csv", "txt"])

    if file is not None:
        # Load data
        df = load_data(file)

        # Display overall analysis
        st.write("Preview of the table:")
        st.dataframe(df.head(10))

        totalizer = pd.DataFrame({
            'Concept': ['Blanks', 'Nulls', 'Data type', 'Cell size (bytes)'],
            'Value': [df.isnull().sum().sum(), df.isna().sum().sum(), dict(df.dtypes), df.memory_usage(deep=True).sum()]
        })
        st.write("Totalizer table:")
        st.table(totalizer)

        # Remember session settings
        st.session_state.data_source_df = df

        return df

# Relations tab function
def relations():
    st.header("Relations")

    # Load saved session settings
    df = st.session_state.data_source_df

    # Check if data source tab has been completed
    if df is None:
        st.warning("Please complete the 'Data source' tab first.")
        return

    # Display pairplot
    st.write("Pairplot")
    sns.pairplot(df)
    st.pyplot()

    # Display heatmap of correlations
    st.write("Heatmap of correlations")
    corr = df.corr()
    fig = go.Figure(data=go.Heatmap(z=corr.values, x=corr.index, y=corr.columns, colorscale='Viridis'))
    fig.update_layout(width=800, height=800)
    st.plotly_chart(fig)

# 3D plot tab function
def three_d_plot():
    st.header("3D plot")

    # Load saved session settings
    df = st.session_state.data_source_df

    # Check if data source tab has been completed
    if df is None:
        st.warning("Please complete the 'Data source' tab first.")
        return

    # Select columns to plot
    cols = st.multiselect("Select columns to plot", df.columns)

    if len(cols) != 5:
        st.warning("Please select exactly 5 columns to plot.")
        return

    # 3D scatter plot
    st.write("3D scatter plot")
    fig = px.scatter_3d(df, x=cols[0], y=cols[1], z=cols[2], color=cols[3], size=cols[4])
    st.plotly_chart(fig)

# Time series tab function
def time_series():
    st.header("Time series")

    # Load saved session settings
    df = st.session_state.data_source_df

    # Check if data source tab has been completed
    if df is None:
        st.warning("Please complete the 'Data source' tab first.")
        return

    # Check if file has a datetime column
    datetime_cols = df.select_dtypes(include='datetime64').columns
    if len(datetime_cols) == 0:
        st.warning("The file doesn't have a datetime column.")
        return
"""
############################################################################################
import streamlit as st
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

# Set up the app layout
# Configure the page title; no other Streamlit command runs before this call
# in the visible script.
st.set_page_config(page_title="Data Analysis App")
# Page labels offered by the sidebar radio. The dispatch at the bottom of the
# script compares `page` against string literals with `==`, so the comparison
# is exact and case-sensitive.
tabs = ["Data source", "Relations", "3D plot", "Time Series"]
# `page` holds whichever label the user currently has selected in the sidebar.
page = st.sidebar.radio("Select a page", tabs)

# Define a function to read the file and return a dataframe
@st.cache_data(persist=True)
def _parse_upload(mime_type, payload):
    """Parse the raw bytes of an upload into a DataFrame.

    The cache key is ``(mime_type, payload)``, i.e. the file's content rather
    than the upload object itself, and the body has no side effects, so a
    cache hit and a cache miss are indistinguishable to the caller.

    Args:
        mime_type: MIME type reported for the upload.
        payload: Full content of the upload as ``bytes``.

    Returns:
        The parsed DataFrame, or ``None`` when ``mime_type`` is not one of the
        three supported types.
    """
    import io  # local import: keeps this block self-contained

    buffer = io.BytesIO(payload)
    if mime_type == "text/csv":
        return pd.read_csv(buffer)
    if mime_type == "text/plain":
        # .txt uploads are read as tab-separated values.
        return pd.read_csv(buffer, delimiter='\t')
    if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return pd.read_excel(buffer)
    return None


def load_data(file):
    """Load an uploaded file into a DataFrame and remember the upload.

    Parsing is delegated to the cached ``_parse_upload`` helper. The
    session-state write and the error message live in this uncached wrapper so
    they run on every call; inside a cached function they are skipped whenever
    the cache is hit. Reading from ``file.getvalue()`` instead of the stream
    makes the result independent of the stream's current read position, which
    matters because the other pages call this again with the stored upload.

    Args:
        file: Upload object returned by ``st.file_uploader`` (must expose
            ``.type`` and ``.getvalue()``), or ``None``.

    Returns:
        The parsed DataFrame, or ``None`` when ``file`` is ``None`` or its
        MIME type is unsupported (an error is shown in the latter case).
    """
    if file is None:
        return None

    df = _parse_upload(file.type, file.getvalue())
    if df is None:
        st.error("Invalid file type.")
        return None

    # Remember the upload so the other pages can reload the same data.
    st.session_state['file'] = file
    return df

# Define a function to display the data source tab
def display_data_source_tab():
    """Render the "Data source" page: uploader, preview and per-column summary.

    Shows a file uploader; once a file is uploaded and ``load_data`` returns a
    DataFrame, displays the first 100 rows followed by a "Totalizer" table with
    one row per column (null count, dtype name, memory size in bytes).
    Renders nothing beyond the uploader when no file is uploaded or loading
    fails.
    """
    st.subheader("Data source")
    file = st.file_uploader("Upload a file", type=["csv", "txt", "xlsx"])
    if file is None:
        return

    df = load_data(file)
    if df is None:
        return

    st.write("Preview of the first 100 rows:")
    st.dataframe(df.head(100))

    st.write("Totalizer:")
    totalizer = pd.DataFrame({
        "nulls": df.isnull().sum(),
        # Store dtype names as plain strings rather than numpy dtype objects so
        # the column holds ordinary values when the table is displayed.
        "data type": df.dtypes.astype(str),
        # Per-column size. Previously the frame-wide total (a scalar) was
        # broadcast to every row, repeating the same number for each column.
        "size (bytes)": df.memory_usage(index=False),
    })
    st.dataframe(totalizer)

# Define a function to display the relations tab
def display_relations_tab():
    """Render the "Relations" page: a seaborn pair plot and a correlation heatmap.

    Uses the upload remembered under ``st.session_state['file']``. If nothing
    has been uploaded yet, a warning is shown instead of raising ``KeyError``.
    """
    st.subheader("Relations")

    file = st.session_state.get('file')
    if file is None:
        st.warning("Please upload a file in the 'Data source' tab first.")
        return

    df = load_data(file)
    if df is None:
        return

    sns.set_style("whitegrid")
    pair_grid = sns.pairplot(df)
    # st.pyplot expects a matplotlib figure; hand it the grid's underlying one.
    st.pyplot(pair_grid.figure)

    # Restrict to numeric columns: text/datetime columns have no correlation
    # and make DataFrame.corr warn or raise depending on the pandas version.
    corr = df.corr(numeric_only=True)
    heatmap = px.imshow(corr)
    st.plotly_chart(heatmap)

# Define a function to display the 3D plot tab
def display_3d_plot_tab():
    """Render the "3D plot" page: a 3D scatter of five user-selected columns.

    The selected columns are used, in selection order, as x, y, z, marker
    colour and marker size. Uses the upload remembered under
    ``st.session_state['file']``; shows a warning when nothing has been
    uploaded yet (instead of raising ``KeyError``) and a hint while the
    selection does not contain exactly five columns.
    """
    st.subheader("3D plot")

    file = st.session_state.get('file')
    if file is None:
        st.warning("Please upload a file in the 'Data source' tab first.")
        return

    df = load_data(file)
    if df is None:
        return

    st.write("Select 5 columns to plot:")
    selected_columns = st.multiselect("Columns", df.columns)
    if len(selected_columns) != 5:
        st.info("Pick exactly 5 columns, in this order: x, y, z, color, size.")
        return

    x_col, y_col, z_col, color_col, size_col = selected_columns
    fig3 = go.Figure(data=[go.Scatter3d(
        x=df[x_col],
        y=df[y_col],
        z=df[z_col],
        mode='markers',
        marker=dict(color=df[color_col], size=df[size_col], colorscale='Viridis', opacity=0.8),
    )])
    fig3.update_layout(scene=dict(xaxis_title=x_col,
                                  yaxis_title=y_col,
                                  zaxis_title=z_col))
    st.plotly_chart(fig3)

# Define a function to display the time series tab
def display_time_series_tab():
    """Render the "Time Series" page: line chart of chosen columns over time.

    Uses the upload remembered under ``st.session_state['file']``. The user
    picks one datetime column for the x axis and any number of columns for
    the y axis; the chart is drawn once at least one y column is selected.
    Warnings are shown when nothing has been uploaded yet (instead of raising
    ``KeyError``) or when the data has no datetime column.
    """
    st.subheader("Time Series")

    file = st.session_state.get('file')
    if file is None:
        st.warning("Please upload a file in the 'Data source' tab first.")
        return

    df = load_data(file)
    if df is None:
        return

    time_columns = df.select_dtypes(include='datetime64').columns.tolist()
    if not time_columns:
        st.warning("The file doesn't have a datetime column.")
        return

    st.write("Select a time column:")
    time_column = st.selectbox("Column", time_columns)
    selected_columns = st.multiselect("Columns", df.columns)
    if not selected_columns:
        return

    # Sort by time so the line is drawn in chronological order even when the
    # rows of the file are not.
    fig4 = px.line(df.sort_values(time_column), x=time_column, y=selected_columns)
    st.plotly_chart(fig4)

# Displaying the selected tab
# Render the page selected in the sidebar. The literals must match the radio
# labels exactly (string comparison is case-sensitive): the last label is
# "Time Series" with a capital S, so comparing against "Time series" meant the
# time-series page could never be shown.
if page == "Data source":
    display_data_source_tab()
elif page == "Relations":
    display_relations_tab()
elif page == "3D plot":
    display_3d_plot_tab()
elif page == "Time Series":
    display_time_series_tab()

#####################################################################################33
# # Creating the sidebar navigation menu
# menu = ["Data source", "Relations", "3D plot", "Time series"]
# choice = st.sidebar.radio("Select an option", menu)

# # Displaying the selected tab
# if choice == "Data source":
#     data_source()
# elif choice == "Relations":
#     relations()
# elif choice == "3D plot":
#     three_d_plot()
# elif choice == "Time series":
#     time_series()


## Install localtunnel

In [17]:
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
[K[?25h+ localtunnel@2.0.2
updated 1 package and audited 36 packages in 0.758s

3 packages are looking for funding
  run `npm fund` for details

found [92m0[0m vulnerabilities



## Run streamlit in background

In [18]:
!streamlit run /content/app.py --server.port 8501 &>/content/logs.txt &

## Expose the port 8501
Then just click the `url` shown in the output.

A `logs.txt` file with Streamlit's output is created in `/content` by the previous cell.

In [None]:
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 3.03s
your url is: https://ripe-dryers-smoke-34-148-97-0.loca.lt
