# Aplicación Web de Ciencia de Datos (Streamlit)

Autora: Beatriz Ibarra Mendoza  
Última actualización: 16/10/2023


Este notebook explora una base de datos sobre los empleados de una empresa y, a partir de ella, desarrolla una aplicación web con Streamlit. Incluye las siguientes etapas:  
* *Instalación* de paqueterías y *exploración* de datos.  
* *Instalación* de Streamlit y *localtunnel*.  
* Desarrollo de aplicación *employees.py*.


### Instalación de paqueterías y exploración de datos

In [None]:
import pandas as pd

In [None]:
# Read the employees CSV from the Colab filesystem into a DataFrame.
employees1 = pd.read_csv('/content/Employees (1).csv')
# Bare expression as the last line of the cell: the notebook renders the DataFrame.
employees1

Unnamed: 0,Employee_ID,Gender,Age,Education_Level,Relationship_Status,Hometown,Unit,Decision_skill_possess,Time_of_service,Time_since_promotion,growth_rate,Travel_Rate,Post_Level,Pay_Scale,Compensation_and_Benefits,Work_Life_balance,Attrition_rate
0,EID_23371,F,42.0,4,Married,Franklin,IT,Conceptual,4.0,4,33,1,1,7.0,type2,3.0,0.1841
1,EID_18000,M,24.0,3,Single,Springfield,Logistics,Analytical,5.0,4,36,0,3,6.0,type2,4.0,0.0670
2,EID_3891,F,58.0,3,Married,Clinton,Quality,Conceptual,27.0,3,51,0,2,8.0,type2,1.0,0.0851
3,EID_17492,F,26.0,3,Single,Lebanon,Human Resource Management,Behavioral,4.0,3,56,1,3,8.0,type2,1.0,0.0668
4,EID_22534,F,31.0,1,Married,Springfield,Logistics,Conceptual,5.0,4,62,1,3,2.0,type3,3.0,0.1827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,EID_16328,F,23.0,5,Married,Franklin,Operarions,Behavioral,4.0,1,20,1,2,6.0,type3,3.0,0.7839
6996,EID_8387,F,44.0,1,Married,Lebanon,R&D,Analytical,16.0,2,59,0,2,4.0,type3,3.0,0.1055
6997,EID_8077,F,49.0,3,Single,Springfield,IT,Directive,10.0,1,59,1,2,3.0,type2,4.0,0.7847
6998,EID_19597,F,47.0,3,Married,Washington,Sales,Behavioral,24.0,1,21,1,5,8.0,type0,2.0,0.4162


In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
employees1.shape

(7000, 17)

In [None]:
# Column labels available in the dataset.
employees1.columns

Index(['Employee_ID', 'Gender', 'Age', 'Education_Level',
       'Relationship_Status', 'Hometown', 'Unit', 'Decision_skill_possess',
       'Time_of_service', 'Time_since_promotion', 'growth_rate', 'Travel_Rate',
       'Post_Level', 'Pay_Scale', 'Compensation_and_Benefits',
       'Work_Life_balance', 'Attrition_rate'],
      dtype='object')

In [None]:
# Preview the first 10 rows.
employees1.head(10)

Unnamed: 0,Employee_ID,Gender,Age,Education_Level,Relationship_Status,Hometown,Unit,Decision_skill_possess,Time_of_service,Time_since_promotion,growth_rate,Travel_Rate,Post_Level,Pay_Scale,Compensation_and_Benefits,Work_Life_balance,Attrition_rate
0,EID_23371,F,42.0,4,Married,Franklin,IT,Conceptual,4.0,4,33,1,1,7.0,type2,3.0,0.1841
1,EID_18000,M,24.0,3,Single,Springfield,Logistics,Analytical,5.0,4,36,0,3,6.0,type2,4.0,0.067
2,EID_3891,F,58.0,3,Married,Clinton,Quality,Conceptual,27.0,3,51,0,2,8.0,type2,1.0,0.0851
3,EID_17492,F,26.0,3,Single,Lebanon,Human Resource Management,Behavioral,4.0,3,56,1,3,8.0,type2,1.0,0.0668
4,EID_22534,F,31.0,1,Married,Springfield,Logistics,Conceptual,5.0,4,62,1,3,2.0,type3,3.0,0.1827
5,EID_2278,M,54.0,3,Married,Lebanon,Purchasing,Conceptual,19.0,1,21,2,5,9.0,type2,1.0,0.7613
6,EID_18588,F,21.0,4,Married,Springfield,Purchasing,Directive,2.0,1,50,1,1,6.0,type2,2.0,0.2819
7,EID_1235,F,,3,Married,Springfield,Sales,Directive,34.0,4,23,1,4,6.0,type3,2.0,0.1169
8,EID_10197,M,40.0,4,Single,Springfield,Production,Analytical,13.0,1,49,0,1,9.0,type0,4.0,0.1968
9,EID_21262,M,45.0,3,Married,Lebanon,IT,Directive,21.0,4,34,0,2,5.0,type3,4.0,0.287


In [None]:
# Data type pandas assigned to each column.
employees1.dtypes

Employee_ID                   object
Gender                        object
Age                          float64
Education_Level                int64
Relationship_Status           object
Hometown                      object
Unit                          object
Decision_skill_possess        object
Time_of_service              float64
Time_since_promotion           int64
growth_rate                    int64
Travel_Rate                    int64
Post_Level                     int64
Pay_Scale                    float64
Compensation_and_Benefits     object
Work_Life_balance            float64
Attrition_rate               float64
dtype: object

### Instalación de streamlit y localtunnel

In [None]:
# Install Streamlit into the notebook's Python environment.
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.27.2-py2.py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
Collecting validators<1,>=0.2 (from streamlit)
  Downloading validators-0.22.0-py3-none-any.whl (26 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.37-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.0/190.0 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.8.1b0-py2.py3-none-any.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m78.1 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m9.9 MB/s[0m eta [36m0:00:0

In [None]:
# Install the localtunnel npm package (invoked later through npx).
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[34;40mnotice[0m[35m[0m created a lockfile as package-lock.json. You should commit this file.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
+ localtunnel@2.0.2
added 22 packages from 22 contributors and audited 22 packages in 2.345s

3 packages are looking for funding
  run `npm fund` for details

found [92m0[0m vulnerabilities

[K[?25h

In [None]:
# Print this machine's public IPv4 address as reported by icanhazip.com.
# NOTE(review): presumably needed as the access password on the localtunnel page — confirm.
!curl ipv4.icanhazip.com

34.81.87.170


### Desarrollo de aplicación (employees.py)

In [None]:
%%writefile employees.py

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# General information rendered at the top of the app: each text is emitted
# with its matching Streamlit element, in this order.
for render, text in (
    (st.title, 'Información sobre empleados y deserción laboral 📉📌'),
    (st.header, 'Fuente: Hackathon HackerEarth 2020'),
    (st.write, 'Dashboard creado por Beatriz Ibarra Mendoza'),
):
    render(text)

# Cache the parsed CSV (ttl = 3600*24, i.e. one day expressed in seconds) so
# that script reruns reuse the result instead of reading the file again.
@st.cache_data(ttl=3600*24)
def load_employees(path: str = '/content/Employees (1).csv') -> pd.DataFrame:
    """Read the employees CSV into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to the Colab path the app
            was written against, so existing ``load_employees()`` calls
            behave exactly as before.

    Returns:
        The DataFrame produced by ``pd.read_csv(path)``.
    """
    return pd.read_csv(path)

# Show a status line, load the dataset, then update the element returned by
# st.text with the completion message.
load_status = st.text("Loading data...")
employees = load_employees()
load_status.text("Data loaded (cached)")

# Sidebar heading.
st.sidebar.title('Búsquedas')

# Sidebar - when the checkbox is ticked, write the complete DataFrame.
show_everything = st.sidebar.checkbox('Mostrar todos los datos')
if show_everything:
    st.write(employees)

# Sidebar - free-text searches.
def _sidebar_text_search(input_label, button_label, column):
    """Render a sidebar text box plus button and show the rows matching the query.

    Args:
        input_label: Label of the sidebar text input.
        button_label: Label of the sidebar button that triggers the search.
        column: Name of the ``employees`` column searched for the typed text.
    """
    query = st.sidebar.text_input(input_label, '')
    if st.sidebar.button(button_label):
        # regex=False: the typed text is matched literally, so characters such
        # as "(" or "." can neither raise a regex error nor widen the match.
        # na=False: a missing value counts as "no match" instead of putting
        # NaN into the boolean mask used for row selection.
        mask = employees[column].str.contains(
            query, case=False, regex=False, na=False
        )
        st.write(employees[mask])


# One search per column; the call order fixes the widget order in the sidebar.
_sidebar_text_search('Ingresa ID de empleado:', 'Buscar ID', 'Employee_ID')
_sidebar_text_search('Ingresa una ciudad:', 'Buscar ciudad', 'Hometown')
_sidebar_text_search('Ingresa la unidad de la empresa:', 'Buscar unidad', 'Unit')

# Select box - education level.
# The first option is a placeholder; choosing it means "no filter".
LEVEL_PLACEHOLDER = 'Selecciona un nivel'
level_options = [LEVEL_PLACEHOLDER, *sorted(employees['Education_Level'].unique())]
chosen_level = st.sidebar.selectbox('Nivel Educativo', level_options)
if chosen_level != LEVEL_PLACEHOLDER:
    st.header('Filtrado de datos por nivel educativo.')
    # Keep only the rows whose education level equals the selection.
    level_rows = employees.loc[employees['Education_Level'] == chosen_level]
    st.write(level_rows)
    st.write(f'La cantidad de empleados con este nivel educativo es {len(level_rows)!r}.')

# Select box - hometown.
# The first option is a placeholder; choosing it means "no filter".
CITY_PLACEHOLDER = 'Selecciona una ciudad'
city_options = [CITY_PLACEHOLDER, *sorted(employees['Hometown'].unique())]
chosen_city = st.sidebar.selectbox('Ciudad', city_options)
if chosen_city != CITY_PLACEHOLDER:
    st.header('Filtrado de datos por ciudad de estudio.')
    # Keep only the rows whose hometown equals the selection.
    city_rows = employees.loc[employees['Hometown'] == chosen_city]
    st.write(city_rows)
    st.write(f'La cantidad de empleados en esta ciudad es {len(city_rows)!r}.')

# Select box - work unit.
# The first option is a placeholder; choosing it means "no filter".
UNIT_PLACEHOLDER = 'Selecciona una unidad'
unit_options = [UNIT_PLACEHOLDER, *sorted(employees['Unit'].unique())]
chosen_unit = st.sidebar.selectbox('Unidad', unit_options)
if chosen_unit != UNIT_PLACEHOLDER:
    st.header('Filtrado de datos por unidades de trabajo.')
    # Keep only the rows whose unit equals the selection.
    unit_rows = employees.loc[employees['Unit'] == chosen_unit]
    st.write(unit_rows)
    st.write(f'La cantidad de empleados en esta unidad es {len(unit_rows)!r}.')

# Radio button - chart selector. The first entry matches none of the chart
# branches below, so selecting it leaves the page without a chart.
chart_options = [
    'Borrar gráficos',
    'Histograma/Edades',
    'Frecuencias/Unidad',
    'Ciudad/Tasa deserción',
    'Edad/Tasa deserción',
    'Tiempo de servicio/Tasa deserción',
]
menu = st.sidebar.radio('Visualizar:', chart_options)

# Histogram - employees grouped by age.
if menu == 'Histograma/Edades':
    st.write('Este gráfico muestra la cantidad de empleados por grupos de edad:')
    fig, ax = plt.subplots()
    # Explicit bin edges: one 15-20 bin followed by ten-year bins up to 70.
    # ax=ax draws on the axes created above instead of relying on pyplot's
    # implicit "current axes".
    sns.histplot(
        x=employees['Age'],
        bins=[15, 20, 30, 40, 50, 60, 70],
        color='darkseagreen',
        edgecolor=None,
        ax=ax,
    )
    st.pyplot(fig)
    # Close the figure once rendered; otherwise pyplot keeps every figure
    # created by each rerun of the script alive.
    plt.close(fig)

# Frequency chart - employees per unit.
if menu == 'Frecuencias/Unidad':
    st.write('Este gráfico muestra la cantidad de empleados que hay en cada una de las unidades de estudio:')
    # Count rows per unit and reshape into a two-column table (Unit, Count)
    # that is both plotted and displayed below the chart.
    employees_unit = employees['Unit'].value_counts()
    employees_unit_count = pd.DataFrame({'Unit': employees_unit.index, 'Count': employees_unit.values})
    fig2, ax = plt.subplots()
    # data= is passed by keyword and ax=ax targets the axes created above
    # instead of pyplot's implicit "current axes".
    sns.barplot(data=employees_unit_count, x='Unit', y='Count', palette='Set2', ax=ax)
    # Rotate the x tick labels by 90 degrees.
    ax.tick_params(axis='x', rotation=90)
    st.pyplot(fig2)
    # Close the figure once rendered; otherwise pyplot keeps every figure
    # created by each rerun of the script alive.
    plt.close(fig2)
    st.write(employees_unit_count)

# Line plot - attrition rate per hometown.
if menu == 'Ciudad/Tasa deserción':
    st.write('Este gráfico muestra la tasa de deserción en cada una de las ciudades de estudio:')
    fig3, ax = plt.subplots()
    # errorbar=None replaces the deprecated ci=None (no error band is drawn);
    # ax=ax draws on the axes created above instead of pyplot's implicit
    # "current axes".
    sns.lineplot(
        x=employees['Hometown'],
        y=employees['Attrition_rate'],
        errorbar=None,
        color='darkseagreen',
        ax=ax,
    )
    st.pyplot(fig3)
    # Close the figure once rendered; otherwise pyplot keeps every figure
    # created by each rerun of the script alive.
    plt.close(fig3)

# Scatter plot - age vs. attrition rate.
if menu == 'Edad/Tasa deserción':
    st.write('Este gráfico muestra la relación que existe entre la edad de los empleados y la tasa de deserción:')
    fig4, ax = plt.subplots()
    # Attrition rate goes on the x axis and age on the y axis. ax=ax draws on
    # the axes created above instead of pyplot's implicit "current axes".
    sns.scatterplot(
        x=employees['Attrition_rate'],
        y=employees['Age'],
        color='darkseagreen',
        ax=ax,
    )
    st.pyplot(fig4)
    # Close the figure once rendered; otherwise pyplot keeps every figure
    # created by each rerun of the script alive.
    plt.close(fig4)

# Scatter plot - time of service vs. attrition rate.
if menu == 'Tiempo de servicio/Tasa deserción':
    st.write('Este gráfico muestra la relación que existe entre el tiempo de servicio y la tasa de deserción:')
    fig5, ax = plt.subplots()
    # Attrition rate goes on the x axis and time of service on the y axis.
    # ax=ax draws on the axes created above instead of pyplot's implicit
    # "current axes".
    sns.scatterplot(
        x=employees['Attrition_rate'],
        y=employees['Time_of_service'],
        color='darkseagreen',
        ax=ax,
    )
    st.pyplot(fig5)
    # Close the figure once rendered; otherwise pyplot keeps every figure
    # created by each rerun of the script alive.
    plt.close(fig5)


Overwriting employees.py


In [None]:
# Launch the Streamlit app in the background (trailing `&`), sending both
# stdout and stderr to /content/logs.txt (`&>`), so the cell returns at once.
!streamlit run employees.py &>/content/logs.txt &

In [None]:
# Expose local port 8501 through a public localtunnel URL (printed below).
# NOTE(review): 8501 is presumably the port the Streamlit app is serving on,
# since no explicit port is passed when launching it — confirm.
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 1.638s
your url is: https://honest-moons-cut.loca.lt
^C
