# Educación vs TECHx5

[sankey (go)](https://plotly.com/python/sankey-diagram/)

### Gráfico no usado en el dashboard final

## 1. Importar las librerías y el CSV con los datos de la encuesta

In [1]:
# importar librerias

import pandas as pd
import plotly.express as px  
import plotly.graph_objects as go

import webbrowser
from threading import Timer
import numpy as np
import dash
from dash import Dash, dcc, html, Input, Output, State
import dash_bootstrap_components as dbc #bootstrap


# Load the complete survey results into a single DataFrame.
# Added: the CSV is addressed through a relative path.
df21 = pd.read_csv ('../../data/survey_results_public2021.csv', index_col = [0]) # the index will be the column holding the response ID
# df21.LearnCode.value_counts() # the values in each LearnCode row are separated by ';'

## 2. Preprocesar datos.

Tratar las columnas/conjunto de datos para comenzar a crear los gráficos. 

In [2]:
def edTech(df3, col):
    """Count how often each technology co-occurs with each education level.

    Builds a long-format table of (education level, technology, count) from
    the survey answers and keeps only the five technologies with the highest
    total count.

    Args:
        df3: DataFrame containing an ``EdLevel`` column and the column named
            by ``col``. Both are expected to hold strings; multi-select
            answers are ``;``-separated. ``df3`` is not modified.
        col: Name of the multi-select column to pair with ``EdLevel``.

    Returns:
        DataFrame with the columns ``EdLevel``, ``col`` and ``count``: one row
        per (education level, top-5 technology) pair, including pairs whose
        count is 0. Parenthetical remarks are removed from ``EdLevel``.
    """
    # Keep only the respondents who answered both questions.
    pairs = df3[['EdLevel', col]].dropna()

    # 'Something else' says nothing about the education level, so drop it.
    pairs = pairs[pairs['EdLevel'] != 'Something else']

    # Split the ';'-separated answers and give every individual value its
    # own row, so each row is one (education level, technology) pair.
    exploded = (pairs
                .apply(lambda answers: answers.str.split(';'))
                .explode('EdLevel')
                .explode(col)
                # explode repeats index labels; crosstab aligns its inputs on
                # the index, so hand it a fresh, unique one.
                .reset_index(drop=True))

    # Cross-tabulate and melt into long format with a 'count' column.
    counts = (pd.crosstab(exploded['EdLevel'], exploded[col])
              .melt(value_name='count', ignore_index=False)
              .reset_index())

    # Remove parenthetical remarks from the education labels, together with
    # the whitespace they would otherwise leave behind.
    counts['EdLevel'] = (counts['EdLevel']
                         .str.replace(r"\s*\(.*?\)", "", regex=True)
                         .str.strip())

    # Keep only the five technologies with the highest total count.
    totals = counts.groupby(col)['count'].sum()
    top = totals.sort_values(ascending=False).index[:5]
    return counts[counts[col].isin(top.tolist())]

def etiquetas (df, col):
    """Return the node labels for the Sankey diagram.

    The list holds the distinct ``EdLevel`` values of ``df`` followed by the
    distinct values of the column ``col``.
    """
    return [*df.EdLevel.unique(), *df[col].unique()]

In [3]:
#edTech (df21,'DatabaseHaveWorkedWith')

## 3. Layout + callback

In [4]:
# Dash application with the Bootstrap stylesheet; the viewport meta tag lets
# the page be responsive on mobile devices.
app = dash.Dash(
    __name__,
    meta_tags=[dict(
        name='viewport',
        content='width=device-width, initial-scale=1.0, maximum-scale=1.2, minimum-scale=0.5,',
    )],
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Page layout: one row holding the option selector and the graph container.
app.layout = html.Div(children=[
    dbc.Row(children=[
        # The user sees the labels; the callback reads the selected value.
        dcc.Dropdown(
            id="select_opt",
            options=[
                dict(label="#", value="numero"),
                dict(label="%", value="porcentaje"),
            ],
            value="numero",
            multi=False,
            style=dict(width="40%"),
        ),
        # Graph container, filled in by the callback.
        dcc.Graph(id='my_survey', figure=dict(), style=dict(height='80vh')),
    ]),
])



@app.callback(
    Output(component_id='my_survey', component_property='figure'),
    Input(component_id='select_opt', component_property='value'))
def tab_content(active_tab):
    """Build the Sankey figure linking education levels to technologies.

    Args:
        active_tab: Value currently selected in the ``select_opt`` dropdown.
            NOTE(review): this value is not used yet, so the figure is the
            same for every option.

    Returns:
        A ``go.Figure`` with one Sankey trace whose links go from each
        education level to each technology returned by ``edTech``.
    """
    nameColumns = ['DatabaseHaveWorkedWith', 'LanguageHaveWorkedWith', 'PlatformHaveWorkedWith',
                   'WebframeHaveWorkedWith', 'MiscTechHaveWorkedWith', 'ToolsTechHaveWorkedWith',
                   'NEWCollabToolsHaveWorkedWith']
    name = ["Databases", "Languages", "Platforms", "Webframes", "Miscelaneous Tech", "Tools", "Collab tools"]

    # Only the first category is plotted.
    columna, categoria = nameColumns[0], name[0]

    df = edTech(df21, columna)
    labels = etiquetas(df, columna)

    # Position of each label in the node list. The first occurrence wins,
    # which matches what list.index would return, without a linear scan
    # for every row.
    posicion = {}
    for i, etiqueta in enumerate(labels):
        posicion.setdefault(etiqueta, i)

    # Link origins and destinations, as indices into the label list.
    source = [posicion[nivel] for nivel in df['EdLevel']]
    target = [posicion[tecnologia] for tecnologia in df[columna]]

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
        ),
        link=dict(
            source=source,
            target=target,
            value=df['count'],
        ),
    )])
    # The title names what is actually plotted (education vs. the category).
    fig.update_layout(title_text=f"Educación - {categoria}", font_size=12)
    return fig

## 4. run 

In [5]:
# Start the Dash server in debug mode with the reloader disabled.
# NOTE(review): newer Dash releases expose `app.run` as the preferred entry point — confirm against the installed version.
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
