# Educacion vs TECHx5

## 1. Importar las librerías y el CSV con los datos de la encuesta.

In [1]:
# importar librerias

import pandas as pd
import plotly.express as px  
import plotly.graph_objects as go

import webbrowser
from threading import Timer
import numpy as np
import dash
from dash import Dash, dcc, html, Input, Output, State
import dash_bootstrap_components as dbc #bootstrap


# Load the complete survey results into a DataFrame (the path is relative).
# index_col=[0] makes the first CSV column, the response ID, the index.
df21 = pd.read_csv('../data/survey_results_public2021.csv', index_col=[0])
# Display how many responses carry each DevType value.
df21['DevType'].value_counts()

Developer, full-stack                                                                                                                                                                                                                                                                                                                                                                                          8601
Developer, back-end                                                                                                                                                                                                                                                                                                                                                                                            5467
Developer, front-end                                                                                                                                                                            

## 2. Preprocesar datos.

Tratar las columnas del conjunto de datos para comenzar a crear los gráficos. En este caso, `EdLevel` y las columnas de tecnologías utilizadas (`*HaveWorkedWith`).

In [2]:
def edTech(df3, col):
    """Count how often each education level co-occurs with each technology.

    Parameters
    ----------
    df3 : pandas.DataFrame
        Survey responses. Must contain an ``EdLevel`` column and the column
        named by ``col``; a cell with several answers separates them with ';'.
    col : str
        Name of the technology column to cross against ``EdLevel``.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the columns ``EdLevel``, ``col`` and ``count``,
        restricted to the five technologies with the highest total count.
        Parenthetical details are removed from the ``EdLevel`` labels.
    """
    # Keep only responses that answered both questions. Boolean filtering
    # builds a new frame, so the caller's data is never modified.
    answered = df3[['EdLevel', col]].dropna()
    answered = answered[answered['EdLevel'] != 'Something else']

    # Split the ';'-separated answers and give every (education, technology)
    # pair its own row. The index is reset because explode repeats labels.
    pairs = (answered
             .apply(lambda answers: answers.str.split(';'))
             .explode('EdLevel')
             .explode(col)
             .reset_index(drop=True))

    # Contingency table reshaped to one row per (EdLevel, technology) pair.
    counts = (pd.crosstab(pairs['EdLevel'], pairs[col])
              .melt(value_name='count', ignore_index=False)
              .reset_index())

    # Remove the parenthetical part of the education labels, then collapse
    # and trim the whitespace that the removal leaves behind.
    counts['EdLevel'] = (counts['EdLevel']
                         .str.replace(r"\(.*?\)", "", regex=True)
                         .str.replace(r"\s+", " ", regex=True)
                         .str.strip())

    # Keep only the five technologies with the largest total count.
    top = (counts[[col, 'count']]
           .groupby(col)
           .sum()
           .sort_values(by='count', ascending=False)
           .index[:5])
    return counts[counts[col].isin(top.tolist())]



In [3]:
def etiquetas(df, col):
    """Return the node labels: unique education levels followed by unique ``col`` values.

    Both groups keep their order of first appearance in ``df``.
    """
    education_nodes = df['EdLevel'].unique()
    technology_nodes = df[col].unique()
    return [*education_nodes, *technology_nodes]

## 3. Grafico. 

En este caso, un diagrama de Sankey (uno por pestaña) que relaciona el nivel educativo con el top 5 de tecnologías.

In [4]:
# Dash application using the Bootstrap stylesheet. The viewport meta tag
# lets the page scale on mobile screens.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    meta_tags=[{
        'name': 'viewport',
        'content': 'width=device-width, initial-scale=1.0, maximum-scale=1.2, minimum-scale=0.5,',
    }],
)

# Layout: one tab per technology category plus a card body whose content
# is filled in by the callback registered on the "sankey" tabs component.
app.layout = html.Div([
    dbc.Row([
        dbc.Tabs(
            [
                dbc.Tab(
                    label=tab_label,
                    tab_id=tab_key,
                    labelClassName="text-primary font-weight-bold",
                    activeLabelClassName="text-info",
                )
                for tab_label, tab_key in (
                    ('Databases', 'db2'),
                    ('Languages', 'lang2'),
                    ('Platform', 'plat2'),
                    ('webframe', 'web2'),
                    ('Miscelaneous', 'misc2'),
                    ('Tools', 'tools2'),
                    ('Collab', 'colab2'),
                )
            ],
            id="sankey",
            active_tab="db2",
        ),
        dbc.CardBody(html.P(id="tabu-content", className="card-text")),
    ]),
])



#tabs

def _sankey_graph(column, categoria):
    """Build the Sankey graph linking education levels to the top 5 values of ``column``.

    ``categoria`` is the human-readable category name appended to the figure title.
    """
    df = edTech(df21, column)

    # Node labels; links refer to nodes by their position in this list.
    labels = etiquetas(df, column)
    # Link origins (indices into the label list).
    source = df['EdLevel'].apply(lambda label: labels.index(label)).tolist()
    # Link destinations (indices into the label list).
    target = df[column].apply(lambda label: labels.index(label)).tolist()

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
        ),
        link=dict(
            source=source,
            target=target,
            value=df['count'],
        ),
    )])
    fig.update_layout(title_text="Education level and top 5 of most used "+categoria, font_size=12)
    return dcc.Graph(figure=fig, style={'height': '90vh'})


@app.callback(
    Output("tabu-content", "children"),
    Input("sankey", "active_tab"))
def tab_content(active_tab):
    """Render the Sankey diagram that belongs to the currently selected tab.

    Parameters
    ----------
    active_tab : str or None
        ``tab_id`` of the selected tab in the "sankey" tabs component.

    Returns
    -------
    A ``dcc.Graph`` with the Sankey figure for a known tab id, or an
    ``html.P`` placeholder when the id is ``None`` or not recognised.
    """
    # tab id -> (survey column to analyse, category name used in the title)
    tabs = {
        "db2": ('DatabaseHaveWorkedWith', "Databases"),
        "lang2": ('LanguageHaveWorkedWith', "Languages"),
        "plat2": ('PlatformHaveWorkedWith', "Platforms"),
        "web2": ('WebframeHaveWorkedWith', "Webframes"),
        "misc2": ('MiscTechHaveWorkedWith', "Miscelaneous Tech"),
        "tools2": ('ToolsTechHaveWorkedWith', "Tools"),
        "colab2": ('NEWCollabToolsHaveWorkedWith', "Collab tools"),
    }

    # Guard against an empty active_tab so the callback never raises.
    if active_tab is None:
        return html.P("This shouldn't ever be displayed...")

    if active_tab not in tabs:
        return html.P("tabs: This shouldn't ever be displayed...")

    column, categoria = tabs[active_tab]
    return _sankey_graph(column, categoria)

## 4. run server

In [None]:
# Start the Dash server with debug mode on and the reloader turned off
# (use_reloader=False).
# NOTE(review): newer Dash releases provide `app.run` as the replacement for
# `run_server` - confirm the installed Dash version before switching.
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
