# Características por tipo de desarrollador

[Sunburst(px)](https://plotly.com/python/sunburst-charts/)

In [1]:
# importar librerias
import numpy as np
import pandas as pd
import plotly.express as px  
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dash import Dash, dcc, html, Input, Output
import us
from scipy import stats
import textwrap

# Load only the survey columns used by this notebook. No index_col is passed,
# so the frame keeps pandas' default integer index.
df = pd.read_csv(
    '../data/survey_results_public2021.csv',
    engine="c",
    usecols=[
        # respondent profile
        "MainBranch", "Country", "US_State", "EdLevel", "Age", "Employment",
        "Age1stCode", "LearnCode", "YearsCode", "YearsCodePro", "DevType",
        "OpSys", "NEWStuck", "ConvertedCompYearly",
        # technologies: "HaveWorkedWith" / "WantToWorkWith" pairs
        "LanguageHaveWorkedWith", "LanguageWantToWorkWith",
        "DatabaseHaveWorkedWith", "DatabaseWantToWorkWith",
        "PlatformHaveWorkedWith", "PlatformWantToWorkWith",
        "WebframeHaveWorkedWith", "WebframeWantToWorkWith",
        "MiscTechHaveWorkedWith", "MiscTechWantToWorkWith",
        "ToolsTechHaveWorkedWith", "ToolsTechWantToWorkWith",
        'NEWCollabToolsHaveWorkedWith', 'NEWCollabToolsWantToWorkWith',
    ],
)


# procesar datos.

In [2]:
def caracteristicasDev(df):
    """Summarise experience, salary and learning method per developer type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'DevType', 'YearsCode', 'YearsCodePro',
        'Age', 'LearnCode', 'Employment' and 'ConvertedCompYearly'.
        'DevType' and 'LearnCode' are split on ';' into individual values.

    Returns
    -------
    pandas.DataFrame
        One row per developer type (the "Other (please specify):" entry is
        removed) with the columns:
        DevType, median_pro (median of YearsCodePro), median_code (median of
        YearsCode), avg_money (mean of ConvertedCompYearly), respuestas
        (number of answers in the group) and LearnCode (most frequent
        learning method; on a tie, the first value returned by Series.mode).
        Numeric columns are rounded to 2 decimals.

    Raises
    ------
    ValueError
        If a year column holds text other than a number or one of the two
        excluded labels ("Less than 1 year", "More than 50 years").
    """
    columnas = ['DevType', 'YearsCode', 'YearsCodePro', 'Age', 'LearnCode',
                'Employment', 'ConvertedCompYearly']
    etiquetas_no_numericas = ["Less than 1 year", "More than 50 years"]

    # Keep only answers that filled in every column of interest.
    completas = df[columnas].dropna()

    # Discard answers whose experience is given as a text label, not a number.
    con_etiqueta = (completas['YearsCodePro'].isin(etiquetas_no_numericas)
                    | completas['YearsCode'].isin(etiquetas_no_numericas))
    completas = completas[~con_etiqueta].copy()

    # The year columns are read as text because of the labels above; convert
    # them explicitly so the median does not rely on implicit coercion.
    for columna in ('YearsCode', 'YearsCodePro'):
        completas[columna] = pd.to_numeric(completas[columna])

    # One row per (answer, developer type).
    por_tipo = (completas
                .assign(DevType=completas['DevType'].str.split(';'))
                .explode('DevType'))

    # Every column is non-null after dropna(), so counting any of them gives
    # the number of answers in the group.
    resumen = (por_tipo
               .groupby('DevType')
               .agg(median_pro=('YearsCodePro', 'median'),
                    median_code=('YearsCode', 'median'),
                    avg_money=('ConvertedCompYearly', 'mean'),
                    respuestas=('ConvertedCompYearly', 'count'))
               .reset_index()
               .round(2))

    # One row per (answer, developer type, learning method).
    metodos = (por_tipo[['DevType', 'LearnCode']]
               .assign(LearnCode=lambda tabla: tabla['LearnCode'].str.split(';'))
               .explode('LearnCode'))

    def _primera_moda(valores):
        # Series.mode() returns every tied value; keep exactly one per group.
        modas = valores.mode()
        return modas.iloc[0] if not modas.empty else None

    moda_por_tipo = metodos.groupby('DevType')['LearnCode'].agg(_primera_moda)

    # Attach the mode by DevType key rather than by row position, so a tie in
    # one group can never shift the values of the groups that follow it.
    resumen['LearnCode'] = resumen['DevType'].map(moda_por_tipo)

    # Drop the catch-all category; the remaining rows keep their index labels.
    return resumen[resumen['DevType'] != "Other (please specify):"].copy()


In [3]:
# caracteristicasDev(df)  # ver df


# layout

In [4]:
# Create the Dash application and declare its page structure:
# a heading, a year selector and the container for the chart.
app = Dash(__name__)
app.layout = html.Div(
    children=[
        # Page header.
        html.H1("Tipo de desarrollador", style={'text-align': 'center'}),

        # Single-choice dropdown; the user sees the label, the callback
        # receives the value. "2021" is selected initially.
        dcc.Dropdown(
            id="select_opt",
            options=[{"label": anio, "value": anio} for anio in ("2021", "2020")],
            multi=False,
            value="2021",
            style={'width': "40%"},
        ),

        # Graph container; its figure starts empty and is filled by a callback.
        dcc.Graph(id='my_survey', figure={}, style={'height': '90vh'}),
    ]
)

# callback

In [5]:
@app.callback(
    Output(component_id='my_survey', component_property='figure'),
    Input(component_id='select_opt', component_property='value'))
def update_graph(option_slctd):
    """Build the sunburst figure shown in the 'my_survey' graph.

    Triggered by the 'select_opt' dropdown. `option_slctd` receives the
    dropdown value but is not used in the body: the figure is always built
    from the module-level `df`.

    Returns a plotly figure with one sector per developer type, sized by
    average salary.
    """
    # Columns available: DevType, median_pro, median_code, avg_money,
    # respuestas, LearnCode.
    resumen = caracteristicasDev(df)

    # Alternative view kept for reference (grouped horizontal bars):
    #     px.bar(resumen, y="DevType", x=["median_pro", "median_code"],
    #            orientation="h", barmode='group', text_auto=True)

    columnas_hover = ['median_pro', "median_code", "avg_money", "LearnCode", "respuestas"]
    etiquetas = {
        'median_pro': 'Years Coding Profesionaly (median)',
        'median_code': 'Years Coding (median)',
        'avg_money': 'Average salary ($)',
        'DevType': 'Dev type',
    }

    return px.sunburst(
        resumen,
        path=['DevType'],
        values="avg_money",
        hover_data=columnas_hover,
        labels=etiquetas,
        color_discrete_sequence=px.colors.qualitative.Pastel,
        hover_name="DevType",
    )

# run

In [6]:
# Start the Dash server for `app` with debug mode on and the reloader disabled.
# NOTE(review): disabling the reloader is presumably needed to run inside the
# notebook kernel - confirm.
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
