In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.display import set_matplotlib_formats
# NOTE(review): IPython.display.set_matplotlib_formats is deprecated in recent
# IPython releases in favour of matplotlib_inline.backend_inline; kept as-is so
# no new dependency is introduced.
set_matplotlib_formats("retina")

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names. Prefer the new name when it exists and
# fall back to the legacy one on older installations.
_whitegrid_candidates = ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
plt.style.use(
    next(
        (name for name in _whitegrid_candidates if name in plt.style.available),
        "seaborn-whitegrid",
    )
)

In [None]:
# Load the survey responses CSV into `raw`.
# low_memory=False is passed through to read_csv; presumably to avoid
# mixed-dtype inference on this file — confirm against the pandas docs.
raw = pd.read_csv('../input/kaggle-survey-2020/kaggle_survey_2020_responses.csv',low_memory=False)
# NOTE(review): a notebook cell only renders its last expression, so this bare
# `raw.shape` is evaluated but never shown.
raw.shape
raw.head()

In [None]:
question = raw.iloc[0]
question

In [None]:
answer = raw.drop([0])
answer

In [None]:
answer.info()

# Q1 What is your age (# years)?

In [None]:
question["Q1"]

In [None]:
# Number of responses per Q1 value, ordered by the value label.
# NOTE(review): `Q1` is not used by the plot below.
Q1 = answer['Q1'].value_counts().sort_index()
# The rows are sorted by Q1 before plotting; presumably so the bars follow the
# sorted label order rather than first-appearance order — confirm.
sns.countplot(data=answer.sort_values("Q1"), x="Q1", 
              palette="Blues_r").set_title(question["Q1"])

# Q2 What is your gender? - Selected Choice

In [None]:
question_no = "Q2"
Q2 = answer[question_no].value_counts()
Q2

In [None]:
sns.countplot(data=answer, 
              y=question_no).set_title(question[question_no])

In [None]:
q1q2 = pd.crosstab(answer["Q1"], answer["Q2"])
q1q2[["Man", "Woman"]].plot.bar(rot=0)

In [None]:
q1q2[["Man", "Woman"]].sort_index(ascending=False).plot.barh(figsize=(10, 6), title="Age & Gender")

In [None]:
plt.figure(figsize=(10, 6))
sns.countplot(data=answer.sort_values("Q1"), x="Q1", hue="Q2").set_title("Age & Gender")

# Q3 In which country do you currently reside?

In [None]:
def show_countplot_by_qno(qno, fsize=(10, 6), order=None):
    """
    Draw a horizontal count plot of the answers to a single-choice question.

    The plot title is the question text stored in `question[qno]`.

    qno : question_no, ex) Q12 (must be a column of `answer`)
    fsize : figsize default (10, 6)
    order : optional order of the bars; a list, tuple, pandas Index or numpy
            array is accepted. Default (None or empty) is
            value_counts().index, i.e. most frequent answer first.
    """
    # `not order` raises "truth value ... is ambiguous" for a pandas Index or a
    # numpy array, so test for None / emptiness explicitly instead.
    if order is None or len(order) == 0:
        order = answer[qno].value_counts().index

    plt.figure(figsize=fsize)
    sns.countplot(data=answer,
                  y=qno,
                  order=order,
                  palette="Blues_r"
                 ).set_title(question[qno])

In [None]:
show_countplot_by_qno("Q3", fsize=(12, 12))

# Q4 🎓 What is the highest level of formal education that you have attained or plan to attain within the next 2 years?

In [None]:
show_countplot_by_qno("Q4")

# Q5 Select the title most similar to your current role (or most recent title if retired):

In [None]:
show_countplot_by_qno("Q5")

# Q6 For how many years have you been writing code and/or programming?

In [None]:
show_countplot_by_qno("Q6")

In [None]:
q6_cols = ['I have never written code', '< 1 years', '1-2 years', '3-5 years', '5-10 years',  
       '10-20 years', '20+ years']
show_countplot_by_qno("Q6", order=q6_cols)

# Q7 What programming languages do you use on a regular basis? (Select all that apply)

In [None]:
# Take the first matching Q7 column's question text and keep the part before
# the first "-". `.iloc[0]` is used because plain `[0]` on a label-indexed
# Series is a deprecated positional fallback in pandas.
question.filter(regex="Q7").iloc[0].split("-")[0]

In [None]:
answer_Q7 = answer.filter(regex="Q7")
answer_Q7

In [None]:
answer_Q7_desc = answer_Q7.describe()
answer_Q7_desc

In [None]:
# Reduce the describe() summary to one row per Q7 column, indexed by that
# column's "top" value and holding its "count", largest count first.
answer_Q7_count = (
    answer_Q7_desc.loc[["top", "count"]]
    .T
    .set_index("top")
    .sort_values("count", ascending=False)
)
answer_Q7_count

In [None]:
# Shared Q7 question text: the first matching column's text up to the first "-".
# `.iloc[0]` replaces the deprecated positional `[0]` lookup on a
# label-indexed Series.
q7_title = question.filter(regex="Q7").iloc[0].split("-")[0]

In [None]:
sns.barplot(data=answer_Q7_count, 
            y=answer_Q7_count.index, x="count", palette="Blues_r").set_title(q7_title)

In [None]:
def get_question_title_by_qno(qno):
    """
    Return the question text to use as a plot title.

    qno : question_no, ex) Q1 (exact label in `question`) or
          Q7 (regex matched against the labels of `question`)

    If `qno` is an exact label, the full text stored under it is returned.
    Otherwise the text of the first label matching `qno` is taken and cut at
    the first "-" (the part after it is dropped).

    Raises KeyError when no label matches `qno`.
    """
    if qno in question.index:
        return question[qno]

    matched = question.filter(regex=qno)
    if matched.empty:
        raise KeyError(f"no question label matches {qno!r}")
    # Positional access must go through .iloc: plain [0] on a label-indexed
    # Series is a deprecated fallback in pandas.
    return matched.iloc[0].split("-")[0]

get_question_title_by_qno("Q7")

In [None]:
def get_multiple_choice_answer_by_qno(qno):
    """
    Summarise a multiple-choice question as a count table.

    qno : regex matched against the column names of `answer`, ex) Q7

    Returns a DataFrame with one row per matching column, indexed by that
    column's describe() "top" value, with a single "count" column sorted in
    descending order.
    """
    # describe() yields the "top" and "count" rows for every matching column.
    summary = answer.filter(regex=qno).describe()
    per_choice = summary.loc[["top", "count"]].T.set_index("top")
    return per_choice.sort_values(by="count", ascending=False)

In [None]:
get_multiple_choice_answer_by_qno("Q9")

In [None]:
def show_multiple_choice_bar_plot_by_qno(qno, fsize=(10, 6)):
    """
    Draw a horizontal bar plot of per-choice counts for a multiple-choice question.

    qno : question_no, ex) Q7 (passed to get_multiple_choice_answer_by_qno
          and get_question_title_by_qno)
    fsize : figsize default (10, 6), same meaning as in show_countplot_by_qno
    """
    multiple_choice_answer = get_multiple_choice_answer_by_qno(qno)

    plt.figure(figsize=fsize)
    # The count table is indexed by choice label, so the index supplies the bars.
    sns.barplot(data=multiple_choice_answer,
                y=multiple_choice_answer.index,
                x="count",
                palette="Blues_r").set_title(get_question_title_by_qno(qno))

In [None]:
show_multiple_choice_bar_plot_by_qno("Q7")

In [None]:
q7_cols = answer.filter(regex="Q7").describe().loc["top"].tolist()

In [None]:
# Keep the columns whose name contains "Q7" plus those whose name ends in "Q2".
q2q7 = answer.filter(regex="Q7|Q2$")
# Per Q2 group, count the non-null cells of every remaining column.
q2q7_count = q2q7.groupby("Q2").count()
# Relabel the columns with q7_cols. This assigns by position — presumably the
# column order here matches the filter(regex="Q7") order used to build
# q7_cols; verify.
q2q7_count.columns = q7_cols
# NOTE(review): not the last expression of the cell, so this line renders nothing.
q2q7_count
# Compare the "Man" and "Woman" rows, one bar pair per column, sorted by the "Woman" count.
g = q2q7_count.loc[["Man", "Woman"]].T.sort_values("Woman").plot.barh(title="Gender & Programming Language")

**Define 📊 `show_plot_by_qno`: pick the single-choice or multiple-choice plot for a question**

In [None]:
question.index

In [None]:
def show_plot_by_qno(qno):
    """
    Plot the answers to question `qno` with the matching helper.

    qno : question_no, ex) Q8 or Q26_A

    A `qno` that is an exact label of `question` is drawn with
    show_countplot_by_qno; anything else is drawn with
    show_multiple_choice_bar_plot_by_qno.
    """
    is_exact_label = qno in question.index
    plotter = (
        show_countplot_by_qno
        if is_exact_label
        else show_multiple_choice_bar_plot_by_qno
    )
    plotter(qno)

# Q8 What programming language would you recommend an aspiring data scientist to learn first? - Selected Choice

In [None]:
show_plot_by_qno("Q8")

# Q9 Which of the following integrated development environments (IDE's) do you use on a regular basis? (Select all that apply)


In [None]:
show_plot_by_qno("Q9")

# Q10 Which of the following hosted notebook products do you use on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q10")

# Q11 What type of computing platform do you use most often for your data science projects? - Selected Choice

In [None]:
show_plot_by_qno("Q11")

# Q12 Which types of specialized hardware do you use on a regular basis?

In [None]:
show_plot_by_qno("Q12")

# Q13 🧮 Approximately how many times have you used a TPU (tensor processing unit)?

In [None]:
show_plot_by_qno("Q13")

# Q14 🎨 What data visualization libraries or tools do you use on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q14")

# Q15 For how many years have you used machine learning methods? 

In [None]:
show_plot_by_qno("Q15")

# Q16 Which of the following machine learning frameworks do you use on a regular basis? (Select all that apply) 

In [None]:
show_plot_by_qno("Q16")

# Q17 Which of the following ML algorithms do you use on a regular basis? (Select all that apply):

In [None]:
show_plot_by_qno("Q17")

# Q18 Which categories of computer vision methods do you use on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q18")

# Q19 📚 Which of the following natural language processing (NLP) methods do you use on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q19")

# Q20 🏢 What is the size of the company where you are employed?

In [None]:
show_plot_by_qno("Q20")

# Q21 Approximately how many individuals are responsible for data science workloads at your place of business?

In [None]:
show_plot_by_qno("Q21")

# Q22 Does your current employer incorporate machine learning methods into their business?

In [None]:
show_plot_by_qno("Q22")

# Q23 Select any activities that make up an important part of your role at work: (Select all that apply)

In [None]:
show_plot_by_qno("Q23")

# Q24 What is your current yearly compensation (approximate $USD)

In [None]:
show_plot_by_qno("Q24")

In [None]:
q24_count = answer["Q24"].value_counts().reset_index()
q24_count.columns = ["range", "count"]
q24_count

In [None]:
# Split each range label at "-" into a lower ("min") and upper ("max") part.
q24_count["min"] = q24_count["range"].str.split("-", expand=True)[0]
q24_count["max"] = q24_count["range"].str.split("-", expand=True)[1]
# Strip ",", "$" and ">" so the lower bound parses as an integer.
# regex=True must be explicit: since pandas 2.0 str.replace treats the pattern
# as a literal string by default, which would leave the symbols in place and
# make astype(int) fail. The raw string avoids the invalid "\$" escape.
# strip() guards against whitespace left behind after removing the symbols.
q24_count["min"] = (
    q24_count["min"]
    .str.replace(r",|\$|>", "", regex=True)
    .str.strip()
    .astype(int)
)
# Order the ranges numerically by their lower bound.
q24_count = q24_count.sort_values("min")
q24_count

In [None]:
plt.figure(figsize=(10, 6))
g = sns.barplot(data=q24_count, 
                y="range", x="count", 
                palette="Blues").set_title(get_question_title_by_qno("Q24"))

In [None]:
q24_cols = q24_count.range.tolist()

In [None]:
q3_usa = answer[answer["Q3"].isin(["United States of America"])]
plt.figure(figsize=(10, 6))
g= sns.countplot(data=q3_usa, 
              y="Q24", 
              order=q24_cols, 
              palette="Blues").set_title("USA yearly compensation")

In [None]:
q3_korea = answer[answer["Q3"].isin(["South Korea"])]
plt.figure(figsize=(10, 6))
g = sns.countplot(data=q3_korea, 
                  y="Q24", 
                  order=q24_cols, 
                  palette="Blues").set_title("South Korea yearly compensation")

# Q25 Approximately how much money have you (or your team) spent on machine learning and/or cloud computing services at home (or at work) in the past 5 years (approximate $USD)?

In [None]:
show_plot_by_qno("Q25")

# Q26_A Which of the following cloud computing platforms do you use on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q26_A")

# Q26_B Which of the following cloud computing platforms do you hope to become more familiar with in the next 2 years?

In [None]:
show_plot_by_qno("Q26_B")

# Q27_A Do you use any of the following cloud computing products on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q27_A")

# Q27_B In the next 2 years, do you hope to become more familiar with any of these specific cloud computing products? (Select all that apply)

In [None]:
show_plot_by_qno("Q27_B")

# Q28_A Do you use any of the following machine learning products on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q28_A")

# Q28_B In the next 2 years, do you hope to become more familiar with any of these specific machine learning products? (Select all that apply)

In [None]:
show_plot_by_qno("Q28_B")

# Q29_A Which of the following big data products (relational databases, data warehouses, data lakes, or similar) do you use on a regular basis? (Select all that apply)

In [None]:
show_plot_by_qno("Q29_A")

# Q29_B Which of the following big data products (relational databases, data warehouses, data lakes, or similar) do you hope to become more familiar with in the next 2 years? 

In [None]:
show_plot_by_qno("Q29_B")

# Q30 Which of the following big data products (relational database, data warehouse, data lake, or similar) do you use most often? - Selected Choice

In [None]:
show_plot_by_qno("Q30")

# Q31_A Which of the following business intelligence tools do you use on a regular basis?

In [None]:
show_plot_by_qno("Q31_A")

# Q31_B Which of the following business intelligence tools do you hope to become more familiar with in the next 2 years?

In [None]:
show_plot_by_qno("Q31_B")

# Q32 Which of the following business intelligence tools do you use most often? - Selected Choice

In [None]:
show_plot_by_qno("Q32")

# Q33_A Do you use any automated machine learning tools (or partial AutoML tools) on a regular basis?

In [None]:
show_plot_by_qno("Q33_A")

# Q33_B Which categories of automated machine learning tools (or partial AutoML tools) do you hope to become more familiar with in the next 2 years? (Select all that apply)

In [None]:
show_plot_by_qno("Q33_B")

# Q34_A Which of the following automated machine learning tools (or partial AutoML tools) do you use on a regular basis?

In [None]:
show_plot_by_qno("Q34_A")

# Q34_B Which specific automated machine learning tools (or partial AutoML tools) do you hope to become more familiar with in the next 2 years?

In [None]:
show_plot_by_qno("Q34_B")

# Q35_A Do you use any tools to help manage machine learning experiments?

In [None]:
show_plot_by_qno("Q35_A")