In [None]:
!pip install plotly
!pip install pywaffle
!pip install squarify
!pip install pycountry

# 1. Imports, utils and data

In [None]:
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import squarify
import plotly.express as px
import pycountry
import plotly.graph_objects as go
from pywaffle import Waffle
import math
from collections import *

In [None]:
# Initialise plotly's offline notebook mode once, before any figure is shown.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it — confirm.
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [None]:
## utils.py

import json
import pickle

def save_json(object, filepath):
    """Serialise ``object`` as JSON into ``filepath``, overwriting any existing file."""
    with open(filepath, 'w') as json_file:
        json.dump(object, json_file)

def load_json(filepath):
    """Read ``filepath`` and return the parsed JSON document."""
    with open(filepath, 'r') as json_file:
        parsed = json.load(json_file)
    return parsed

def save_pickle(object, filepath):
    """Pickle ``object`` into ``filepath`` (binary mode), overwriting any existing file."""
    with open(filepath, 'wb') as pickle_file:
        pickle.dump(object, pickle_file)

def load_pickle(filepath):
    """Return the object unpickled from ``filepath``.

    Unpickling can execute arbitrary code, so only use this on files you trust.
    """
    with open(filepath, 'rb') as pickle_file:
        restored = pickle.load(pickle_file)
    return restored

def add_log(log, filepath, mode="a"):
    """Write ``log`` to ``filepath`` surrounded by a leading and a trailing newline.

    ``mode`` is passed straight to ``open``; the default ``"a"`` appends to the file.
    """
    with open(filepath, mode) as log_file:
        log_file.write(f"\n{log}\n")

def flatten(lst):
    """Concatenate the items of every sub-iterable in ``lst`` into one flat list (one level deep)."""
    flat = []
    for sublist in lst:
        for item in sublist:
            flat.append(item)
    return flat


## data_utils.py

def counts_from_parts(dataframe, columns: list, to_column_names: list = ['property', 'count']):
    """Count how often each non-missing value occurs across ``columns`` of ``dataframe``.

    Args:
        dataframe: Frame holding the columns to tally.
        columns: Column labels whose cells are pooled together before counting.
        to_column_names: Two keys used for each record; the first holds the value,
            the second its number of occurrences.

    Returns:
        A ``(records, counts)`` tuple: ``records`` is a list of two-key dicts (one per
        distinct value) and ``counts`` is the plain ``{value: occurrences}`` dict.
    """
    pooled_cells = flatten(dataframe[columns].values.tolist())
    # Missing cells (NaN/None) are not answers, so they are left out of the tally.
    counts = dict(Counter(cell for cell in pooled_cells if not pd.isna(cell)))

    property_counts = []
    for value, occurrences in counts.items():
        property_counts.append(
            {to_column_names[0]: value, to_column_names[1]: occurrences})

    return property_counts, counts


def counts_from_multiple_parts(dataframe, columns: list, condition_column, to_column_names: list = ['property', 'count']):
    """Cross-tabulate the values found in ``columns`` against ``condition_column``.

    Args:
        dataframe: Frame holding both the value columns and the condition column.
        columns: Column labels whose non-missing cells are counted. Cells are expected
            to be strings; surrounding whitespace is stripped before counting.
        condition_column: Column whose distinct values become the outer keys.
        to_column_names: Accepted for signature parity with ``counts_from_parts``;
            this function does not read it.

    Returns:
        ``{condition: {value: count}}``. Every condition maps every distinct value,
        with 0 where that combination never occurs.
    """
    value_rows = dataframe[columns].values.tolist()

    # Distinct stripped values in first-seen order, so the key order is
    # reproducible between runs instead of depending on set iteration order.
    distinct_values = []
    already_seen = set()
    for row_values in value_rows:
        for cell in row_values:
            if pd.isna(cell):
                continue
            cleaned = cell.strip()
            if cleaned not in already_seen:
                already_seen.add(cleaned)
                distinct_values.append(cleaned)

    # dict.fromkeys builds a fresh inner dict per condition (no shared state).
    condition_unique_dict = {
        condition: dict.fromkeys(distinct_values, 0)
        for condition in pd.unique(dataframe[condition_column]).tolist()
    }

    # Walk conditions and value rows in lockstep; this avoids building a Series
    # per row the way DataFrame.iterrows() does.
    for condition, row_values in zip(dataframe[condition_column].tolist(), value_rows):
        for cell in row_values:
            if not pd.isna(cell):
                condition_unique_dict[condition][cell.strip()] += 1

    return condition_unique_dict


## graph_utils.py

# Shared styling constants read by the chart helpers in this notebook.

# Name of the plotly template passed to the plotly-based helpers.
global_plotly_template = "plotly_dark"

# Hex colour palette used to colour bars, tiles, blocks and slices.
global_color_codes = [
    "#00FFFF", "#F0FFFF", "#89CFF0", "#0000FF", "#7393B3", "#088F8F",
    "#0096FF", "#5F9EA0", "#0047AB", "#6495ED", "#00FFFF", "#00008B",
    "#6F8FAF", "#1434A4", "#7DF9FF", "#6082B6", "#00A36C", "#3F00FF",
    "#5D3FD3", "#ADD8E6", "#191970", "#000080", "#1F51FF", "#A7C7E7",
    "#CCCCFF", "#B6D0E2", "#96DED1", "#4169E1", "#0F52BA", "#9FE2BF",
    "#87CEEB", "#4682B4", "#008080", "#40E0D0", "#0437F2", "#40B5AD", "#0818A8",
]


def draw_plotly_bar(dataframe, x_column_name, y_column_name, title, x_axis_title, y_axis_title, show_legend=False):
    """Show a plotly bar chart of ``dataframe`` with one palette colour per row.

    Args:
        dataframe: Frame with one row per bar.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        title: Figure title.
        x_axis_title: Label for the x axis.
        y_axis_title: Label for the y axis.
        show_legend: Whether the legend is displayed (hidden by default).
    """
    palette = global_color_codes
    bar_count = len(dataframe)
    if bar_count <= len(palette):
        # Tail of the palette; an explicit start index keeps the empty-frame case
        # at [] (a plain [-0:] slice would return the whole palette).
        bar_colors = palette[len(palette) - bar_count:]
    else:
        # More bars than colours: cycle so the list always has one entry per row.
        bar_colors = [palette[position % len(palette)] for position in range(bar_count)]

    # Without color_discrete_map="identity" plotly treats the hex strings as
    # category labels and draws the bars in the template's default colours.
    figure = px.bar(dataframe, x=x_column_name, y=y_column_name,
                    color=bar_colors, color_discrete_map="identity",
                    template=global_plotly_template, title=title)
    figure.update_layout({
        "yaxis": {
            "title": y_axis_title
        },
        "xaxis": {
            "title": x_axis_title
        },
        "showlegend": show_legend
    })

    figure.show()


def draw_plotly_pie(dataframe, values_column: str, names_column: str, title: str, hover_info: list, x_axis_title: str, y_axis_title: str, col2labels=None):
    """Show a plotly pie chart of ``dataframe`` coloured with the shared palette.

    Args:
        dataframe: Frame with one row per slice.
        values_column: Column providing the slice sizes.
        names_column: Column providing the slice names.
        title: Figure title.
        hover_info: Columns forwarded to plotly as ``hover_data``.
        x_axis_title: Written into the layout's x-axis title.
        y_axis_title: Written into the layout's y-axis title.
        col2labels: Optional ``{column: display label}`` mapping forwarded as ``labels``.
    """
    # The palette used to be sliced into a local list that was never handed to
    # plotly. color_discrete_sequence applies it, and plotly cycles through the
    # sequence when there are more slices than colours.
    figure = px.pie(
        dataframe, values=values_column, names=names_column, title=title,
        hover_data=hover_info, labels=col2labels, template=global_plotly_template,
        color_discrete_sequence=global_color_codes
    )
    # Each slice is annotated inside the pie with its percentage and its label.
    figure.update_traces(textposition='inside', textinfo='percent+label')
    figure.update_layout(
        {"yaxis": {"title": y_axis_title}, "xaxis": {"title": x_axis_title}})
    figure.show()


def draw_treemap(dataframe, count_column, labels_column, label_prefix=None):
    """Draw a squarify treemap with one tile per row of ``dataframe``.

    Args:
        dataframe: Frame with one row per tile.
        count_column: Column providing the tile sizes.
        labels_column: Column providing the tile labels.
        label_prefix: Optional text placed before every label, separated by a space.
    """
    tile_sizes = dataframe[count_column].values.tolist()

    prefix = label_prefix if label_prefix else ''
    tile_labels = []
    for raw_label in dataframe[labels_column].values.tolist():
        tile_labels.append(f"{prefix} {raw_label}")

    # Colours come from the tail of the shared palette.
    tile_colors = global_color_codes[-len(dataframe):]

    plt.figure(figsize=(20, 10))
    squarify.plot(sizes=tile_sizes, label=tile_labels, color=tile_colors, alpha=.9)
    plt.axis("off")
    plt.show()


def draw_waffle(data_dict, title_label, font_size, face_color, text_color, is_percent=False):
    """Draw a five-row waffle chart of ``data_dict``.

    Args:
        data_dict: ``{category: value}`` mapping. Raw counts are converted to
            percentages unless ``is_percent`` is true.
        title_label: Chart title (left aligned).
        font_size: Value written to matplotlib's ``font.size`` rcParam.
        face_color: Figure background colour.
        text_color: Value written to matplotlib's ``text.color`` rcParam.
        is_percent: Set to True when the values are already percentages.

    Note:
        The two rcParams are set globally and are not restored afterwards, so
        later matplotlib figures in the same kernel inherit them.
    """
    if not is_percent:
        totals = sum(data_dict.values())
        # Each share is rounded UP to a whole percent, so the shares can add up
        # to slightly more than 100.
        data_dict = {category: math.ceil((value / totals) * 100)
                     for category, value in data_dict.items()}

    # Text settings must be in place BEFORE the figure is built: the title and
    # legend text are created during construction and read rcParams at that time.
    plt.rcParams['text.color'] = text_color
    plt.rcParams['font.size'] = font_size

    # One colour per category; cycle when there are more categories than colours.
    palette = global_color_codes
    block_colors = [palette[position % len(palette)]
                    for position in range(len(data_dict))]

    fig = plt.figure(FigureClass=Waffle, rows=5, values=data_dict, colors=block_colors, title={
        "label": title_label,
        "loc": "left"
    }, legend={"loc": "upper left", "bbox_to_anchor": (1, 1)}, figsize=(20, 10), starting_location="NW", block_arranging_style="snake")

    fig.set_facecolor(face_color)

    plt.show()


def draw_donut(dataframe, count_column, names_column, face_color, font_size, text_color, is_percent=False):
    """Draw a donut chart (a pie with a covered centre) from ``dataframe``.

    Args:
        dataframe: Frame with one row per slice.
        count_column: Column providing the slice sizes.
        names_column: Column providing the slice names.
        face_color: Figure background colour, also used for the centre disc.
        font_size: Value written to matplotlib's ``font.size`` rcParam.
        text_color: Value written to matplotlib's ``text.color`` rcParam.
        is_percent: Set to True when the sizes are already percentages.

    Note:
        The two rcParams are set globally and are not restored afterwards.
    """
    names = dataframe[names_column].values.tolist()
    sizes = dataframe[count_column].values.tolist()
    if is_percent:
        labels = [f"{name} - {size}%" for name, size in zip(names, sizes)]
    else:
        # Sum once instead of once per label; each share is rounded UP to a
        # whole percent.
        total = sum(sizes)
        labels = [f"{name} - {math.ceil((size / total) * 100)}%"
                  for name, size in zip(names, sizes)]

    fig = plt.figure()
    fig.patch.set_facecolor(face_color)

    plt.rcParams['text.color'] = text_color
    plt.rcParams['font.size'] = font_size

    # Disc in the background colour that hides the middle of the pie.
    circle = plt.Circle((0, 0), 1, color=face_color)

    # One colour per slice from the start of the palette, cycling when needed.
    # The previous slice, global_color_codes[len(sizes):], skipped the first
    # len(sizes) colours and was empty for 37 or more slices.
    palette = global_color_codes
    slice_colors = [palette[position % len(palette)]
                    for position in range(len(sizes))]

    plt.pie(sizes, labels=labels, radius=4, colors=slice_colors)
    fig.gca().add_artist(circle)
    plt.show()

    
def draw_plotly_stack_chart(data_dict, title):
    """Show a plotly bar chart with one bar per index entry of ``data_dict``.

    ``data_dict`` is turned into a DataFrame; its index is copied into a ``name``
    column used as the x axis, and every other column is passed as a y series.
    """
    graph_df = pd.DataFrame.from_dict(data_dict)
    graph_df["name"] = graph_df.index.tolist()

    series_columns = [column for column in graph_df.columns if column != "name"]
    figure = px.bar(graph_df, x="name", y=series_columns,
                    template=global_plotly_template, title=title)
    figure.show()


def draw_plotly_group_chart(data_dict, title):
    """Show a grouped plotly bar chart from a nested ``{series: {x: y}}`` mapping.

    Each outer key becomes one named bar trace; its inner keys are the x values
    and its inner values the bar heights.
    """
    bar_traces = []
    for series_name, series_points in data_dict.items():
        bar_traces.append(
            go.Bar(name=series_name,
                   x=list(series_points.keys()),
                   y=list(series_points.values())))

    figure = go.Figure(data=bar_traces)
    figure.update_layout(
        barmode='group', template=global_plotly_template, title=title)
    figure.show()


In [None]:
# Location of the survey responses file read by this notebook.
SURVEY_RESPONSES_CSV = "/kaggle/input/kaggle-survey-2021/kaggle_survey_2021_responses.csv"
csv_data = pd.read_csv(SURVEY_RESPONSES_CSV)

In [None]:
# The first data row is kept as a {column: text} lookup (and its inverse) and
# then removed so that only response rows remain.
question2text = csv_data.iloc[0].to_dict()
text2question = {text: question for question, text in question2text.items()}

# drop=True discards the old row labels instead of inserting them as a spurious
# "index" column into the survey data.
# NOTE: re-running this cell without reloading csv_data drops one more row.
csv_data = csv_data.drop(index=0).reset_index(drop=True)

In [None]:
# Preview the first rows of csv_data.
csv_data.head()

In [None]:
# Country name -> ISO 3166-1 alpha-3 code, seeded from pycountry and extended
# with spellings that are added by hand below.
countries_2_code = {country.name: country.alpha_3 for country in pycountry.countries}
countries_2_code.update({
    "United Kingdom of Great Britain and Northern Ireland": "GBR",
    # KOR is the Republic of Korea (South Korea); PRK is North Korea.
    "South Korea": "KOR",
    "Taiwan": "TWN",
    "Iran, Islamic Republic of...": "IRN",
    "Hong Kong (S.A.R.)": "HKG",
    "Czech Republic": "CZE",
})
# len(countries_2_code)

# 2. Age Distribution among Survey Takers

In [None]:
# Tally the Q1 answers and chart them, most frequent first.
age_counts, counts = counts_from_parts(
    dataframe=csv_data, columns=["Q1"], to_column_names=['age_range', 'count'])
age_df = pd.DataFrame(age_counts, index=list(range(len(age_counts))))
age_df = age_df.sort_values(by=['count'], ascending=False)
draw_plotly_bar(age_df, "age_range", "count",
                "Survey Takers Age Distribution", "Age Range", "Count")

In [None]:
# The same age tally as a pie chart.
draw_plotly_pie(age_df, "count", "age_range", "Survey Takers Age Distribution",
                ['count'], "Age Range", "Count",
                {"count": "Count", "age_range": "Age Range"})

In [None]:
# Treemap of the age tally; every tile label is prefixed with "Age".
draw_treemap(dataframe=age_df,
             count_column="count",
             labels_column="age_range",
             label_prefix="Age")

# 3. Where are the Survey Takers from 🗺️?

In [None]:
# Tally the Q3 answers and chart them, most frequent first.
country_counts, counts = counts_from_parts(csv_data, ["Q3"], ['country', 'counts'])
country_counts_df = pd.DataFrame(
    country_counts, index=list(range(len(country_counts))))
country_counts_df = country_counts_df.sort_values(by=['counts'], ascending=False)

draw_plotly_bar(dataframe=country_counts_df,
                x_column_name="country",
                y_column_name="counts",
                title="Where are the Survey Takers from? 🗺️",
                x_axis_title="Country",
                y_axis_title="Count")

In [None]:
# Pie chart over every entry of the country tally.
draw_plotly_pie(country_counts_df, "counts", "country",
                "Where are the Survey Takers from? 🗺️",
                ['counts'], "Country", "Count", {"counts": "Count"})

In [None]:
# Same pie chart, restricted to the first rows returned by .head().
draw_plotly_pie(country_counts_df.head(), "counts", "country",
                "Where are the Survey Takers from? 🗺️",
                ['counts'], "Country", "Count", {"counts": "Count"})

In [None]:
# Shorten long country names by VALUE. The earlier positional writes were
# unreliable: `.iloc[1].country = ...` assigned to a temporary copy, while
# `.at[1, ...]`, `.country[1]` and `.country[6]` address index LABELS, which after
# sorting no longer match row positions and could rename an unrelated country.
# NOTE(review): "Russian Federation" is only renamed if that spelling occurs in the
# data — confirm which spellings the survey actually uses.
display_name_fixes = {
    "United States of America": "United States",
    "Russian Federation": "Russia",
}
countries_2_code["United States of America"] = "USA"
countries_2_code["United States"] = "USA"
countries_2_code["Russia"] = "RUS"

# Answers that are not countries get no code and are dropped from the map.
non_country_answers = ["Other", "I do not wish to disclose my location"]

# Work on a copy so country_counts_df stays intact and this cell can be re-run.
country_map_df = country_counts_df.copy()
country_map_df["country"] = country_map_df["country"].replace(display_name_fixes)
country_map_df["codes"] = [
    None if name in non_country_answers else countries_2_code[name]
    for name in country_map_df["country"].tolist()
]
country_map_df = country_map_df.dropna().reset_index(drop=True)

figure = go.Figure(data=go.Choropleth(
    locations=country_map_df["codes"],
    z=country_map_df["counts"],
    text=country_map_df["country"],
    colorscale="Greens",
    autocolorscale=False,
    reversescale=False,
    marker_line_color="darkgray",
    marker_line_width=0.5,
    colorbar_title="Survey Taker Counts",
))

figure.update_layout(template=global_plotly_template,
                     title_text="Where are the Survey Takers from? 🗺️",
                     geo=dict(showframe=False,
                              showcoastlines=False,
                              projection_type='equirectangular'))

figure.show()

# 4. Sex Ratio of the Survey Takers 🚹 🚺 🙅‍♂️ 🙅‍♀️

In [None]:
# Tally the Q2 answers and chart them, most frequent first.
sex_ratio_counts, counts = counts_from_parts(csv_data, ['Q2'], ['gender', 'counts'])
sex_ratio_df = (
    pd.DataFrame(sex_ratio_counts, index=list(range(len(sex_ratio_counts))))
    .sort_values(by=['counts'], ascending=False)
    .reset_index()
)
draw_plotly_bar(sex_ratio_df, 'gender', 'counts',
                "Sex Ratio of Survey Takers 🚹 🚺 🙅‍♂️ 🙅‍♀️", "Gender", "Count")

In [None]:
# Waffle chart of the most recently computed `counts` tally.
draw_waffle(counts, "Sex Ratio of Survey Takers", 22, "black", "white")

# 5. Programming Language Popularity 💻

In [None]:
# Pool the twelve Q7 part columns, tally the answers and chart them.
q7_columns = [f"Q7_Part_{part}" for part in range(1, 13)]
languages_used_counts, counts = counts_from_parts(
    csv_data, columns=q7_columns, to_column_names=["language", "counts"])
languages_counts_df = pd.DataFrame(
    languages_used_counts, index=list(range(len(languages_used_counts))))
languages_counts_df = languages_counts_df.sort_values(by=['counts'], ascending=False)

draw_plotly_bar(languages_counts_df, "language", "counts",
                "Popular Programming Languages", "Programming Language", "Count")

In [None]:
# Treemap of the language tally.
draw_treemap(dataframe=languages_counts_df,
             count_column="counts",
             labels_column="language")

In [None]:
# Pie chart of the language tally.
draw_plotly_pie(
    dataframe=languages_counts_df,
    values_column="counts",
    names_column="language",
    title="Popular Programming Languages",
    hover_info=['counts'],
    x_axis_title="Programming Language",
    y_axis_title="Count",
    col2labels={"counts": "Count"},
)

# 6. Popular IDEs

In [None]:
# Pool the twelve Q9 part columns, tally the answers and draw them as a donut.
q9_columns = [f"Q9_Part_{part}" for part in range(1, 13)]
ides_used_counts, counts = counts_from_parts(csv_data, q9_columns, ['ide', 'counts'])
ides_counts_df = pd.DataFrame(
    ides_used_counts, index=list(range(len(ides_used_counts))))
ides_counts_df = ides_counts_df.sort_values(by=['counts'], ascending=False)

draw_donut(ides_counts_df, count_column="counts", names_column="ide",
           face_color="black", font_size=22, text_color="white")

#  7. Recommended Programming Languages for Beginners 

In [None]:
# Tally the Q8 answers; the raw counts feed the waffle chart and the sorted
# frame is kept for the pie chart that follows.
recommended_used_languages, counts = counts_from_parts(
    csv_data, ["Q8"], ['language', 'counts'])
recommended_language_df = pd.DataFrame(
    recommended_used_languages,
    index=list(range(len(recommended_used_languages))))
recommended_language_df = recommended_language_df.sort_values(
    by=['counts'], ascending=False)
draw_waffle(counts,
            title_label="Recommended Programming Language for Beginners",
            font_size=22,
            face_color="black",
            text_color="white")

In [None]:
# Pie chart of the recommended-language tally.
draw_plotly_pie(
    dataframe=recommended_language_df,
    values_column="counts",
    names_column="language",
    title="Recommended Programming Languages for Beginners",
    hover_info=['counts'],
    x_axis_title="Programming Language",
    y_axis_title="Count",
    col2labels={"counts": "Count"},
)

# 8. Popular Specialized Hardware

In [None]:
# Pool the Q12 part columns plus Q12_OTHER, tally the answers and chart them.
q12_columns = [f"Q12_Part_{part}" for part in range(1, 6)] + ["Q12_OTHER"]
specialized_hardware, counts = counts_from_parts(
    csv_data, q12_columns, ["hardware", 'counts'])
specialized_hardware_df = pd.DataFrame(
    specialized_hardware,
    index=list(range(len(specialized_hardware)))
).sort_values(by=['counts'], ascending=False)
draw_plotly_bar(dataframe=specialized_hardware_df,
                x_column_name='hardware',
                y_column_name='counts',
                title="Popular Specialized Hardware 💻",
                x_axis_title="Hardware Name",
                y_axis_title="Count")

In [None]:
# Treemap of the specialized-hardware tally.
draw_treemap(dataframe=specialized_hardware_df,
             count_column='counts',
             labels_column='hardware')

# 9. Popular Hosted Notebooks

In [None]:
# Pool the sixteen Q10 part columns plus Q10_OTHER, tally the answers and chart
# them with the counts on the x axis.
q10_columns = [f"Q10_Part_{part}" for part in range(1, 17)] + ["Q10_OTHER"]
hosted_notebook_counts, counts = counts_from_parts(
    csv_data, q10_columns, ['hosted_notebook', 'count'])

hosted_notebook_df = pd.DataFrame(
    hosted_notebook_counts,
    index=list(range(len(hosted_notebook_counts)))
).sort_values(by=['count'], ascending=False)
draw_plotly_bar(dataframe=hosted_notebook_df,
                x_column_name='count',
                y_column_name='hosted_notebook',
                title='Most Popular Hosted Notebook Platforms',
                x_axis_title='Count',
                y_axis_title='Hosted Platform')

In [None]:
# Donut chart of the ten most frequent hosted-notebook answers.
draw_donut(hosted_notebook_df.head(10), 'count', 'hosted_notebook',
           "black", 12, "white")

# 10.  Most popular Visualization Libraries

In [None]:
# Pool the eleven Q14 part columns plus Q14_OTHER, tally the answers and chart them.
q14_columns = [f"Q14_Part_{part}" for part in range(1, 12)] + ["Q14_OTHER"]
vis_library_data, counts = counts_from_parts(
    csv_data, q14_columns, ['library', 'count'])
vis_library_df = pd.DataFrame(
    vis_library_data, index=list(range(len(vis_library_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_pie(dataframe=vis_library_df,
                values_column='count',
                names_column='library',
                title='Most Popular Visualization Libraries',
                hover_info=['count'],
                x_axis_title='Library',
                y_axis_title='Count',
                col2labels={"count": "Count"})

# 11.  Popular Machine Learning Frameworks

In [None]:
# Pool the seventeen Q16 part columns plus Q16_OTHER and tally the answers; the
# raw counts feed the waffle chart, the sorted frame the pie chart that follows.
q16_columns = [f"Q16_Part_{part}" for part in range(1, 18)] + ["Q16_OTHER"]
ml_framework_counts, counts = counts_from_parts(
    csv_data, q16_columns, ["ml_framework", 'count'])
ml_framework_df = pd.DataFrame(
    ml_framework_counts, index=list(range(len(ml_framework_counts)))
).sort_values(by=['count'], ascending=False)
draw_waffle(counts, "Popular ML Frameworks",
            font_size=22, face_color="black", text_color="white")

In [None]:
# Pie chart of the ten most frequent ML-framework answers.
draw_plotly_pie(
    dataframe=ml_framework_df.head(10),
    values_column='count',
    names_column='ml_framework',
    title='Top 10 most popular ML Frameworks',
    hover_info=["count"],
    x_axis_title="Framework Name",
    y_axis_title="Count",
    col2labels={"count": "Count"},
)

# 12.  Years of experience with Machine Learning

In [None]:
# Tally the Q15 answers and chart them with the counts on the x axis.
exp_data, counts = counts_from_parts(csv_data, ["Q15"], ["exp", "count"])
exp_df = pd.DataFrame(exp_data, index=list(range(len(exp_data))))
exp_df = exp_df.sort_values(by=['count'], ascending=False)
# Title typo fixed: "experiance" -> "experience".
draw_plotly_bar(exp_df, "count", "exp", 'Years of experience with ML', 'Count', 'Experience')

# 13.  Machine Learning Applicability in different Industries

In [None]:
# Tally the Q20 answers and chart them, most frequent first.
mla_data, counts = counts_from_parts(
    dataframe=csv_data, columns=["Q20"], to_column_names=["mla", "count"])
mla_df = pd.DataFrame(
    mla_data, index=list(range(len(mla_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_bar(dataframe=mla_df,
                x_column_name='mla',
                y_column_name='count',
                title="Applicability of ML Algorithms in different Industries",
                x_axis_title='Industry',
                y_axis_title='Count')

In [None]:
# Pie chart of the ten most frequent Q20 answers.
draw_plotly_pie(
    dataframe=mla_df.head(10),
    values_column='count',
    names_column='mla',
    title='Top 10 Industries leveraging ML Algorithms',
    hover_info=['count'],
    x_axis_title='Industry',
    y_axis_title='Count',
    col2labels={"count": "Count"},
)

# 14.  Size of Data Science Teams

In [None]:
# Tally the Q21 answers; the raw counts feed the waffle chart and the sorted
# frame is kept for the donut chart that follows.
dst_data, counts = counts_from_parts(
    dataframe=csv_data, columns=['Q21'], to_column_names=['dst', 'count'])
dst_df = pd.DataFrame(
    dst_data, index=list(range(len(dst_data)))
).sort_values(by=['count'], ascending=False)
draw_waffle(data_dict=counts,
            title_label="Size of Data Science Teams",
            font_size=22,
            face_color="black",
            text_color="white")

In [None]:
# Donut chart of the team-size tally.
draw_donut(dst_df, "count", "dst", "black", 22, "white")

# 15. Money Spent on ML and/or Cloud Computing services in $USD in the last 5 years

In [None]:
amt_data, counts = counts_from_parts(csv_data, ['Q26'], ['amt', 'count'])


def _readable_amount(raw_amount):
    """Turn a label such as "$100-$999" into "100 to 999 USD".

    The earlier code called re.sub("$", " ", ...), where the unescaped "$" is the
    end-of-string anchor, so no dollar sign was ever removed by it; a character
    filter hid the problem and left double and trailing spaces in the labels.
    """
    without_dollars = raw_amount.replace("$", "")
    spelled_range = without_dollars.replace("-", " to ")
    # split()/join collapses any run of whitespace into a single space.
    return " ".join(f"{spelled_range} USD".split())


amt_data = [
    {"amt": _readable_amount(entry['amt']), "count": entry['count']}
    for entry in amt_data
]
amt_df = pd.DataFrame(amt_data, index=list(range(len(amt_data))))
amt_df = amt_df.sort_values(by=['count'], ascending=False)
draw_plotly_bar(amt_df, "amt", "count", "Amount Spent on ML/Cloud Computing Services in last 5 years (💵)", "Amount", "Count")

In [None]:
# Waffle chart of the most recently computed `counts` tally.
draw_waffle(
    data_dict=counts,
    title_label="Amount Spent on ML/Cloud Computing Services in last 5 years ($)",
    font_size=22,
    face_color="black",
    text_color="white",
)

# 16. Popular Cloud Computing Platforms

In [None]:
# Pool the eleven Q27_A part columns plus Q27_A_OTHER, tally the answers and chart them.
q27_columns = [f"Q27_A_Part_{part}" for part in range(1, 12)] + ['Q27_A_OTHER']
ccp_data, counts = counts_from_parts(csv_data, q27_columns, ['platforms', 'count'])
ccp_df = pd.DataFrame(
    ccp_data, index=list(range(len(ccp_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_pie(dataframe=ccp_df,
                values_column='count',
                names_column='platforms',
                title="Popular Cloud Computing Platforms",
                hover_info=['count'],
                x_axis_title="Platforms",
                y_axis_title="Count",
                col2labels={"count": "Count"})

# 17.  Popularity of Cloud Platform based on best Developer Experience

In [None]:
# Tally the Q28 answers and draw them as a donut (reuses the ccp_* names).
ccp_data, counts = counts_from_parts(
    dataframe=csv_data, columns=["Q28"], to_column_names=['platforms', 'count'])
ccp_df = pd.DataFrame(
    ccp_data, index=list(range(len(ccp_data)))
).sort_values(by=['count'], ascending=False)
draw_donut(ccp_df, "count", "platforms", "black", 12, "white")

In [None]:
# Waffle chart of the most recently computed `counts` tally.
draw_waffle(
    data_dict=counts,
    title_label="Popularity of Cloud Platform based on best Developer Experience",
    font_size=22,
    face_color="black",
    text_color="white",
)

# 18.  Most Regularly Used Cloud Computing Products

In [None]:
# Pool the four Q29_A part columns plus Q29_A_OTHER, tally the answers and chart
# them with the counts on the x axis (reuses the ccp_* names).
q29_columns = [f"Q29_A_Part_{part}" for part in range(1, 5)] + ["Q29_A_OTHER"]
ccp_data, counts = counts_from_parts(csv_data, q29_columns, ['platforms', 'count'])
ccp_df = pd.DataFrame(
    ccp_data, index=list(range(len(ccp_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_bar(dataframe=ccp_df,
                x_column_name="count",
                y_column_name="platforms",
                title="Most used Cloud Computing Products on a day-to-day basis",
                x_axis_title="Count",
                y_axis_title="Product")

In [None]:
# Treemap of the current ccp_df tally.
draw_treemap(dataframe=ccp_df,
             count_column="count",
             labels_column="platforms")

# 19.  Popular Data Storage Products

In [None]:
# Pool the seven Q30_A part columns plus Q30_A_OTHER, tally the answers and draw
# them as a donut.
q30_columns = [f"Q30_A_Part_{part}" for part in range(1, 8)] + ["Q30_A_OTHER"]
csp_data, counts = counts_from_parts(csv_data, q30_columns, ['storage', 'count'])
csp_df = pd.DataFrame(
    csp_data, index=list(range(len(csp_data)))
).sort_values(by=['count'], ascending=False)
draw_donut(csp_df, 'count', 'storage', "black", 22, "white")

In [None]:
# Waffle chart of the most recently computed `counts` tally.
draw_waffle(data_dict=counts,
            title_label="Popular Data Storage Products",
            font_size=22,
            face_color="black",
            text_color="white")

# 20.  Popular Managed Machine Learning Products

In [None]:
# Pool the nine Q31_A part columns plus Q31_A_OTHER, tally the answers and chart them.
q31_columns = [f"Q31_A_Part_{part}" for part in range(1, 10)] + ["Q31_A_OTHER"]
mml_data, counts = counts_from_parts(csv_data, q31_columns, ['mml', 'count'])
mml_df = pd.DataFrame(
    mml_data, index=list(range(len(mml_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_bar(dataframe=mml_df,
                x_column_name="mml",
                y_column_name="count",
                title="Popular Managed ML Products",
                x_axis_title="Managed ML Product",
                y_axis_title="Count")

# 21.  Popular Big Data Products (relational dbs, data warehouses, data lakes, or similar)

In [None]:
# Pool the twenty Q32_A part columns plus Q32_A_OTHER, tally the answers and
# chart them with the counts on the x axis.
q32_columns = [f"Q32_A_Part_{part}" for part in range(1, 21)] + ["Q32_A_OTHER"]
bdp_data, counts = counts_from_parts(csv_data, q32_columns, ['bdp', 'count'])
bdp_df = pd.DataFrame(
    bdp_data, index=list(range(len(bdp_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_bar(dataframe=bdp_df,
                x_column_name="count",
                y_column_name="bdp",
                title="Popular Big Data products",
                x_axis_title="Count",
                y_axis_title="Big Data Product")

# 22. Popular Business Intelligence Tools

In [None]:
# Pool the sixteen Q34_A part columns plus Q34_A_OTHER, tally the answers and chart them.
q34_columns = [f"Q34_A_Part_{part}" for part in range(1, 17)] + ["Q34_A_OTHER"]
bit_data, counts = counts_from_parts(csv_data, q34_columns, ['bit', 'count'])
bit_df = pd.DataFrame(
    bit_data, index=list(range(len(bit_data)))
).sort_values(by=['count'], ascending=False)
draw_plotly_bar(dataframe=bit_df,
                x_column_name="bit",
                y_column_name="count",
                title="Popular BI Tools",
                x_axis_title="BI Tools",
                y_axis_title="Count")

In [None]:
# Pie chart of the ten most frequent BI-tool answers.
draw_plotly_pie(
    dataframe=bit_df.head(10),
    values_column="count",
    names_column="bit",
    title="Top 10 BI Tools",
    hover_info=["count"],
    x_axis_title="BI Tools",
    y_axis_title="Count",
    col2labels={"count": "Count"},
)