In [1]:
# --- CSS STYLE ---
from IPython.core.display import HTML
def css_styling(path="../input/kaggle-data-science-survey-20172021/style.css"):
    '''Load the notebook stylesheet and wrap it in a <style> tag.

    path: location of the CSS file; defaults to the style.css shipped with
          the survey dataset
    return: IPython HTML object that injects the stylesheet when displayed'''

    # The context manager closes the file handle as soon as the text is read;
    # a bare open(...).read() leaves the handle open until garbage collection.
    with open(path, "r") as css_file:
        styles = css_file.read()
    return HTML("<style>"+styles+"</style>")
css_styling()

<center><img src="https://i.imgur.com/iOHRYxA.png"></center>
<center><h1 style="font-family: parklane">How are the 💃Ladies and the 🎩Gents doing?</h1></center>
<center><h2 style="font-family: times-new-roman">- An analysis of parties in comparison and in time -</h2></center>

<h1 style="font-family: parklane">Foreword</h1>

<p style="font-family: times-new-roman">This notebook is a celebration of both Ladies and Gentlemen: a deep dive into their evolution, their differences and similarities, and the overall story of their journey.</p>
<p style="font-family: times-new-roman">This notebook will not try to explain why the numbers for women do or do not match the ones for men. Rather, it will be a celebration of differences and an homage to both genders and to their own pace and beauty unfolding in Data Science. In the end, we are all people.</p>
<p style="font-family: times-new-roman">So, if you are curious and you dare ...</p>
<p style="font-family: times-new-roman"><b>🎉 It's a party! 🎉</b></p>

<h1 style="font-family: parklane">Methodology</h1>

<p style="font-family: times-new-roman">This year I have decided to take a 2 dimensional look into things: A first look regarding how Ladies and Gents are conducting their Data Sciencing, as well as a second look through 🕒time and how it has impacted the preferences and overall way we do Data Science.</p>

<p style="font-family: times-new-roman">Hence, I took all the Kaggle Data Science Surveys, starting with 2021 and going back in time to 2017, gathering as much information as I could for each year. I ended up with a table of 106,000 rows and 293 columns, containing aggregated information for 5 years: 2017 to 2021 inclusive.</p>

<p style="font-family: times-new-roman">The aggregation was done <i>manually</i>, as the order of the columns, the naming of the questions as well as methodology changed from one year to another.</p>

> <p style="font-family: times-new-roman">📌 <b>Note</b>: in some years certain questions or choices were not provided. Hence, I matched them as well as possible and, to keep everything clean and to compare in a precise manner, I am going to compute proportions per year and gender instead of overall numbers.</p>

<h3 style="font-family: parklane">📖 Libraries</h3>

In [2]:
import os
import pandas as pd
import numpy as np
import imageio
import wandb
from PIL import Image
from IPython.core.display import display, HTML, Javascript
import IPython.display as py_display
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Keep W&B from printing its own progress output into the notebook
os.environ["WANDB_SILENT"] = "true"
# Run configuration passed to every wandb.init() call in this notebook
CONFIG = {'competition': 'kaggle-survey-2021', '_wandb_kernel': 'aot'}

# Secrets
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb")

# Log in W&B personal API
# Use the Python API instead of `! wandb login <key>`: the key is no longer
# interpolated into a shell command line, and the cell does not depend on
# IPython's `!` shell escape.
wandb.login(key=secret_value_0)

<h3 style="font-family: parklane">📖 Functions & Helpers</h3>

In [3]:
# =============== FUNCTIONS ===============
def save_dataset_artifact(run_name, artifact_name, path):
    '''Saves dataset to W&B Artifactory.
    run_name: name of the experiment
    artifact_name: under what name should the dataset be stored
    path: path to the dataset

    The W&B run opened here is always finished, even when adding or logging
    the artifact raises, so a failed upload does not leave a run open for the
    cells that follow.'''

    wandb.init(project='kaggle-survey-2021',
               name=run_name,
               config=CONFIG, anonymous="allow")
    try:
        artifact = wandb.Artifact(name=artifact_name,
                                  type='dataset')
        artifact.add_file(path)

        wandb.log_artifact(artifact)
    finally:
        # Close the run whether or not the upload succeeded
        wandb.finish()
    
    
def create_wandb_plot(x_data=None, y_data=None, x_name=None, y_name=None, 
                      title=None, log=None, plot="line"):
    '''Create and save lineplot/barplot/scatterplot in W&B Environment.
    x_data & y_data: Pandas Series containing x & y data
    x_name & y_name: strings containing axis names
    title: title of the graph
    log: string containing name of log
    plot: chart type, one of "line", "bar" or "scatter"
    raises: ValueError when `plot` is not one of the supported chart types'''

    # Validate first: an unknown chart type used to fall through every branch
    # and log nothing, so a typo in `plot` was invisible.
    supported = ("line", "bar", "scatter")
    if plot not in supported:
        raise ValueError("Unsupported plot type %r; expected one of %s"
                         % (plot, ", ".join(supported)))

    data = [[label, val] for (label, val) in zip(x_data, y_data)]
    table = wandb.Table(data=data, columns = [x_name, y_name])

    # wandb.plot has one builder per supported chart type (line / bar /
    # scatter), each taking the same arguments
    make_chart = getattr(wandb.plot, plot)
    wandb.log({log : make_chart(table, x_name, y_name, title=title)})
        
        
def create_wandb_hist(x_data=None, x_name=None, title=None, log=None):
    '''Log a histogram of a single variable to the active W&B run.
    x_data: iterable (e.g. Pandas Series) with the values to bin
    x_name: column / axis name used for the values
    title: title of the graph
    log: key under which the chart is logged'''

    # W&B tables are row-oriented, so each value becomes a one-element row
    rows = list(map(lambda value: [value], x_data))
    hist_table = wandb.Table(columns=[x_name], data=rows)

    chart = wandb.plot.histogram(hist_table, x_name, title=title)
    wandb.log({log : chart})
    
    
def wb_prep_data(df):
    '''Reshape the 2021 rows of a per-category table to long format, split by gender.

    df: frame whose first 2 columns are "Year" and "Gender"; every remaining
        column is treated as one category
    return: tuple (man, woman) of frames with the columns
            Year, Gender, Category, Count'''

    latest = df.loc[df["Year"] == 2021]
    category_cols = latest.columns[2:].tolist()

    # One row per (Year, Gender, category) instead of one column per category
    long_form = pd.melt(latest,
                        id_vars=["Year", "Gender"],
                        value_vars=category_cols,
                        value_name="Count")
    long_form.columns = ["Year", "Gender", "Category", "Count"]

    by_gender = [long_form[long_form["Gender"] == label]
                 for label in ("Man", "Woman")]
    return by_gender[0], by_gender[1]


def get_data_multiple_answers(cols, new_names, source=None, totals=None):
    '''Select and prep data for the columns mentioned.
    It contains year & gender data as well.

    cols: list of valid column names belonging to 1 question
    new_names: the new names these columns should have (same order as cols)
    source: frame to read from; defaults to the module-level `df`
    totals: frame with Year, Gender and Count columns used as the
            denominator; defaults to the module-level `total`
    return: new prepped data (in percentages per year & gender)

    The input lists are not modified.'''

    if source is None:
        source = df
    if totals is None:
        totals = total

    # Build new lists rather than extending the arguments: extending in place
    # changed the caller's lists, so calling again with the same lists
    # appended the Year/Gender columns a second time.
    cols = list(cols) + ["Year", "What is your gender? - Selected Choice"]
    new_names = list(new_names) + ["Year", "Gender"]

    data = source[cols].copy()
    data.columns = new_names
    data = data[data["Gender"].isin(["Man", "Woman"])]

    # count() tallies the non-null answers of every column per (Year, Gender)
    data = data.groupby(["Year", "Gender"]).count().reset_index()
    data = pd.merge(data, totals, on=["Year", "Gender"])

    # Everything between the two key columns and the trailing "Count" column
    # is an answer column to convert into a percentage
    cols_perc = list(data.columns[2:-1])
    for c in cols_perc:
        data[c] = data[c]/data["Count"] * 100
        data[c] = data[c].apply(lambda x: round(x, 1))

    return data.drop(columns="Count")
    
    
# =============== PREP ===============
# Load the combined survey table; the first line of the file is skipped
df = pd.read_csv(
    "../input/kaggle-data-science-survey-20172021/kaggle_survey_2017_2021.csv",
    skiprows=1,
)

# Map the different gender wordings onto one set of labels
col = "What is your gender? - Selected Choice"
df[col] = df[col].replace({
    "Male": "Man",
    "Female": "Woman",
    "Prefer to self-describe": "Unspecified",
    "Prefer not to say": "Unspecified",
    "A different identity": "Unspecified",
    "Non-binary, genderqueer, or gender non-conforming": "Nonbinary",
})

# Number of respondents per (Year, Gender); used as the percentage denominator
total = df[["Year", col]].value_counts().reset_index()
total.columns = ["Year", "Gender", "Count"]

# save_dataset_artifact(run_name="save_full_data", 
#                       artifact_name="kaggle_survey_2017_2021", 
#                       path="../input/kaggle-data-science-survey-20172021/kaggle_survey_2017_2021.csv")
# wandb.finish()

# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='numbers_eda', 
#                  config=CONFIG, anonymous="allow")
# wandb.log({"2017-2021 rows" : int(df.shape[0]),
#            "2017-2021 cols" : int(df.shape[1])})
# wandb.finish()

<div class="alert success-alert" style="font-family: times-new-roman">
  <h4 style="font-family: times-new-roman"><center>👀 Don't forget to <b>hover</b> your mouse over the charts and <b>interact</b> with them!</center></h4>
</div>

<h1 style="font-family: parklane">1. Ladies & Gentlemen, the floor is open!</h1>

<h2 style="font-family: parklane">1.1 How many came to the party?</h2>

<p style="font-family: times-new-roman">The golden ratio, 80:20.</p>

<p style="font-family: times-new-roman">Throughout the years, no matter how many respondents answered the survey overall, the ratio of women to men has stayed almost the same. Hence, we are looking at a pool of around <b>80% men and almost 20% women</b>.</p>

<p style="font-family: times-new-roman">We also see that we had the most answers in 2021! Not only is Kaggle becoming more and more popular, but the survey is too!</p>

In [4]:
col = "What is your gender? - Selected Choice"

# Keep only the two genders compared in this notebook
gender = df[df[col].isin(["Man", "Woman"])][["Year", col]]
gender.columns = ["Year", "Gender"]

# Gender and Years
# Name the count column explicitly before pivoting: the auto-generated name
# of the value_counts() column differs between pandas versions (0 vs "count"),
# so pivoting on `values=0` breaks on newer releases.
gender1 = gender.value_counts().reset_index()
gender1.columns = ["Year", "Gender", "Count"]
gender1 = pd.pivot(data=gender1, index="Year", columns="Gender", values="Count").reset_index()

# Just Gender
gender2 = gender["Gender"].value_counts().reset_index()
gender2.columns = ["Gender", "Count"]
gender2["Gender"] = gender2["Gender"].replace(["Man", "Woman"],
                                              ["Gentlemen", "Ladies"])

# The D3 charts below load these CSV files from the notebook's output folder
gender1.to_csv("gender1.csv", index=False)
gender2.to_csv("gender2.csv", index=False)

# Save PNGs
# Re-save the artwork next to the CSVs so the front-end can load it by name
images_dir = '../input/kaggle-data-science-survey-20172021/images/'
for image_name in ("man.png", "lady.png", "bkg.jpg"):
    im = imageio.imread(images_dir + image_name)
    Image.fromarray(im).save(image_name)


htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
	stroke: #e0c9a6;
	}

	.axisHidden line{
	stroke: #e0c9a6;
	}

	.annotation.red text {
		fill: #7D262F;
	}
	.annotation.blue text {
		fill: #184E8B;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 1.3rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all">
	<center><h1 style="font-family: parklane">How many ladies & gentlemen?</h1></center>
	<div class="row">
		<div id="map1" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all = document.getElementsByClassName('all')[0];
$all.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;


// Set the SVG area
const margin = { LEFT: 30, RIGHT: 30, TOP: 10, BOTTOM: 30 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM

const svg = d3.select("#map1").append("svg")
  // .attr("style", "outline: thin solid red;")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)


// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man.png")
    .attr("x", "11%")
    .attr("y", "-1%")
    .attr("width", 370)
    .attr("height", 220)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady.png")
    .attr("x", "33%")
    .attr("y", "13%")
    .attr("width", 370)
    .attr("height", 210)
    .style("opacity", 1)

// === Gender overall ===
d3.csv(baseAssetsUrl + "gender2.csv").then(function(data){ 

  const x_global = 150
  var color = d3.scaleOrdinal(["#0F3157", "#7D262F"]);
  

  data.forEach(d => {
    d.Count = Number(d.Count)
  })

  // Scales
  const x = d3.scaleBand()
    .range([0, width/1.7])
    .domain(data.map(d => d.Gender))
    .paddingInner(0.3)
    .paddingOuter(0.2)

  const y = d3.scaleLinear()
    .range([height/5, 0])
    .domain([0, d3.max(data, d => d.Count)])

  const xAxisGroup = svg.append("g")
    .attr("class", "x axis")
    .attr("transform", `translate(${x_global}, ${height/2.3})`)

  const yAxisGroup = svg.append("g")
    .attr("class", "axisHidden")
    .attr("transform", `translate(${x_global}, ${width/4.6})`)

  // Axis
  const xAxisCall = d3.axisBottom(x)
  xAxisGroup.call(xAxisCall)
    .selectAll("text")
      .attr("y", "10")
      .attr("x", "0")
      .attr("font-size", "18px")
      .attr("font-weight", 600)
      .attr("text-anchor", "middle")

  const yAxisCall = d3.axisLeft(y)
    .ticks(0)
  yAxisGroup.call(yAxisCall)

  // Bar Chart
  const rects = svg.selectAll()
    .data(data)
    .enter().append('g')
    .append("rect")
    .attr("transform", `translate(${x_global}, ${width/4.6})`)
    .attr("y", d => y(d.Count))
    .attr("x", d => x(d.Gender))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr("width", x.bandwidth)
    .attr("height", d => height/5 - y(d.Count))
    .style('fill', function(d, i) {return color(i);})
    .style("stroke-width", "1px")

  // Interactive
  var div = d3.select("body").append("div")
  .attr("class", "tooltip-donut")
  .style("opacity", 0);

    rects
    .on('mouseenter', function (event, dt) {
    // MOUSE ON
    d3.selectAll('.value')
          .attr('opacity', 0)

    d3.select(this)
        .transition()
        .duration(100)
        .attr('opacity', 0.3)
        .attr('x', (a) => x(a.Gender) - 2.5)
        .attr('width', x.bandwidth() + 5)


    // Makes the new div appear
    div.transition()
        .duration(50)
        .style("opacity", 1);
    let num = dt.Count;
    div.html(num)
        .style("left", (event.pageX) + "px")
        .style("top", (event.pageY-30) + "px");


  })
    // MOUSE LEAVE
    .on('mouseleave', function () {
      d3.selectAll('.value')
          .attr('opacity', 1)

      d3.select(this)
      .transition()
      .duration(100)
      .attr('opacity', 1)
      .attr('x', (a) => x(a.Gender))
      .attr('width', x.bandwidth())

      // New div dissapears
      div.transition()
          .duration('50')
          .style("opacity", 0);
      svg.selectAll('.divergence').remove()
    })
  
});


// === Gender Years ===
d3.csv(baseAssetsUrl + "gender1.csv").then(function(data){ 

  const x_global = 50
  // Subgroups = the CSV headers after the first column (one per gender)
  const subgroups = data.columns.slice(1)

  // Groups = the value of the first column (Year) -> shown on the X axis
  const groups = data.map(d => d.Year)

  // Add X axis
  const x = d3.scaleBand()
      .domain(groups)
      .range([0, width/1.1])
      .padding(0.2)
  svg.append("g")
    .attr("transform", `translate(${x_global}, ${height/1})`)
    .call(d3.axisBottom(x).tickSizeOuter(0))
    .selectAll("text")
      .attr("y", "10")
      .attr("x", "0")
      .attr("font-size", "15px")
      .attr("font-weight", 300)
      .attr("text-anchor", "middle")

  // Add Y axis
  // The upper bound is the tallest stacked bar (sum of all subgroups in a
  // year) instead of a fixed 25000, so no bar can overshoot the axis.
  // CSV cells are strings, hence the unary plus.
  const maxTotal = d3.max(data, d => d3.sum(subgroups, key => +d[key]))
  const y = d3.scaleLinear()
    .domain([0, maxTotal])
    .nice()
    .range([ height/3.2, 0]);
  svg.append("g")
  .attr("transform", `translate(${x_global}, ${height/1.5})`)
    .call(d3.axisLeft(y).ticks(6))
    .selectAll("text")
      .attr("font-size", "11px")
      .attr("font-weight", 300);

  // color palette = one color per subgroup
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#0F3157", "#7D262F"]);

  // Stack the data per subgroup: one series per gender, one [y0, y1] pair per year
  const stackedData = d3.stack()
    .keys(subgroups)
    (data)

  // Show the bars
  const bars = svg.append("g")
    .selectAll("g")
    .data(stackedData)
    .join("g")
    .attr("transform", `translate(${x_global}, ${height/1.5})`)
      .attr("fill", d => color(d.key))
      .attr("class", d => "myRect " + d.key ) 
      .selectAll("rect")
      .data(d => d)
      .join("rect")
        .attr("x", d => x(d.data.Year))
        .attr("y", d => y(d[1]))
        .attr("height", d => y(d[0]) - y(d[1]))
        .attr("width",x.bandwidth())

  // Interactive
  var div = d3.select("body").append("div")
  .attr("class", "tooltip-donut")
  .style("opacity", 0);

  bars
    .on('mouseenter', function (event, dt) {
    // MOUSE ON
    // The parent <g> carries the series datum, whose key is the gender name
    const subgroupName = d3.select(this.parentNode).datum().key
    const subgroupValue = dt.data[subgroupName];

    // Dim every series, then restore the hovered one
    d3.selectAll(".myRect").style("opacity", 0.2)  
    d3.selectAll("."+subgroupName).style("opacity",1)

    // Makes the new div appear
    div.transition()
        .duration(50)
        .style("opacity", 1);

    div.html("Gender: " + subgroupName + "<br>" + "Frequency: " + subgroupValue)
        .style("left", (event.pageX) + "px")
        .style("top", (event.pageY-30) + "px");

  })
    // MOUSE LEAVE
    .on('mouseleave', function () {
      d3.selectAll(".myRect")
          .style("opacity",1) 

      // New div disappears
      div.transition()
          .duration(50)
          .style("opacity", 0);
      svg.selectAll('.divergence').remove()
    })


  svg.append("text")
  .attr("y", "60%")
  .attr("x", "45%")
  .attr("font-size", "25px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Gender Evolution")
  
});



// Legend
const annot1 = [
  {
  note: { 
    label: "Out of all respondents in 5 years, 20% of them are women and the rest 80% are men.",
    title: "20:80 Percentage",
    wrap: 200, 
    padding: 0, 
  },
  className: "red",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 630,
  y: 320,
  dy: -30,
  dx: 40
},
{
  note: { 
    label: "Keep in mind that some respondents might have answered multiple years.",
    title: "Non Unique",
    wrap: 150, 
    padding: 0,  
  },
  className: "blue",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 190,
  y: 190,
  dy: -30,
  dx: -40
},
{
  note: { 
    label: "Between 2017 and 2021, the % of women that responded the survey was raughly between 17% and 20%.",
    title: "% mantained",
    wrap: 250, 
    padding: 0,  
  },
  className: "red",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 290,
  y: 570,
  dy: -30,
  dx: -10
}]

svg.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot1))

});
'''


# Render the HTML/CSS scaffold (it contains the #map1 container and the hidden
# #baseimg element the script looks up), then send the D3 script to the
# front-end. The names `h` and `j` are not read again in this cell.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

In [5]:
# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='basics_eda', 
#                  config=CONFIG, anonymous="allow")
# create_wandb_plot(x_data=gender2["Gender"], y_data=gender2["Count"],
#                   x_name="Gender", y_name="Frequency", 
#                   title="Total Respondents", log="gender1", plot="bar")
# wandb.finish()

<h2 style="font-family: parklane">1.2 Age is just a formality</h2>

<p style="font-family: times-new-roman">Here I found something fascinating! If you flip through years you will notice that starting with the year 2017 all the way to 2021, the number of respondents aged <b>18 to 24 increases every year</b>!</p>

<p style="font-family: times-new-roman">Younger ladies and gents are finding their way onto the platform and taking this survey each year. This also shows an <b>increase in Data Science popularity among young students</b>.</p>

In [6]:
# Columns needed for the age pyramid
cols = [
    "Year",
    "What is your age (# years)?",
    "What is your gender? - Selected Choice",
]

# Respondents per (Year, Age, Gender), restricted to the two compared genders
age = df[cols].value_counts().reset_index()
age.columns = ["Year", "Age", "Gender", "Count"]
age = age.loc[age["Gender"].isin(["Man", "Woman"])]

# One row per (Year, Age) with a count column per gender
age = age.pivot(index=["Year", "Age"], columns="Gender", values="Count")
age = age.reset_index()

# Positional rename: the two pivoted gender columns follow the index columns
age.columns = ["year", "age", "male", "female"]
age = age.assign(total=age["male"] + age["female"])

# The D3 chart below loads this file by name
age.to_csv("age.csv", index=False)

# Re-save the artwork used by the chart under the names the script requests
for image_name in ("man2.png", "lady2.png"):
    im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/' + image_name)
    Image.fromarray(im).save(image_name)



htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all2 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all2">
	<center><h1 style="font-family: parklane">Age is just a formality</h1></center>
	<label for="filter2" style="font-size: 25px">Select Year Here:</label>
	<select id="filter2" style="font-size: 20px">
		<option value="2021">2021</option>
		<option value="2020">2020</option>
		<option value="2019">2019</option>
		<option value="2018">2018</option>
		<option value="2017">2017</option>
	  </select>
	<div class="row">
		<div id="map2" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all2 = document.getElementsByClassName('all2')[0];
$all2.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const centreSpacing = 45

const svg = d3.select("#map2").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)


// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man2.png")
    .attr("x", "-17%")
    .attr("y", "-5%")
    .attr("width", 600)
    .attr("height", 600)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady2.png")
    .attr("x", "55%")
    .attr("y", "0%")
    .attr("width", 550)
    .attr("height", 550)
    .style("opacity", 1)

// Gradient
const defs = svg.append('defs');

const bgGradient1 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient1')

bgGradient1
  .append('stop')
  .attr('stop-color', '#79A988')
  .attr('offset', '0%');
bgGradient1
  .append('stop')
  .attr('stop-color', '#487054')
  .attr('offset', '100%');

const bgGradient2 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient2')

bgGradient2
  .append('stop')
  .attr('stop-color', '#D05257')
  .attr('offset', '0%');
bgGradient2
  .append('stop')
  .attr('stop-color', '#D67A83')
  .attr('offset', '100%');

// ------ DEFAULTS ------
// Initialize scales
const y = d3.scaleBand()
.range([height/1.03, 0])
.padding(0.1)

const x = d3.scaleLinear()
.range([0, (width - centreSpacing) / 2])
const xAxis = svg.append("g")
  .attr('transform', 'translate(20,' + (height-15) + ')')

const xReverse = d3.scaleLinear()
.range([0, (width - centreSpacing) / 2])
const xAxisReverse = svg.append("g")
  .attr('transform', 'translate(490,' + (height-15) + ')')


// Male + Female + Labels objects
var gM = svg.append("g")
  .attr("transform", 
  "translate(" + margin.LEFT + "," + margin.TOP + ")");

var gF = svg.append("g")
  .attr('transform',
    'translate(' +
      (margin.LEFT + (width - centreSpacing) / 2 + centreSpacing) +
      "," +
      margin.TOP +
      ")");

var gLabels = svg
.append('g')
.attr(
  'transform',
  'translate(' +
    (margin.LEFT + (width - centreSpacing) / 2 + 
    ',' + margin.TOP + ')'));


// === UPDATE ===
// Redraw the age pyramid for one year's rows.
// data: array of CSV rows with the fields age, male, female and total
function update(data) {

  // CSV values arrive as strings; coerce the numeric columns up front
  data.forEach(d => {
    d.total = Number(d.total)
    d.female = Number(d.female)
    d.male = Number(d.male)
  })

  // Scales
  y.domain(data.map(d => d.age))

  // Both sides share one maximum so the bar lengths are comparable
  const maxVal = d3.max(data, d => d3.max([d.male, d.female]))

  x.domain([0, maxVal])
  xReverse.domain([0, maxVal])

  // Male: bars start at the centre gap and grow to the left
  gM.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', d => (width - centreSpacing) / 2 - x(d.male))
    .attr('y', d => y(d.age))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.male))
    .style('fill', 'url(#bg-gradient1)');


  // Female: bars start at the centre gap and grow to the right
  gF.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', 0)
    .attr('y', d => y(d.age))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.female))
    .style('fill', 'url(#bg-gradient2)');
  

  // Labels
  // Select by class so the "Age" header below is never picked up by this
  // join (a plain selectAll('text') rebinds or removes it on the next call).
  gLabels.selectAll('text.age-label')
    .data(data)
    .join('text')
    .attr('class', 'age-label')
    .attr('x', centreSpacing / 2-18)
    .attr('y', d => y(d.age) + y.bandwidth() / 2+5)
    .style('font-weight', 600)
    .text((d, i) => d.age);

  // Header: joined on a one-element array so repeated calls reuse a single
  // node instead of appending another "Age" label every time.
  gLabels.selectAll('text.age-title')
    .data(['Age'])
    .join('text')
    .attr('class', 'age-title')
    .text(d => d)
    .attr('x', centreSpacing / 2-18)
    .attr('y', -0)
    .style('font-size', '20px')
    .style('font-weight', 600);


  // Axis Update
  // The male bars grow from the centre towards the left, so their axis needs
  // zero at its right-hand end: use a copy of x with the range flipped.
  const xMale = x.copy().range(x.range().slice().reverse())

  xAxis
    .transition()
    .duration(1000)
    .call(d3.axisBottom(xMale).ticks(5))
    .selectAll("text")
      .attr("font-size", "15px");

  xAxisReverse
    .transition()
    .duration(1000)
    .call(d3.axisBottom(xReverse).ticks(5))
    .selectAll("text")
      .attr("font-size", "15px");
}


// === Age ===
d3.csv(baseAssetsUrl + "age.csv").then(function(dataload){

  // default view
  var data = dataload.filter(function(d) { 
    var sq = d3.select("#filter2").property("value");
    return d.year === sq;
  });

  update(data)


  // on change
  d3.select("#filter2").on("change", function() {

    var data = dataload.filter(function(d) { 
      var sq = d3.select("#filter2").property("value");
      return d.year === sq;
    });

    update(data);
  });


// Annot
const annot1 = [
  {
  note: { 
    label: "Notice that for both genders the percentage of young responders (aged 18 to 24) increase steadily between 2017 and 2021.",
    title: "Youth Increase",
    wrap: 200, 
    padding: 0, 
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 650,
  y: 750,
  dy: -30,
  dx: 30
}]

svg.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot1))

});

});
'''


# Render the HTML/CSS scaffold (it contains the #map2 container and the
# #filter2 year selector the script looks up), then send the D3 script to the
# front-end. The names `h` and `j` are not read again in this cell.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

In [7]:
# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='age_eda', 
#                  config=CONFIG, anonymous="allow")
# create_wandb_plot(x_data=age[age.year==2021]["age"], 
#                   y_data=age[age.year==2021]["male"],
#                   x_name="Age", y_name="Cont", 
#                   title="Male Age Frequencies", log="age1", plot="bar")
# create_wandb_plot(x_data=age[age.year==2021]["age"], 
#                   y_data=age[age.year==2021]["female"],
#                   x_name="Age", y_name="Cont", 
#                   title="Female Age Frequencies", log="age2", plot="bar")
# wandb.finish()

<h2 style="font-family: parklane">1.3 Do demographics party differently?</h2>

<p style="font-family: times-new-roman">Yes and No.</p>

<p style="font-family: times-new-roman">The main takeaway is that women have almost the same Country Distribution as men, with the majority coming from <b>India and the USA</b>. Note, however, that ranks 4, 5 and 6 for women are held by Egypt, the UK and Nigeria, whereas for men these places are held by Japan, China and Brazil.</p>

In [8]:
# Columns needed for the per-country charts
cols = [
    "Year",
    "What is your gender? - Selected Choice",
    "In which country do you currently reside?",
]

# Respondents per (Year, Gender, Country), restricted to the two compared genders
country = df[cols].value_counts().reset_index()
country.columns = ["Year", "Gender", "Country", "Count"]
country = country.loc[country["Gender"].isin(["Man", "Woman"])].reset_index(drop=True)

# Shorten the long country labels
country["Country"] = country["Country"].replace({
    "United States of America": "USA",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "I do not wish to disclose my location": "Undisclosed",
    "Iran, Islamic Republic of...": "Iran",
    "Hong Kong (S.A.R.)": "Hong Kong",
})

# Only the 2021 edition is charted, one file per gender
country = country[country["Year"] == 2021]
country_man = country.loc[country["Gender"] == "Man"]
country_woman = country.loc[country["Gender"] == "Woman"]

country_man.to_csv("country_man.csv", index=False)
country_woman.to_csv("country_woman.csv", index=False)

# Re-save the men.png / ladies.png artwork under the names the chart requests
for image_name in ("men.png", "ladies.png"):
    im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/' + image_name)
    Image.fromarray(im).save(image_name)

htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all3 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 1.3rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all3">
	<center><h1 style="font-family: parklane">Demographics</h1></center>
	<center><h2>2021: Frequency of Respondents on Countries</h2></center>
	<div class="row">
		<div id="map3" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
// Chart 3: two circular bar charts (respondents per country, one chart per gender)
// drawn side by side into the #map3 container.
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle: strip the file name from the resolved src of the
// hidden #baseimg element and reuse the remaining prefix for every asset.
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all3 = document.getElementsByClassName('all3')[0];
$all3.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 10, RIGHT: 10, TOP: 10, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const innerRadius = 120
const outerRadius = Math.min(width, height) / 2

// The margin offset is applied to a child <g>, not to the root <svg>: a transform
// on the root element is not handled uniformly by browsers, and the sibling charts
// (#map4, #map5) already use this <g> wrapper.
const svg = d3.select("#map3").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .append("g")
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)


// Men
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"men.png")
    .attr("x", "4.5%")
    .attr("y", "56.5%")
    .attr("width", 370)
    .attr("height", 330)
    .style("opacity", 1)


// Women
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"ladies.png")
    .attr("x", "54%")
    .attr("y", "57%")
    .attr("width", 370)
    .attr("height", 300)
    .style("opacity", 1)

// Vertical separator between the two charts
svg.append('line')
.style("stroke", "black")
.style("stroke-width", 1)
    .attr("x1", 470)
    .attr("y1", 60)
    .attr("x2", 470)
    .attr("y2", 900)


// Draws one circular bar chart.
//   csvName    - file under baseAssetsUrl; the columns read are Country and Count
//   xCenter    - x coordinate of the chart centre (and of its title)
//   title      - heading drawn above the chart
//   colorRange - [first, last] colours interpolated over the bar index
// The radial scale is fitted to the maximum Count of the given file, so bar lengths
// are relative to each chart's own maximum rather than shared between the charts.
function drawRadialBars(csvName, xCenter, title, colorRange) {
  const yCenter = width/1.5

  d3.csv(baseAssetsUrl + csvName).then(function(data){

    svg.append("text")
      .attr("transform", `translate(${xCenter}, ${width/3-200})`)
      .attr("font-size", "28px")
      .attr("text-anchor", "middle")
      .attr("font-weight", 600)
      .text(title)

    data.forEach(d => {
      d.Count = Number(d.Count)
    })

    // Color by bar index.
    // NOTE(review): the [1, 66] domain is hard-coded - presumably the number of
    // rows in the CSV; confirm against the exported files.
    const myColor = d3.scaleLinear().domain([1,66])
                      .range(colorRange)

    // Scales: one angular band per country, radius by count
    const x = d3.scaleBand()
        .range([0, 2 * Math.PI])
        .align(0)
        .domain(data.map(d => d.Country));
    const y = d3.scaleRadial()
        .range([innerRadius, outerRadius])
        .domain([0, d3.max(data, d => d.Count)]);

    // Angle at the middle of a country's band
    const midAngle = d => x(d.Country) + x.bandwidth() / 2
    // True when the rotated label would read upside down and has to be flipped
    const isFlipped = d => (midAngle(d) + Math.PI) % (2 * Math.PI) < Math.PI

    // Add the bars
    svg.append("g")
      .selectAll("path")
      .data(data)
      .join("path")
      .attr("transform", `translate(${xCenter}, ${yCenter})`)
      .attr("fill", (d, i) => myColor(i))
      .attr("d", d3.arc()
          .innerRadius(innerRadius)
          .outerRadius(d => y(d['Count']))
          .startAngle(d => x(d.Country))
          .endAngle(d => x(d.Country) + x.bandwidth())
          .padAngle(0.01)
          .padRadius(innerRadius))

    // Add the labels, placed 10 units beyond the end of each bar
    svg.append("g")
      .attr("transform", `translate(${xCenter}, ${yCenter})`)
      .selectAll("g")
      .data(data)
      .join("g")
        .attr("text-anchor", d => isFlipped(d) ? "end" : "start")
        .attr("transform", d => "rotate(" + (midAngle(d) * 180 / Math.PI - 90) + ")"+"translate(" + (y(d['Count'])+10) + ",0)")
      .append("text")
        .text(d => d.Country)
        .attr("transform", d => isFlipped(d) ? "rotate(180)" : "rotate(0)")
        .style("font-size", "13px")
        .attr("font-weight", 600)
        .attr("alignment-baseline", "middle")

  });
}

// === Men ===
drawRadialBars("country_man.csv", 230, "Gentlemen", ["#507C5E", "#C1D7C8"])

// === Women ===
drawRadialBars("country_woman.csv", 695, "Ladies", ["#AC3541", "#DD929A"])

});
'''


# Emit the markup before the script: the script looks up `#baseimg` and `#map3`,
# both of which are defined by this HTML.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

In [9]:
# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='countries_eda', 
#                  config=CONFIG, anonymous="allow")
# create_wandb_plot(x_data=country_man["Country"].head(15), 
#                   y_data=country_man["Count"].head(15),
#                   x_name="Country", y_name="Count", 
#                   title="Male Country Top 15", log="country1", plot="bar")
# create_wandb_plot(x_data=country_woman["Country"].head(15), 
#                   y_data=country_woman["Count"].head(15),
#                   x_name="Country", y_name="Count", 
#                   title="Female Country Top 15", log="country2", plot="bar")
# wandb.finish()

<p style="font-family: times-new-roman">Let's look at the top 10 countries in 2021 and their evolution:</p>

* <p style="font-family: times-new-roman">🎩 For the gentlemen, there have been many more respondents from <b>Nigeria and Pakistan</b>, which ended up in the top 10 in 2021 from rankings below 20 in 2017.</p>
* <p style="font-family: times-new-roman">💃 For the ladies, there has been a huge surge in respondents from <b>Egypt, Indonesia and Nigeria</b>, with these countries ending up in the top 10 by number of respondents in 2021.</p>

<p style="font-family: times-new-roman">It's beautiful to see that more and more people are joining our community from more diverse backgrounds and countries.</p>

In [10]:
cols = ["Year", "What is your gender? - Selected Choice",
        "In which country do you currently reside?"]

# Number of respondents per (year, gender, country), restricted to the two
# genders compared in this notebook.
country = df[cols].value_counts().reset_index()
country.columns = ["Year", "Gender", "Country", "Count"]
country = country[country["Gender"].isin(["Man", "Woman"])].reset_index(drop=True)

# Map verbose or variant country labels onto one short label each.
country_aliases = {
    "United States of America": "USA",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "I do not wish to disclose my location": "Undisclosed",
    "Iran, Islamic Republic of...": "Iran",
    "Hong Kong (S.A.R.)": "Hong Kong",
    "United States": "USA",
    "People 's Republic of China": "China",
}
country["Country"] = country["Country"].replace(country_aliases)


def _ranked_top_countries(counts, gender, keep):
    """Rank countries per year for one gender and keep a chosen subset.

    The rows of ``counts`` belonging to ``gender`` are ordered by year and count
    (both descending). The first 40 rows of every year are ranked by count, with
    ties broken by row order (``method="first"``), and only the countries listed
    in ``keep`` are returned.
    """
    ordered = (
        counts[counts["Gender"] == gender]
        .sort_values(["Year", "Count"], ascending=False)
        .reset_index(drop=True)
    )
    # The plain reset_index() keeps the previous row positions as an extra
    # "index" column, which therefore also ends up in the exported CSV.
    top40 = ordered.groupby('Year').head(40).reset_index()
    top40["Rank"] = top40.groupby('Year')['Count'].rank(ascending=False, method="first")
    return top40[top40["Country"].isin(keep)]


man_c = ['India', 'USA', 'Other', 'Japan', 'China', 'Brazil', 'Russia',
         'Nigeria', 'Pakistan', 'United Kingdom']
country2_man = _ranked_top_countries(country, "Man", man_c)

woman_c = ['India', 'USA', 'Other', 'Egypt', 'United Kingdom', 'Nigeria',
           'China', 'Russia', 'Indonesia', 'Taiwan']
country2_woman = _ranked_top_countries(country, "Woman", woman_c)

country2_man.to_csv("country2_man.csv", index=False)
country2_woman.to_csv("country2_woman.csv", index=False)

# Re-save the two illustrations used by this chart into the working directory,
# next to the CSV files written above.
for src_path, out_name in (
    ('../input/kaggle-data-science-survey-20172021/images/man4.png', "man4.png"),
    ('../input/kaggle-data-science-survey-20172021/images/lady4.png', "lady4.png"),
):
    im = imageio.imread(src_path)
    Image.fromarray(im).save(out_name)

# Markup emitted ahead of the chart script: the Bootstrap stylesheet, the CSS for the
# SVG container, a hidden <img id="baseimg"> (the script reads its resolved `src` to
# derive the assets base URL) and the `.all4` wrapper whose `#map4` div receives the SVG.
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all4 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 1.3rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all4">
	<center><h1 style="font-family: parklane">Evolution of the top 10 countries in 2021</h1></center>
	<div class="row">
		<div id="map4" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
// Chart 4: two stacked line charts (country rank per year, one chart per gender)
// drawn into the #map4 container.
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle: strip the file name from the resolved src of the
// hidden #baseimg element and reuse the remaining prefix for every asset.
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all4 = document.getElementsByClassName('all4')[0];
$all4.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 10, RIGHT: 10, TOP: 10, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const x_global = 150

const svg = d3.select("#map4").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .append("g")
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man4.png")
    .attr("x", "-12%")
    .attr("y", "5%")
    .attr("width", 370)
    .attr("height", 370)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady4.png")
  .attr("x", "-17%")
  .attr("y", "45%")
  .attr("width", 540)
  .attr("height", 540)
  .style("opacity", 1)


// Line colours; every chart builds its own ordinal scale from this palette, so a
// colour is assigned to a country in the order the countries are first looked up.
const palette = ['#D67A83','#D05257','#D87F58','#CE8E27','#79A988',
  '#12A6B6','#74A5D2','#226BBF','#9C3175', '#6D2251']


// Draws one ranking chart (title, axes, one line per country, hover effect, legend).
//   cfg.csv       - file under baseAssetsUrl; columns read: Year, Count, Rank, Country
//   cfg.title     - heading of the chart
//   cfg.titleY    - y position of the heading
//   cfg.xAxisY    - y position of the year axis
//   cfg.plotY     - y offset shared by the rank axis and the lines
//   cfg.worstRank - rank shown at the bottom of the rank axis (rank 1 is at the top)
//   cfg.legendY   - y offset of the legend
//   cfg.keys      - countries listed in the legend
function drawRankingChart(cfg) {
  d3.csv(baseAssetsUrl + cfg.csv).then( function(data) {

    // d3.csv yields strings; convert every column used as a number
    data.forEach(d => {
      d.Year = Number(d.Year)
      d.Count = Number(d.Count)
      d.Rank = Number(d.Rank)
    })

    // Title
    svg.append("text")
      .attr("transform", `translate(${x_global+300}, ${cfg.titleY})`)
      .attr("font-size", "25px")
      .attr("text-anchor", "middle")
      .attr("font-weight", 600)
      .text(cfg.title)

    // Color
    const color = d3.scaleOrdinal()
      .range(palette)

    // Group Lines by Country
    const sumstat = d3.group(data, d => d.Country);

    // Add X axis
    const x = d3.scaleLinear()
      .domain(d3.extent(data, d => d.Year))
      .range([ 0, width/1.5]);
    svg.append("g")
      .attr("transform", `translate(${x_global}, ${cfg.xAxisY})`)
      .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format("d")))
      .selectAll("text")
        .attr("font-size", "15px")
        .attr("font-weight", 600);

    // Add Y axis (inverted domain: rank 1 is drawn at the top)
    const y = d3.scaleLinear()
      .domain([cfg.worstRank, 1])
      .range([ height/2-90, 0 ]);
    svg.append("g")
      .attr("transform", `translate(${x_global}, ${cfg.plotY})`)
      .call(d3.axisLeft(y).ticks(20))
      .selectAll("text")
        .attr("font-size", "15px")
        .attr("font-weight", 600);

    // Draw the lines. No element carries the class "line", so the selection starts
    // empty and join() appends one new path per country instead of re-binding the
    // paths that the other chart has already drawn into the same group.
    const rankLine = d3.line()
      .x(d => x(d.Year))
      .y(d => y(d.Rank))

    const graph = svg.selectAll(".line")
      .data(sumstat)
      .join("path")
      .attr("transform", `translate(${x_global}, ${cfg.plotY})`)
        .attr("fill", "none")
        .attr("stroke", d => color(d[0]) )
        .attr("stroke-width", 5)
        .attr('opacity', 0.8)
        .attr("d", d => rankLine(d[1]))

    // Interactive: thicken and fully reveal the hovered line.
    // NOTE(review): the ".value" selections are document-wide and nothing in this
    // chart carries that class - presumably inherited from another chart; confirm
    // before removing.
    graph
      // MOUSE ON
      .on('mouseenter', function () {
        d3.selectAll('.value')
          .attr('opacity', 0)

        d3.select(this)
          .transition()
          .duration(100)
          .attr('opacity', 1)
          .attr("stroke-width", 10)
      })
      // MOUSE LEAVE
      .on('mouseleave', function () {
        d3.selectAll('.value')
          .attr('opacity', 1)

        d3.select(this)
          .transition()
          .duration(100)
          .attr('opacity', 0.8)
          .attr("stroke-width", 5)
      })

    // Legend ("graph" matches no element, so one new group is appended per key)
    var legend = svg.selectAll("graph")
      .data(cfg.keys)
      .join("g")
      .attr("transform", `translate(${x_global+540}, ${cfg.legendY})`)

    legend
      .append("circle")
        .attr("cx", 100)
        .attr("cy", function(d,i){ return 100 + i*38})
        .attr("r", 7)
        .style("fill", function(d){ return color(d)})

    legend
      .append("text")
        .attr("x", 120)
        .attr("y", function(d,i){ return 100 + i*38})
        .style("fill", function(d){ return color(d)})
        .text(function(d){ return d})
        // "left" is not a valid text-anchor value; "start" is the valid equivalent
        .attr("text-anchor", "start")
        .attr("font-weight", 600)
        .style("alignment-baseline", "middle")
  })
}


// === MEN ===
drawRankingChart({
  csv: "country2_man.csv",
  title: "Gentlemen Ranking",
  titleY: height/2-410,
  xAxisY: height/2-30,
  plotY: height/2-390,
  worstRank: 37,
  legendY: height/2-490,
  keys: ['India', 'USA', 'Other', 'Japan', 'China', 'Brazil',
  'Russia', 'Nigeria', 'Pakistan', 'United Kingdom']
})


// === LADIES ===
drawRankingChart({
  csv: "country2_woman.csv",
  title: "Ladies Ranking",
  titleY: height/2+40,
  xAxisY: height/2+420,
  plotY: height/1.78,
  worstRank: 33,
  legendY: height/2-40,
  keys: ['India', 'USA', 'Other', 'Egypt', 'United Kingdom', 'Nigeria',
  'China', 'Russia', 'Indonesia', 'Taiwan']
})

});
'''


# Emit the markup before the script: the script looks up `#baseimg` and `#map4`,
# both of which are defined by this HTML.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: parklane">1.4 Education</h2>

<p style="font-family: times-new-roman">The most interesting trend we see is a decrease in the percentage of respondents with a Master's degree, in favor of respondents whose highest degree so far is a Bachelor's.</p>

<p style="font-family: times-new-roman">This is because, as the pool of young people increased in 2020 and 2021, the <b>amount of students that are still in their studies (and not yet finished) increased</b> as well.</p>

In [11]:
# Education: share of each degree per year and gender
cols = ["What is the highest level of formal education that you have attained or plan to attain within the next 2 years?",
        "Year",
        "What is your gender? - Selected Choice"]

education = df[cols].value_counts().reset_index()
education.columns = ["Degree", "Year", "Gender", "Count"]
education = education[education["Gender"].isin(["Man", "Woman"])].reset_index(drop=True)

# Fold variant answer texts into one label per degree. The first three keys are
# kept exactly as written (mis-encoded apostrophe) - presumably that is how the
# values appear in the loaded data; verify before normalising them.
degree_aliases = {
    'Masterâ€™s degree': "Master's degree",
    'Bachelorâ€™s degree': "Bachelor's degree",
    'Some college/university study without earning a bachelorâ€™s degree': 'Professional degree',
    "Some college/university study without earning a bachelor's degree": 'Professional degree',
    'No formal education past high school': "High School degree",
    'I did not complete any formal education past high school': "High School degree",
}
education["Degree"] = education["Degree"].replace(degree_aliases)

# Several raw answers now share a label, so add their counts back together.
education = education.groupby(["Degree", "Year", "Gender"]).sum().reset_index()
education = education[education["Degree"]!="Professional doctorate"]

# Numeric prefixes fix the order in which the degrees sort.
degree = ["Bachelor's degree", 'Doctoral degree', 'High School degree',
       'I prefer not to answer', "Master's degree", 'Professional degree']
new_degree = ["3. Bachelor's degree", '1. Doctoral degree', '5. High School degree',
       '6. I prefer not to answer', "2. Master's degree", '4. Professional degree']
education["Degree"] = education["Degree"].replace(degree, new_degree)

# Turn counts into shares of the per-(year, gender) totals.
# NOTE(review): `total` comes from an earlier cell - assumed to hold Year, Gender
# and Count columns, which is what produces the Count_x / Count_y suffixes.
education = (
    education
    .merge(total, on=["Year", "Gender"])
    .assign(Perc=lambda merged: merged["Count_x"] / merged["Count_y"])
    .drop(columns=["Count_x", "Count_y"])
)

education_man = education[education["Gender"]=="Man"].sort_values(["Year", "Degree"])
education_woman = education[education["Gender"]=="Woman"].sort_values(["Year", "Degree"])

education_man.to_csv("education_man.csv", index=False)
education_woman.to_csv("education_woman.csv", index=False)

# Re-save the two illustrations used by this chart into the working directory,
# next to the CSV files written above.
for src_path, out_name in (
    ('../input/kaggle-data-science-survey-20172021/images/man5.png', "man5.png"),
    ('../input/kaggle-data-science-survey-20172021/images/lady5.png', "lady5.png"),
):
    im = imageio.imread(src_path)
    Image.fromarray(im).save(out_name)


# Markup emitted ahead of the chart script: the Bootstrap stylesheet, the CSS for the
# SVG container, a hidden <img id="baseimg"> (the script reads its resolved `src` to
# derive the assets base URL) and the `.all5` wrapper whose `#map5` div receives the SVG.
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all5 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 1.3rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all5">
	<center><h1 style="font-family: parklane">Education of the respondents</h1></center>
	<div class="row">
		<div id="map5" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
// Chart 5: two stacked line charts (share of each degree per year, one chart per
// gender) with a shared legend and two annotations, drawn into #map5.
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle: strip the file name from the resolved src of the
// hidden #baseimg element and reuse the remaining prefix for every asset.
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all5 = document.getElementsByClassName('all5')[0];
$all5.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 10, RIGHT: 10, TOP: 10, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const x_global = 150

const svg = d3.select("#map5").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .append("g")
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)


// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man5.png")
    .attr("x", "-10%")
    .attr("y", "6%")
    .attr("width", 330)
    .attr("height", 330)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady5.png")
  .attr("x", "-28%")
  .attr("y", "60%")
  .attr("width", 500)
  .attr("height", 500)
  .style("opacity", 1)


// Color: one ordinal scale shared by the legend and both charts. The legend below
// looks the keys up first, so colours follow the order of the keys list.
const color = d3.scaleOrdinal()
.range(['#D67A83','#D05257','#D87F58','#CE8E27','#79A988',
'#12A6B6','#74A5D2','#226BBF','#9C3175', '#6D2251'])


// Legend, laid out as three columns of two entries between the two charts
const keys = ['1. Doctoral degree', "2. Master's degree", "3. Bachelor's degree",
'4. Professional degree', '5. High School degree',
'6. I prefer not to answer']

// "graph" matches no element, so one new group is appended per key
var legend = svg.selectAll("graph")
.data(keys)
.join("g")
.attr("transform", `translate(${x_global-170}, ${height/2-110})`)

legend
.append("circle")
  .attr("cx", (d, i) => (i < 2 ? 100 : (i < 4 ? 400 : 700)))
  .attr("cy", (d, i) => (i%2 === 0 ? 100 : 140))
  .attr("r", 9)
  .style("fill", function(d){ return color(d)})

legend
  .append("text")
    .attr("x", (d, i) => (i < 2 ? 115 : (i < 4 ? 415 : 715)))
    .attr("y", (d, i) => (i%2 === 0 ? 100 : 140))
    .attr("font-size", "19px")
    .style("fill", function(d){ return color(d)})
    .text(function(d){ return d})
    // "left" is not a valid text-anchor value; "start" is the valid equivalent
    .attr("text-anchor", "start")
    .attr("font-weight", 700)
    .style("alignment-baseline", "middle")


// Draws one education chart (title, axes, one line per degree, hover effect).
//   cfg.csv    - file under baseAssetsUrl; columns read: Year, Perc, Degree
//   cfg.title  - heading of the chart
//   cfg.titleY - y position of the heading
//   cfg.xAxisY - y position of the year axis
//   cfg.plotY  - y offset shared by the percentage axis and the lines
//   cfg.yTicks - tick count hint for the percentage axis
// The percentage axis runs from 0 to the maximum Perc of the given file, so the two
// charts do not share one vertical scale.
function drawEducationChart(cfg) {
  d3.csv(baseAssetsUrl + cfg.csv).then( function(data) {

    data.forEach(d => {
      d.Year = Number(d.Year)
      d.Perc = Number(d.Perc)
    })

    // Title
    svg.append("text")
      .attr("transform", `translate(${x_global+300}, ${cfg.titleY})`)
      .attr("font-size", "25px")
      .attr("text-anchor", "middle")
      .attr("font-weight", 600)
      .text(cfg.title)

    // Group lines by degree
    const sumstat = d3.group(data, d => d.Degree);

    // Add X axis
    const x = d3.scaleLinear()
      .domain(d3.extent(data, d => d.Year))
      .range([ 0, width/1.5]);
    svg.append("g")
      .attr("transform", `translate(${x_global}, ${cfg.xAxisY})`)
      .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format("d")))
      .selectAll("text")
        .attr("font-size", "15px")
        .attr("font-weight", 600);

    // Add Y axis
    const y = d3.scaleLinear()
      .domain([0, d3.max(data, d => d.Perc)])
      .range([ height/2-130, 0 ]);
    svg.append("g")
      .attr("transform", `translate(${x_global}, ${cfg.plotY})`)
      .call(d3.axisLeft(y).ticks(cfg.yTicks).tickFormat(d3.format(".0%")))
      .selectAll("text")
        .attr("font-size", "15px")
        .attr("font-weight", 600);

    // Draw the lines. No element carries the class "line", so the selection starts
    // empty and join() appends one new path per degree instead of re-binding the
    // paths that the other chart has already drawn into the same group.
    const shareLine = d3.line()
      .x(d => x(d.Year))
      .y(d => y(d.Perc))

    const graph = svg.selectAll(".line")
      .data(sumstat)
      .join("path")
      .attr("transform", `translate(${x_global}, ${cfg.plotY})`)
        .attr("fill", "none")
        .attr("stroke", d => color(d[0]) )
        .attr("stroke-width", 5)
        .attr('opacity', 0.8)
        .attr("d", d => shareLine(d[1]))

    // Interactive: thicken and fully reveal the hovered line.
    // NOTE(review): the ".value" selections are document-wide and nothing in this
    // chart carries that class - presumably inherited from another chart; confirm
    // before removing.
    graph
      // MOUSE ON
      .on('mouseenter', function () {
        d3.selectAll('.value')
          .attr('opacity', 0)

        d3.select(this)
          .transition()
          .duration(100)
          .attr('opacity', 1)
          .attr("stroke-width", 10)
      })
      // MOUSE LEAVE
      .on('mouseleave', function () {
        d3.selectAll('.value')
          .attr('opacity', 1)

        d3.select(this)
          .transition()
          .duration(100)
          .attr('opacity', 0.8)
          .attr("stroke-width", 5)
      })

  })
}


// === MEN ===
drawEducationChart({
  csv: "education_man.csv",
  title: "Gentlemen",
  titleY: height/2-410,
  xAxisY: height/2-70,
  plotY: height/2-390,
  yTicks: 13
})

// === WOMEN ===
drawEducationChart({
  csv: "education_woman.csv",
  title: "Ladies",
  titleY: height/2+90,
  xAxisY: height/1.03,
  plotY: height/2+100,
  yTicks: 15
})

// Annot: one note next to each chart
const annot1 = [
  {
  note: {
    label: "Increase in Bsc & decrease in Msc due to a bigger pool of respondents between 18 and 24 yo.",
    title: "Bsc vs Msc",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 780,
  y: 95,
  dy: +10,
  dx: 30
},
{
  note: {
    label: "We see the same trend for women as well.",
    title: "Bsc vs Msc",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 780,
  y: 620,
  dy: +10,
  dx: 30
}]

svg.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot1))

});
'''


# Emit the HTML markup first, then wrap the D3 script in a Javascript display
# object and send it to the front end. The order matters: the script looks up
# elements that the HTML output creates.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<div class="alert simple-alert" style="font-family: times-new-roman">
  <p style="font-family: times-new-roman"><center>👀 So far, we know that we're looking at a 20:80 ratio between 💃ladies and 🎩gents. We know that Data Science is gaining much more traction among young people in 2021 and that more and more people are drawn towards the Kaggle platform, especially students. We know that, although the vast majority of respondents are from India and the US, the number of respondents from Nigeria, Pakistan, Egypt and Indonesia is increasing fast, so it's possible that we will see more diversity in the years to come.</center></p>
</div>

<h1 style="font-family: parklane">2. Getting up close and personal</h1>

<h2 style="font-family: parklane">2.1 Occupation</h2>

<p style="font-family: times-new-roman">For both genders, the majority of respondents are either <b>Students</b> or have a profession within <b>Data Science</b>. However, an interesting difference appears in the 3rd most common position:</p>

* <p style="font-family: times-new-roman">🎩 3rd most common position for gents is <b>Software Engineer</b>.</p>
* <p style="font-family: times-new-roman">💃 3rd most common position for ladies is <b>Data Analyst</b>.</p>

<p style="font-family: times-new-roman">Throughout the years, there are no significant changes in these rankings.</p>

In [12]:
# ---------- Occupation: respondents per (Year, Gender, Role) ----------
cols = ["Year", "What is your gender? - Selected Choice",
        "Select the title most similar to your current role (or most recent title if retired): - Selected Choice"]

# Work on explicit copies so the column assignment below does not write into
# a slice of `df` (avoids pandas' SettingWithCopyWarning).
role = df[cols].copy()
role.columns = ["Year", "Gender", "Role"]
role = role[role["Gender"].isin(["Man", "Woman"])].copy()

# The survey renamed job titles from year to year; collapse the variants onto
# one label each. `old_role[i]` is replaced by `new_role[i]`.
old_role = ['Currently not employed', 'Product Manager',
            'DBA/Database Engineer', 'Product/Project Manager',
            'Not employed', 'Manager', 
            'Software Developer/Software Engineer', 'Scientist/Researcher',
            'Research Assistant', 'Predictive Modeler', 'Data Miner',
            'Programmer', 'Operations Research Practitioner', 'Developer Advocate',
            'Machine Learning Engineer', 'Researcher', 'Program/Project Manager']
new_role = ['Unemployed', 'Project Manager',
            'DBA/Database Engineer', 'Project Manager',
            'Unemployed', 'Project Manager',
            'Software Engineer', 'Research Scientist',
            'Research Scientist', 'Data Scientist', 'Data Engineer',
            'Computer Scientist', 'Research Scientist', 'Software Engineer',
            'Data Scientist', 'Research Scientist', 'Project Manager']
role["Role"] = role["Role"].replace(old_role, new_role)

# One row per (Year, Gender, Role) with the number of respondents. Naming the
# count column explicitly keeps this independent of the pandas version
# (the default name is 0 before pandas 2.0 and "count" afterwards).
role = role.value_counts().reset_index(name="Count")

# Keep the 12 most common roles. The ranking must use the number of
# respondents (sum of Count); counting rows of the aggregated table would only
# measure in how many (Year, Gender) groups a role shows up.
role_totals = role.groupby("Role")["Count"].sum().sort_values(ascending=False)
to_erase = list(role_totals.index[12:])
role = role[~role["Role"].isin(to_erase)]

# The chart only shows the latest survey, split by gender.
role = role[role["Year"]==2021]
role_man = role[role["Gender"]=="Man"]
role_woman = role[role["Gender"]=="Woman"]

role_man.to_csv("role_man.csv", index=False)
role_woman.to_csv("role_woman.csv", index=False)

# Write the two illustrations used by the chart into the working directory,
# next to the CSV files the D3 script reads.
for src_path, out_name in [
    ('../input/kaggle-data-science-survey-20172021/images/gents6.png', "gents6.png"),
    ('../input/kaggle-data-science-survey-20172021/images/ladies6.png', "ladies6.png"),
]:
    im = imageio.imread(src_path)
    Image.fromarray(im).save(out_name)

# HTML scaffold for the occupation chart: Bootstrap stylesheet, the CSS used
# by the chart (container sizing, hidden axes, annotation colours, tooltip),
# a hidden <img id="baseimg"> and the "#map6" container inside ".all6".
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all6 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.gents text {
		fill: #A54D27;
	}

	.annotation.ladies text {
		fill: #6D2251;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 1.3rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all6">
	<center><h1 style="font-family: parklane">Occupation</h1></center>
	<center><h2>2021: Frequency of Respondents on Roles</h2></center>
	<div class="row">
		<div id="map6" class="svg-container"></div>
	</div>
</div>
'''

# D3 (v6) script for the occupation chart. It draws one radial bar chart per
# gender from role_man.csv / role_woman.csv inside "#map6" and adds two
# annotations. The script is kept as a plain string and executed in the
# browser through RequireJS.
js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all6 = document.getElementsByClassName('all6')[0];
$all6.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 10, RIGHT: 10, TOP: 10, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const innerRadius = 100
const outerRadius = Math.min(width, height) / 5

const svg = d3.select("#map6").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)


// Men
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"gents6.png")
    .attr("x", "6.3%")
    .attr("y", "47%")
    .attr("width", 340)
    .attr("height", 340)
    .style("opacity", 1)

// Women
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"ladies6.png")
    .attr("x", "59.5%")
    .attr("y", "47%")
    .attr("width", 260)
    .attr("height", 260)
    .style("opacity", 1)

// Vertical divider between the two charts
svg.append('line')
.style("stroke", "black")
.style("stroke-width", 1)
    .attr("x1", 480)
    .attr("y1", 60)
    .attr("x2", 480)
    .attr("y2", 900)


// Draws one radial bar chart: heading, one arc per role and rotated labels.
//   data        rows with a Role and a Count field (Count is parsed here)
//   x_global    horizontal centre of the chart in viewBox units
//   title       heading shown above the chart
//   color_range [first, last] colours interpolated across the bars
function create_radial_bar(data, x_global, title, color_range) {

  const centre = `translate(${x_global}, ${width/1.8})`

  svg.append("text")
    .attr("transform", `translate(${x_global}, ${width/3-200})`)
    .attr("font-size", "28px")
    .attr("text-anchor", "middle")
    .attr("font-weight", 600)
    .text(title)

  // d3.csv delivers every field as a string
  data.forEach(d => {
    d.Count = Number(d.Count)
  })

  // Color: bars are indexed 0..n-1, so the domain has to start at 0 for the
  // first bar to get the first colour and the last bar the last colour.
  const myColor = d3.scaleLinear()
      .domain([0, Math.max(data.length - 1, 1)])
      .range(color_range)

  // Scales: angle per role, radius per count
  const x = d3.scaleBand()
      .range([0, 2 * Math.PI])
      .align(0)
      .domain(data.map(d => d.Role));
  const y = d3.scaleRadial()
      .range([innerRadius, outerRadius])
      .domain([0, d3.max(data, d => d.Count)]);

  // Add the bars
  svg.append("g")
    .selectAll("path")
    .data(data)
    .join("path")
      .attr("transform", centre)
      .attr("fill", (d, i) => myColor(i))
      .attr("d", d3.arc()
          .innerRadius(innerRadius)
          .outerRadius(d => y(d.Count))
          .startAngle(d => x(d.Role))
          .endAngle(d => x(d.Role) + x.bandwidth())
          .padAngle(0.01)
          .padRadius(innerRadius))

  // Angle (radians) at the middle of a role's band
  const midAngle = d => x(d.Role) + x.bandwidth() / 2
  // Labels on the left half of the circle are turned by 180 degrees and
  // anchored at their end so the text is not upside down.
  const isFlipped = d => (midAngle(d) + Math.PI) % (2 * Math.PI) < Math.PI

  // Add the labels
  svg.append("g")
    .attr("transform", centre)
    .selectAll("g")
    .data(data)
    .join("g")
      .attr("text-anchor", d => isFlipped(d) ? "end" : "start")
      .attr("transform", d => "rotate(" + (midAngle(d) * 180 / Math.PI - 90) + ")" + "translate(" + (y(d.Count) + 10) + ",0)")
    .append("text")
      .text(d => d.Role)
      .attr("transform", d => isFlipped(d) ? "rotate(180)" : "rotate(0)")
      .style("font-size", "14px")
      .attr("font-weight", 600)
      .attr("alignment-baseline", "middle")
}


// === Men ===
d3.csv(baseAssetsUrl + "role_man.csv").then(function(data){
  create_radial_bar(data, 230, "Gentlemen", ["#A54D27", "#E4A68B"])
});

// === Women ===
d3.csv(baseAssetsUrl + "role_woman.csv").then(function(data){
  create_radial_bar(data, 695, "Ladies", ["#6D2251", "#E2A2CA"])
});


// Annot
const annot1 = [
  {
  note: { 
    label: "The majority of respondents are either in univ or DS-ists (for both men and women).",
    title: "Students & Data Scientists",
    wrap: 300, 
    padding: 5, 
  },
  className: "gents",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 330,
  y: 360,
  dy: -100,
  dx: -1
},
{
  note: { 
    label: "3rd most preferred role for women is Data Analyst, compared to SE for men.",
    title: "Data Analyst vs Engineer",
    wrap: 300, 
    padding: 5, 
  },
  className: "ladies",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 870,
  y: 500,
  dy: +290,
  dx: -1
}]

svg.append("g")
  .style('font-size', 19)
  .call(d3.annotation()
          .annotations(annot1))

});
'''


# Emit the HTML scaffold first so that "#map6" and "#baseimg" exist in the
# page, then wrap the D3 script in a Javascript display object and send it to
# the front end. The script looks both elements up, so the order matters.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

In [13]:
# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='role_eda', 
#                  config=CONFIG, anonymous="allow")
# create_wandb_plot(x_data=role_man["Role"], 
#                   y_data=role_man["Count"],
#                   x_name="Role", y_name="Count", 
#                   title="Male Role Count", log="role1", plot="bar")
# create_wandb_plot(x_data=role_woman["Role"], 
#                   y_data=role_woman["Count"],
#                   x_name="Role", y_name="Count", 
#                   title="Female Role Count", log="role2", plot="bar")
# wandb.finish()

<h2 style="font-family: parklane">2.2 What language are we speaking?</h2>

<p style="font-family: times-new-roman">The first thing to notice is that the pool of respondents in <b>2021 is less experienced than in previous years</b>. Between 2017 and 2020, the majority had been programming for between 1 and 5 years. However, in 2021 the vast majority of respondents have been programming for only 1-3 years.</p>

<p style="font-family: times-new-roman">With regard to the language each gender would recommend, it seems that <b>Python has the overwhelming vote</b>. However, <b>R is losing popularity</b> in favor of SQL.</p>

In [14]:
# ========== Years coding ==========
cols = ["Year", "What is your gender? - Selected Choice",
        "For how many years have you been writing code and/or programming?"]
# Explicit copies keep the column assignment below from writing into a slice
# of `df` (avoids pandas' SettingWithCopyWarning).
program = df[cols].copy()
program.columns = ["Year", "Gender", "Years_Programming"]
program = program[program["Gender"].isin(["Man", "Woman"])].copy()

# Answer options changed between surveys; map every variant onto one of seven
# buckets. The numeric prefix makes the buckets sort in a meaningful order.
# `old_code[i]` is replaced by `new_code[i]`.
old_code = ['5-10 years', '20+ years', '1-3 years', '< 1 years', '3-5 years',
       '10-20 years', 'I have never written code', '1-2 years',
       'I have never written code but I want to learn', '< 1 year',
       '20-30 years', '30-40 years',
       'I have never written code and I do not want to learn',
       '40+ years', 'Less than a year', '3 to 5 years',
       'More than 10 years', '6 to 10 years', '1 to 2 years',
       "I don't write code to analyze data"]
new_code = ['5. 5-10 years', '7. 20+ years', '3. 1-3 years', '2. <1 years', '4. 3-5 years',
       '6. 10-20 years', '1. Never', '3. 1-3 years',
       '1. Never', '2. <1 years',
       '7. 20+ years', '7. 20+ years',
       '1. Never',
       '7. 20+ years', '2. <1 years', '4. 3-5 years',
       '6. 10-20 years', '5. 5-10 years', '3. 1-3 years',
       "1. Never"]
program["Years_Programming"] = program["Years_Programming"].replace(old_code,
                                                                    new_code)

# Respondents per (Year, Gender, bucket). The count column is named explicitly:
# its default name is 0 before pandas 2.0 and "count" afterwards, so pivoting
# on `values=0` only works on old versions.
program = program.value_counts().reset_index(name="Count")
program = pd.pivot(data=program, index=["Year", "Years_Programming"],
                   columns="Gender", values="Count").reset_index()

# Rename by label rather than by position so "male"/"female" cannot be swapped
# if the order of the pivoted gender columns ever changes.
program = program.rename(columns={"Year": "year",
                                  "Years_Programming": "program",
                                  "Man": "male",
                                  "Woman": "female"})
program = program[["year", "program", "male", "female"]]
program.to_csv("program.csv", index=False)

# ========== Preferences ==========
cols2 = ["Year", "What is your gender? - Selected Choice",
         "What programming language would you recommend an aspiring data scientist to learn first? - Selected Choice"]
pref = df[cols2].copy()
pref.columns = ["Year", "Gender", "Preference"]
pref = pref[pref["Gender"].isin(["Man", "Woman"])]

# Only the three most recommended languages are charted.
to_keep = list(pref["Preference"].value_counts().index[:3])
pref = pref[pref["Preference"].isin(to_keep)]
pref = pref.value_counts().reset_index(name="Count")

pref = pd.pivot(data=pref, index=["Year", "Gender"],
                columns="Preference", values="Count").reset_index()
# NOTE(review): this selection assumes the top three are Python, R and SQL.
pref = pref[["Year", "Gender", "Python", "R", "SQL"]]
pref_man = pref[pref["Gender"] == "Man"].drop(columns=["Gender"])
pref_woman = pref[pref["Gender"] == "Woman"].drop(columns=["Gender"])

pref_man.to_csv("pref_man.csv", index=False)
pref_woman.to_csv("pref_woman.csv", index=False)

# Write the two illustrations used by the chart into the working directory,
# next to the CSV files the D3 script reads.
for src_path, out_name in [
    ('../input/kaggle-data-science-survey-20172021/images/man7.png', "man7.png"),
    ('../input/kaggle-data-science-survey-20172021/images/lady7.png', "lady7.png"),
]:
    im = imageio.imread(src_path)
    Image.fromarray(im).save(out_name)

# HTML scaffold for the "years programming" chart: Bootstrap stylesheet, the
# CSS used by the chart, a hidden <img id="baseimg">, the "#filter7" year
# selector and the "#map7" container inside ".all7".
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all7 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all7">
	<center><h1 style="font-family: parklane">For how many years have we been programming?</h1></center>
	<label for="filter7" style="font-size: 25px">Select Year Here:</label>
	<select id="filter7" style="font-size: 20px">
		<option value="2021">2021</option>
		<option value="2020">2020</option>
		<option value="2019">2019</option>
		<option value="2018">2018</option>
		<option value="2017">2017</option>
	  </select>
	<div class="row">
		<div id="map7" class="svg-container"></div>
	</div>
</div>
'''

# D3 (v6) script for the "years programming" chart. Top half: a population
# pyramid (men left, women right) driven by program.csv and the "#filter7"
# year selector. Bottom half: two stacked bar charts of recommended languages
# from pref_man.csv / pref_woman.csv. The script is kept as a plain string and
# executed in the browser through RequireJS.
js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all7 = document.getElementsByClassName('all7')[0];
$all7.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const centreSpacing = 100
// Width of one side of the pyramid and pixel length of its longest bar
const halfWidth = (width - centreSpacing) / 2
const barMax = halfWidth - 150

const svg = d3.select("#map7").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man7.png")
    .attr("x", "-13%")
    .attr("y", "0%")
    .attr("width", 420)
    .attr("height", 420)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady7.png")
.attr("x", "73%")
.attr("y", "2%")
.attr("width", 360)
.attr("height", 360)
.style("opacity", 1)

// Gradient
const defs = svg.append('defs');

const bgGradient1 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient1')

bgGradient1
  .append('stop')
  .attr('stop-color', '#79A988')
  .attr('offset', '0%');
bgGradient1
  .append('stop')
  .attr('stop-color', '#487054')
  .attr('offset', '100%');

const bgGradient2 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient3')

bgGradient2
  .append('stop')
  .attr('stop-color', '#CE8E27')
  .attr('offset', '0%');
bgGradient2
  .append('stop')
  .attr('stop-color', '#DEA954')
  .attr('offset', '100%');

// ------ DEFAULTS ------
// Initialize scales
const y = d3.scaleBand()
.range([height/2.7, 0])
.padding(0.1)

// Bar length in pixels for a count; also the axis of the female side, whose
// bars grow left -> right away from the centre gap.
const x = d3.scaleLinear()
.range([0, barMax])

// Axis scale of the male side. Those bars grow right -> left, so zero has to
// sit at the right end of the axis.
const xReverse = d3.scaleLinear()
.range([barMax, 0])

// Both axes start exactly where the bars of their side start/end.
const maleAxis = svg.append("g")
  .attr('transform', `translate(${margin.LEFT + halfWidth - barMax}, ${height-530})`)

const femaleAxis = svg.append("g")
  .attr('transform', `translate(${margin.LEFT + halfWidth + centreSpacing}, ${height-530})`)


// Male + Female + Labels objects
var gM = svg.append("g")
  .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

var gF = svg.append("g")
  .attr("transform", `translate(${margin.LEFT + halfWidth + centreSpacing}, ${margin.TOP})`);

var gLabels = svg.append('g')
  .attr("transform", `translate(${margin.LEFT + halfWidth}, ${margin.TOP})`);

// Heading of the centre column. Created once here; creating it inside
// update() would add another copy on every year change.
gLabels
  .append('text')
  .attr('class', 'pyramid-title')
  .text('Years programming')
  .attr('x', centreSpacing / 2-90)
  .attr('y', 0)
  .style('font-size', '20px')
  .style('font-weight', 600);


// === UPDATE ===
// Redraws bars, category labels and both axes for the rows of one year.
// Each row needs the fields program, male and female.
function update(data) {

  // d3.csv delivers every field as a string (an empty cell becomes 0)
  data.forEach(d => {
    d.female = Number(d.female)
    d.male = Number(d.male)
  })

  // Scales
  y.domain(data.map(d => d.program))

  // Fall back to 1 so the scales stay valid when a year has no rows
  const maxVal = d3.max(data, d => d3.max([d.male, d.female])) || 1

  x.domain([0, maxVal])
  xReverse.domain([0, maxVal])

  // Male: right edge fixed at the centre gap, bars extend to the left
  gM.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', d => halfWidth - x(d.male))
    .attr('y', d => y(d.program))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.male))
    .style('fill', 'url(#bg-gradient1)');

  // Female: left edge fixed at the centre gap, bars extend to the right
  gF.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', 0)
    .attr('y', d => y(d.program))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.female))
    .style('fill', 'url(#bg-gradient3)');

  // Labels: selected by class so the heading is never picked up by the join
  gLabels.selectAll('text.pyramid-label')
    .data(data)
    .join('text')
    .attr('class', 'pyramid-label')
    .attr('x', centreSpacing / 2-45)
    .attr('y', d => y(d.program) + y.bandwidth() / 2+5)
    .style('font-weight', 600)
    .text(d => d.program);

  // Axis Update
  maleAxis
    .transition()
    .duration(1000)
    .call(d3.axisBottom(xReverse).ticks(3))
    .selectAll("text")
      .attr("font-size", "15px");

  femaleAxis
    .transition()
    .duration(1000)
    .call(d3.axisBottom(x).ticks(3))
    .selectAll("text")
      .attr("font-size", "15px");
}


// === Years Programming ===
d3.csv(baseAssetsUrl + "program.csv").then(function(dataload){

  const filter = d3.select("#filter7")

  // Draw the rows of the year currently selected in the drop-down. Years are
  // compared as numbers so "2021" and "2021.0" both match.
  function render() {
    const year = Number(filter.property("value"))
    update(dataload.filter(d => Number(d.year) === year))
  }

  // default view
  render()

  // on change
  filter.on("change", render)

});


// One tooltip shared by both stacked bar charts
const tooltip = d3.select("body").append("div")
  .attr("class", "tooltip-donut")
  .style("opacity", 0);

// Draws one stacked bar chart (one bar per year, one layer per language).
//   data       rows with a Year field plus one field per subgroup
//   subgroups  names of the stacked fields
//   x_global   left edge of the chart in viewBox units
//   y_range    upper bound of the (hidden) y axis
//   color      scale mapping a subgroup name to a fill colour
function create_stacked_bar(data, subgroups, x_global, y_range, color) {

  const groups = data.map(d => d.Year)

  // Add X axis
  const x = d3.scaleBand()
      .domain(groups)
      .range([0, width/2.5])
      .padding(0.2)
  svg.append("g")
    .attr("transform", `translate(${x_global}, ${height/1.01})`)
    .call(d3.axisBottom(x).tickSizeOuter(0))
    .selectAll("text")
      .attr("y", "10")
      .attr("x", "0")
      .attr("font-size", "15px")
      .attr("font-weight", 300)
      .attr("text-anchor", "middle")

  // Add Y axis
  const y = d3.scaleLinear()
    .domain([0, y_range])
    .range([ height/2.3, 0]);
  svg.append("g")
    .attr("class", "axisHidden")
    .attr("transform", `translate(${x_global}, ${height/1.85})`)
    .call(d3.axisLeft(y).ticks(0))
    .selectAll("text")
      .attr("font-size", "11px")
      .attr("font-weight", 300);

  // Stack data
  const stackedData = d3.stack()
    .keys(subgroups)
    (data)

  // Show the bars: one <g> per subgroup, one <rect> per year inside it
  const bars = svg.append("g")
    .selectAll("g")
    .data(stackedData)
    .join("g")
      .attr("transform", `translate(${x_global}, ${height/1.85})`)
      .attr("fill", d => color(d.key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("class", d => "myRect " + d.key )
      .selectAll("rect")
      .data(d => d)
      .join("rect")
        .attr("x", d => x(d.data.Year))
        .attr("y", d => y(d[1]))
        .attr("height", d => y(d[0]) - y(d[1]))
        .attr("width", x.bandwidth())

  // Interactive
  bars
    // MOUSE ON
    .on('mouseenter', function (event, dt) {
      // The subgroup name is the key of the parent <g>'s datum
      const subgroupName = d3.select(this.parentNode).datum().key
      const subgroupValue = dt.data[subgroupName]

      // Dim every layer, then restore the hovered subgroup
      d3.selectAll(".myRect").style("opacity", 0.2)
      d3.selectAll("." + subgroupName).style("opacity", 1)

      tooltip.transition()
        .duration(50)
        .style("opacity", 1);

      tooltip.html("Language: " + subgroupName + "<br>" + "Frequency: " + subgroupValue)
        .style("left", (event.pageX) + "px")
        .style("top", (event.pageY-30) + "px");
    })
    // MOUSE LEAVE
    .on('mouseleave', function () {
      d3.selectAll(".myRect")
        .style("opacity", 1)

      tooltip.transition()
        .duration(50)
        .style("opacity", 0);
    })

}


// === Preferences ===
d3.csv(baseAssetsUrl + "pref_man.csv").then(function(data){

  const x_global = 50
  const y_range = 20000
  const subgroups = data.columns.slice(1)
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#385742", "#5F9571", "#9BBFA7"]);

  svg.append("text")
  .attr("y", "55%")
  .attr("x", "25%")
  .attr("font-size", "22px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Gentlemen Preferences")

  create_stacked_bar(data, subgroups, x_global, y_range, color)
  
});

d3.csv(baseAssetsUrl + "pref_woman.csv").then(function(data){

  const x_global = 500
  const y_range = 5000
  const subgroups = data.columns.slice(1)
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#CE8E27", "#DEA954", "#E8C387"]);

  svg.append("text")
  .attr("y", "55%")
  .attr("x", "70%")
  .attr("font-size", "22px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Ladies Preferences")

  create_stacked_bar(data, subgroups, x_global, y_range, color)
  
});

});
'''


# Emit the HTML scaffold first so that "#map7", "#filter7" and "#baseimg"
# exist in the page, then wrap the D3 script in a Javascript display object
# and send it to the front end. The script looks these elements up, so the
# order matters.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

In [15]:
# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='lang_eda', 
#                  config=CONFIG, anonymous="allow")
# man, woman = wb_prep_data(pref)
# create_wandb_plot(x_data=man["Category"], 
#                   y_data=man["Count"],
#                   x_name="Language Pref", y_name="Count", 
#                   title="Gentlemen: Language Preferences", log="pref1", plot="bar")
# create_wandb_plot(x_data=woman["Category"], 
#                   y_data=woman["Count"],
#                   x_name="Language Pref", y_name="Count", 
#                   title="Ladies: Language Preferences", log="pref2", plot="bar")
# wandb.finish()

<p style="font-family: times-new-roman">Both genders look the same in terms of distribution of languages and what they use more often in their projects.</p>

<p style="font-family: times-new-roman">A few trends to mention:</p>

* <p style="font-family: times-new-roman"><b>C & C++ have been increasing by a few percentage points</b>, meaning that there are a few more respondents who use these 2 languages than in previous years.</p>
* <p style="font-family: times-new-roman"><b>R has been decreasing in popularity</b>, as we can also see in respondents' recommendations. Still, 💃<b>women tend to use it more</b> (23.4% in 2021) than 🎩men (19.8% in 2021).</p>

<div class="alert success-alert" style="font-family: times-new-roman">
  <h4 style="font-family: times-new-roman"><center>👀 Psst! Hover your mouse over the streamgraphs to see the legend and more information!</center></h4>
</div>

In [16]:
# ---------- Languages used: share of respondents per (Year, Gender) ----------
# The eleven language columns are taken by position from `df`, followed by the
# two grouping columns.
cols = list(df.columns[8:19]) + ["Year", "What is your gender? - Selected Choice"]

language_names = ["Python", "R", "SQL", "C", "C++", "Java",
                  "Javascript", "Julia", "Swift", "Bash", "MATLAB"]

langs = df[cols]
langs.columns = language_names + ["Year", "Gender"]

# Keep men and women only and leave out the 2017 survey.
is_man_or_woman = langs["Gender"].isin(["Man", "Woman"])
not_2017 = langs["Year"] != 2017
langs = langs[is_man_or_woman & not_2017]

# Non-null answers per language, joined with the `total` table on the group keys.
lang_counts = langs.groupby(["Year", "Gender"]).count().reset_index()
langs = pd.merge(lang_counts, total, on=["Year", "Gender"])

# Every column between the two keys and the last one is turned into a
# percentage of "Count", rounded to one decimal.
cols_perc = list(langs.columns[2:-1])
for lang_col in cols_perc:
    share = langs[lang_col] / langs["Count"] * 100
    langs[lang_col] = share.apply(lambda pct: round(pct, 1))
langs = langs.drop(columns="Count")

# Julia and Swift are left out of the exported tables.
not_exported = ["Gender", "Julia", "Swift"]
langs_man = langs[langs["Gender"] == "Man"].drop(columns=not_exported)
langs_woman = langs[langs["Gender"] == "Woman"].drop(columns=not_exported)

langs_man.to_csv("langs_man.csv", index=False)
langs_woman.to_csv("langs_woman.csv", index=False)

# Write the two illustration images into the working directory as well.
for src_path, out_name in [
    ('../input/kaggle-data-science-survey-20172021/images/man8.png', "man8.png"),
    ('../input/kaggle-data-science-survey-20172021/images/lady8.png', "lady8.png"),
]:
    im = imageio.imread(src_path)
    Image.fromarray(im).save(out_name)

# HTML scaffold for the "evolution of language" chart: Bootstrap stylesheet,
# the CSS used by the chart, a hidden <img id="baseimg"> and two containers,
# "#map8-gents" and "#map8-ladies", inside ".all8".
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all8 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.all8 .svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 54%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all8">
	<center><h1 style="font-family: parklane">Evolution of language</h1></center>
	<center><h3><i>- Percentage of respondents and their preferences -</i></h3></center>
	<div class="row">
		<div id="map8-gents" class="svg-container"></div>
	</div>

	<div class="row">
		<div id="map8-ladies" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Expose every d3-annotation export on the d3 object (d3.annotation, ...)
for (const exportName in d3Annotation) {
  d3[exportName] = d3Annotation[exportName];
}

// Assets URL set by Kaggle: take the resolved src of the hidden image and
// drop everything from the file name onwards
const baseImg = document.getElementById('baseimg');
const baseAssetsUrl = baseImg.src.replace(/man.png.*$/, '');

// Background picture of the wrapper (cannot be a relative URL in the CSS)
document.getElementsByClassName('all8')[0]
  .style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Drawing area inside the 950 x 500 viewBox
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 };
const width = 950 - (margin.LEFT + margin.RIGHT);
const height = 500 - (margin.TOP + margin.BOTTOM);
// Half-extent of the y domain and horizontal offset of the chart group
const chart_size = 150;
const x_global = 150;

/**
 * Draw a centred (silhouette-offset) stacked area chart into `svg` and attach
 * the hover cursor that reports the year and value under the pointer.
 *
 * Reads the enclosing constants height, width, chart_size and x_global.
 *
 * @param {Array<Object>} data - Rows as returned by d3.csv. A "Year" column is
 *   required; every column after the first becomes one stacked series (the
 *   first column is assumed to be "Year" -- TODO confirm against the CSV).
 * @param {Object} svg - D3 selection of the <svg> element to draw into.
 * @param {string} name - Heading drawn above the chart.
 */
function create_graph(data, svg, name){

  // Vertical offset shared by the heading, both axes and the streams
  const XY_axis = height/2-200

  svg.append("text")
  .attr("transform", `translate(${x_global-60}, ${XY_axis-10})`)
  .attr("font-size", "24px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text(name)

  // CSV values arrive as strings; Year must be numeric for the linear scale
  data.forEach(d => {
    d.Year = Number(d.Year)
  })

  // List of groups
  var keys = data.columns.slice(1)

  // Stacking
  var series = d3.stack()
    .keys(keys)
    .offset(d3.stackOffsetSilhouette)
    (data)

  // X Axis
  var x = d3.scaleLinear()
    .domain(d3.extent(data, d => d.Year))
    .range([0, width/1.5])

  // NOTE(review): .style("font-size", 15) has no unit; CSS may ignore it -- confirm.
  var xAxis = g => g
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisBottom(x)
            .tickSize(height/1.2).ticks(4).tickFormat(d3.format("d")))
    .call(g => g.select(".domain").remove())
    .call(g => g.selectAll(".tick line")
                .attr("stroke", "#3A3A3A")
                .attr('stroke-dasharray', '5 5'))
    .call(g => g.selectAll(".tick")
                .select('text')
                .attr('fill', "#3A3A3A")
                .style("font-size", 15))

  // Y Axis (only used for positioning; styled invisible through .axisHidden)
  var y = d3.scaleLinear()
  .domain([-chart_size, chart_size])
  .range([height/1.2, 0])
  svg.append("g")
  .attr("class", "axisHidden")
  .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisLeft(y).ticks(0))

  // Area Chart
  var area = d3.area()
  .x(d => x(d.data.Year))
  .y0(d => y(d[0]))
  .y1(d => y(d[1]))

  // Color
  const color = d3.scaleOrdinal()
    .domain(keys)
    .range(["#D67A83", "#D05257", "#D87F58", "#CE8E27",
     "#5F9571", "#12A6B6", "#74A5D2", "#226BBF", "#5D1D46"])

  // Show; data-label lets the hover handler identify the stream under the pointer
  const path = svg.append("g")
    .selectAll("path")
    .data(series)
    .join("path")
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
      .attr("data-label", d => d.key)
      .attr("fill", ({key}) => color(key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.3)
      .attr("d", area)
      .attr("opacity", 0.8)

  svg.append("g")
      .call(xAxis)

  // ==== Interactive ====
  const hover = (svg, path) => {

    // Cursor group: hidden until the pointer enters the svg
    const line = svg.append("g")
         .attr("display", "none");

    // One vertical line per series so the active one can take its color
    line.append("g")
        .selectAll("line")
        .data(series)
        .join("line")
        .attr("class", "cursor-line")
        .attr("fill", "#fff")
        .attr("stroke-width", 0.8)
        .attr("x1", 10)
        .attr("y1", height)
        .attr("x2", 10)
        .attr("y2", 10);

    line.append("text")
      .attr("class", "text-year")
      .attr("font-size", 14)
      .attr("x", 0)
      .attr("y", 26)
      .attr("transform", "rotate(-90 20 20) translate(0, -20)");

    line.append("g")
        .selectAll("text")
        .data(series)
        .join("text")
        .attr("class", "text-label")
        .attr("font-weight", 900)
        .attr("font-size", 17)
        .attr("y", height-10)
        .attr("x", -8)
        .attr("transform",
        "rotate(-90 10 460)");

    // One value text per data row; only the row of the hovered year gets text
    line.append("g")
        .selectAll("text")
        .data(data)
        .join("text")
        .attr("class", "text-value")
        .attr("font-weight", 700)
        .attr("font-size", 14)
        .attr("y", height-(height*0.94))
        .attr("x", 14);

    const mousemove = (event) => {

      event?.preventDefault();
      const pointer = d3.pointer(event);
      const label = d3.select(event.target).attr("data-label");

      // Not over a stream: restore the default look and hide the cursor
      if (label === null) {
        path.attr("opacity", 0.8);
        line.style("visibility", "hidden");
        return;
      }

      // The pointer is in svg coordinates while the x scale belongs to the
      // group translated by x_global: remove that offset before inverting,
      // then snap to the nearest integer year (truncation flipped too early)
      const year = Math.round(x.invert(pointer[0] - x_global));

      line.attr("transform", `translate(${pointer[0]}, 0)`)
          .style("visibility", "inherit");

      line.select(".text-year")
          .text(year);

      line.selectAll(".text-value")
        .text(d => d.Year === year ? d[label]+"%" : "");

      line.selectAll(".text-label")
        .style("visibility", "hidden")
        .filter(d => d.key === label)
        .style("visibility", "inherit")
        .attr("fill", d => color(d.key))
        .text(label);

      // "#fff0" = fully transparent white (the original "fff0" is not a color)
      line.selectAll(".cursor-line")
        .attr("stroke", "#fff0")
        .filter(d => d.key === label)
        .attr("stroke", d => color(d.key));

      // Dim every stream except the hovered one
      path.attr("opacity", d => d.key === label ? 0.8 : 0.3);
    }

    const mouseenter = () => {
      line.attr("display", null);
    }

    const mouseleave = () => {
      line.attr("display", "none");
    }

    svg
        .on("mousemove", mousemove)
        .on("mouseenter", mouseenter)
        .on("mouseleave", mouseleave)

  }

  svg.call(hover, path);
}

// ===== MEN =====
// Canvas for the gentlemen chart, drawn in a 950 x 500 viewBox
const gentsSvg = d3.select("#map8-gents")
  .append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Gentleman illustration
gentsSvg.append("image")
  .attr("xlink:href", baseAssetsUrl + "man8.png")
  .attr("x", "-12%")
  .attr("y", "13%")
  .attr("width", 370)
  .attr("height", 370)
  .style("opacity", 1);

// Annotation about C & C++
const gentsNote = {
  note: {
    title: "C & C++",
    label: "Have been increasing in popularity (for both men and women).",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 190,
  dx: 30,
  dy: 35,
};

gentsSvg.append("g")
  .style('font-size', 17)
  .call(d3.annotation().annotations([gentsNote]));

// Draw the chart once the CSV has been fetched
d3.csv(baseAssetsUrl + "langs_man.csv")
  .then(rows => create_graph(rows, gentsSvg, "Gentlemen"));

// ===== WOMEN =====
// Canvas for the ladies chart, drawn in a 950 x 500 viewBox
const ladiesSvg = d3.select("#map8-ladies")
  .append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Lady illustration
ladiesSvg.append("image")
  .attr("xlink:href", baseAssetsUrl + "lady8.png")
  .attr("x", "-17%")
  .attr("y", "2%")
  .attr("width", 510)
  .attr("height", 510)
  .style("opacity", 1);

// Annotation about R
const ladiesNote = {
  note: {
    title: "R",
    label: "Has been decreasing in popularity (for both genders).",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 280,
  dx: 30,
  dy: -35,
};

ladiesSvg.append("g")
  .style('font-size', 17)
  .call(d3.annotation().annotations([ladiesNote]));

// Draw the chart once the CSV has been fetched
d3.csv(baseAssetsUrl + "langs_woman.csv")
  .then(rows => create_graph(rows, ladiesSvg, "Ladies"));

});
'''


# Emit the HTML scaffold first, then the script that draws into its containers.
# NOTE(review): display() presumably returns None here (no display_id), so `h` looks unused -- confirm.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: parklane">2.3 Environment & Surroundings</h2>

<h2 style="font-family: times-new-roman">The Environment</h2>

<p style="font-family: times-new-roman">Regarding the Data Science setup and what the genders use most often within their Data Science environment, we can see that:</p>

* <p style="font-family: times-new-roman"><b>Visual Studio increased in popularity</b> significantly in 2021, after a slight dip in 2020. 🎩Gents use it more often (49% in 2021) than 💃ladies do (39% in 2021).</p>
* <p style="font-family: times-new-roman">After many years in which <b>Jupyter</b> held the majority of the usage (half of the respondents said they use Jupyter frequently), it <b>suddenly lost its popularity</b> in 2021, with only ~20% of users still saying that they use it on a regular basis.</p>

In [17]:
# Survey columns 22-30 are relabelled with the short IDE names below.
cols = df.columns[22:31].tolist()
new_names = [
    "Jupyter", "RStudio", "Visual Studio", "PyCharm", "Spyder",
    "Notepad++", "Sublime Text", "Vim/Emacs", "MATLAB",
]

# Aggregate the multiple-choice answers; 2017 is excluded from this chart.
ide = get_data_multiple_answers(cols, new_names)
ide = ide.loc[ide["Year"] != 2017]

# One table per gender; the Gender column itself is not exported.
ide_man = ide.loc[ide["Gender"] == "Man"].drop(columns=["Gender"])
ide_woman = ide.loc[ide["Gender"] == "Woman"].drop(columns=["Gender"])

# CSVs are written to the working directory, where the D3 script fetches them.
ide_man.to_csv("ide_man.csv", index=False)
ide_woman.to_csv("ide_woman.csv", index=False)

# Copy the two illustrations of this chart into the working directory as well.
for image_name in ("man9.png", "lady9.png"):
    im = imageio.imread(f'../input/kaggle-data-science-survey-20172021/images/{image_name}')
    Image.fromarray(im).save(image_name)

# HTML/CSS scaffold for this chart: a themed wrapper (.all9) holding two empty
# containers (#map9-gents, #map9-ladies) that the JavaScript below fills with SVG.
# The hidden <img id="baseimg"> is read by that script to derive the assets URL.
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all9 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.all9 .svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 54%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all9">
	<center><h1 style="font-family: parklane">Evolution of the Environment</h1></center>
	<center><h3><i>- Percentage of respondents and their preferences -</i></h3></center>
	<div class="row">
		<div id="map9-gents" class="svg-container"></div>
	</div>

	<div class="row">
		<div id="map9-ladies" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
// Tell RequireJS where to fetch D3 v6 and the d3-annotation plugin from.
require.config({
  // "d3src" is a path alias for the d3js.org host used by the map entries below.
  paths: {
    d3src: "https://d3js.org/",
  },
  // '*' applies the id -> location mapping to every requesting module.
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      // v1 micro-modules; presumably the dependencies d3-annotation asks for by name -- TODO confirm.
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Expose every d3-annotation export on the d3 object (d3.annotation, ...)
for (const exportName in d3Annotation) {
  d3[exportName] = d3Annotation[exportName];
}

// Assets URL set by Kaggle: take the resolved src of the hidden image and
// drop everything from the file name onwards
const baseImg = document.getElementById('baseimg');
const baseAssetsUrl = baseImg.src.replace(/man.png.*$/, '');

// Background picture of the wrapper (cannot be a relative URL in the CSS)
document.getElementsByClassName('all9')[0]
  .style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Drawing area inside the 950 x 500 viewBox
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 };
const width = 950 - (margin.LEFT + margin.RIGHT);
const height = 500 - (margin.TOP + margin.BOTTOM);
// Half-extent of the y domain and horizontal offset of the chart group
const chart_size = 150;
const x_global = 150;

/**
 * Draw a centred (silhouette-offset) stacked area chart into `svg` and attach
 * the hover cursor that reports the year and value under the pointer.
 *
 * Reads the enclosing constants height, width, chart_size and x_global.
 *
 * @param {Array<Object>} data - Rows as returned by d3.csv. A "Year" column is
 *   required; every column after the first becomes one stacked series (the
 *   first column is assumed to be "Year" -- TODO confirm against the CSV).
 * @param {Object} svg - D3 selection of the <svg> element to draw into.
 * @param {string} name - Heading drawn above the chart.
 */
function create_graph(data, svg, name){

  // Vertical offset shared by the heading, both axes and the streams
  const XY_axis = height/2-200

  svg.append("text")
  .attr("transform", `translate(${x_global-60}, ${XY_axis-10})`)
  .attr("font-size", "24px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text(name)

  // CSV values arrive as strings; Year must be numeric for the linear scale
  data.forEach(d => {
    d.Year = Number(d.Year)
  })

  // List of groups
  var keys = data.columns.slice(1)

  // Stacking
  var series = d3.stack()
    .keys(keys)
    .offset(d3.stackOffsetSilhouette)
    (data)

  // X Axis
  var x = d3.scaleLinear()
    .domain(d3.extent(data, d => d.Year))
    .range([0, width/1.5])

  // NOTE(review): .style("font-size", 15) has no unit; CSS may ignore it -- confirm.
  var xAxis = g => g
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisBottom(x)
            .tickSize(height/1.2).ticks(4).tickFormat(d3.format("d")))
    .call(g => g.select(".domain").remove())
    .call(g => g.selectAll(".tick line")
                .attr("stroke", "#3A3A3A")
                .attr('stroke-dasharray', '5 5'))
    .call(g => g.selectAll(".tick")
                .select('text')
                .attr('fill', "#3A3A3A")
                .style("font-size", 15))

  // Y Axis (only used for positioning; styled invisible through .axisHidden)
  var y = d3.scaleLinear()
  .domain([-chart_size, chart_size])
  .range([height/1.2, 0])
  svg.append("g")
  .attr("class", "axisHidden")
  .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisLeft(y).ticks(0))

  // Area Chart
  var area = d3.area()
  .x(d => x(d.data.Year))
  .y0(d => y(d[0]))
  .y1(d => y(d[1]))

  // Color
  const color = d3.scaleOrdinal()
    .domain(keys)
    .range(["#D67A83", "#D05257", "#D87F58", "#CE8E27",
     "#5F9571", "#12A6B6", "#74A5D2", "#226BBF", "#5D1D46"])

  // Show; data-label lets the hover handler identify the stream under the pointer
  const path = svg.append("g")
    .selectAll("path")
    .data(series)
    .join("path")
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
      .attr("data-label", d => d.key)
      .attr("fill", ({key}) => color(key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("d", area)
      .attr("opacity", 0.8)

  svg.append("g")
      .call(xAxis)

  // ==== Interactive ====
  const hover = (svg, path) => {

    // Cursor group: hidden until the pointer enters the svg
    const line = svg.append("g")
         .attr("display", "none");

    // One vertical line per series so the active one can take its color
    line.append("g")
        .selectAll("line")
        .data(series)
        .join("line")
        .attr("class", "cursor-line")
        .attr("fill", "#fff")
        .attr("stroke-width", 0.8)
        .attr("x1", 10)
        .attr("y1", height)
        .attr("x2", 10)
        .attr("y2", 10);

    line.append("text")
      .attr("class", "text-year")
      .attr("font-size", 14)
      .attr("x", 0)
      .attr("y", 26)
      .attr("transform", "rotate(-90 20 20) translate(0, -20)");

    line.append("g")
        .selectAll("text")
        .data(series)
        .join("text")
        .attr("class", "text-label")
        .attr("font-weight", 900)
        .attr("font-size", 17)
        .attr("y", height-10)
        .attr("x", -8)
        .attr("transform",
        "rotate(-90 10 460)");

    // One value text per data row; only the row of the hovered year gets text
    line.append("g")
        .selectAll("text")
        .data(data)
        .join("text")
        .attr("class", "text-value")
        .attr("font-weight", 700)
        .attr("font-size", 14)
        .attr("y", height-(height*0.94))
        .attr("x", 14);

    const mousemove = (event) => {

      event?.preventDefault();
      const pointer = d3.pointer(event);
      const label = d3.select(event.target).attr("data-label");

      // Not over a stream: restore the default look and hide the cursor
      if (label === null) {
        path.attr("opacity", 0.8);
        line.style("visibility", "hidden");
        return;
      }

      // The pointer is in svg coordinates while the x scale belongs to the
      // group translated by x_global: remove that offset before inverting,
      // then snap to the nearest integer year (truncation flipped too early)
      const year = Math.round(x.invert(pointer[0] - x_global));

      line.attr("transform", `translate(${pointer[0]}, 0)`)
          .style("visibility", "inherit");

      line.select(".text-year")
          .text(year);

      line.selectAll(".text-value")
        .text(d => d.Year === year ? d[label]+"%" : "");

      line.selectAll(".text-label")
        .style("visibility", "hidden")
        .filter(d => d.key === label)
        .style("visibility", "inherit")
        .attr("fill", d => color(d.key))
        .text(label);

      // "#fff0" = fully transparent white (the original "fff0" is not a color)
      line.selectAll(".cursor-line")
        .attr("stroke", "#fff0")
        .filter(d => d.key === label)
        .attr("stroke", d => color(d.key));

      // Dim every stream except the hovered one
      path.attr("opacity", d => d.key === label ? 0.8 : 0.3);
    }

    const mouseenter = () => {
      line.attr("display", null);
    }

    const mouseleave = () => {
      line.attr("display", "none");
    }

    svg
        .on("mousemove", mousemove)
        .on("mouseenter", mouseenter)
        .on("mouseleave", mouseleave)

  }

  svg.call(hover, path);
}

// ===== MEN =====
// Canvas for the gentlemen chart, drawn in a 950 x 500 viewBox
const gentsSvg = d3.select("#map9-gents")
  .append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Gentleman illustration
gentsSvg.append("image")
  .attr("xlink:href", baseAssetsUrl + "man9.png")
  .attr("x", "-17%")
  .attr("y", "12%")
  .attr("width", 450)
  .attr("height", 450)
  .style("opacity", 1);

// Annotation about Visual Studio
const gentsNote = {
  note: {
    title: "Visual Studio",
    label: "Got discovered by Data Scientists and increased in popularity significantly in 2021.",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 270,
  dx: 30,
  dy: -35,
};

gentsSvg.append("g")
  .style('font-size', 17)
  .call(d3.annotation().annotations([gentsNote]));

// Draw the chart once the CSV has been fetched
d3.csv(baseAssetsUrl + "ide_man.csv")
  .then(rows => create_graph(rows, gentsSvg, "Gentlemen"));

// ===== WOMEN =====
// Canvas for the ladies chart, drawn in a 950 x 500 viewBox
const svg2 = d3.select("#map9-ladies").append("svg")
// .attr("style", "outline: thin solid red;")
.attr("preserveAspectRatio", "xMinYMin meet")
.attr("viewBox", "0 0 950 500")
.classed("svg-content", true)
.attr("transform",
  `translate(${margin.LEFT}, ${margin.TOP})`)

// Lady
svg2.append("image")
.attr("xlink:href", baseAssetsUrl+"lady9.png")
.attr("x", "-15%")
.attr("y", "8%")
.attr("width", 450)
.attr("height", 430)
// Opacity is a 0..1 value; 450 was a copy of the width above
.style("opacity", 1)

// Comment
const annot2 = [
  {
  note: {
    label: "In comparison, Jupyter has been losing its traction and in 2021 decreased in popularity for both genders.",
    title: "Jupyter",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 330,
  dy: -35,
  dx: +30
}]

svg2.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot2))

// Draw the chart once the CSV has been fetched
d3.csv(baseAssetsUrl + "ide_woman.csv").then(function(data){

  const name = "Ladies"
  create_graph(data, svg2, name)

});

});
'''


# Emit the HTML scaffold first, then the script that draws into its containers.
# NOTE(review): display() presumably returns None here (no display_id), so `h` looks unused -- confirm.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: times-new-roman">The Surroundings</h2>

<p style="font-family: times-new-roman">Regarding the preferences for hosted notebooks, the trends are as follows:</p>

* <p style="font-family: times-new-roman"><b>Kaggle and Colab have been increasing in popularity</b> each year, being the most preferred hosted notebooks out of all choices in 2021.</p>
* <p style="font-family: times-new-roman">There is still a big pool of people (around 30% in 2021 for both genders) who <b>never use any hosted notebook</b> on a regular basis. I assume these are the people who compete frequently in Kaggle competitions (and win), but <i>don't like to post notebooks</i> for these competitions. I would also assume that these people need a lot of GPU/TPU power and <i>have very powerful local computers/workstations</i>. I would love to have a chat with them and understand how I could develop these hosted notebooks in order to help them in their Data Science work.</p>

<p style="font-family: times-new-roman">Code Ocean, IBM Watson Studio, Amazon Sagemaker Studio & EMR, Google Cloud Notebooks & Datalab, and Databricks Collaborative Notebooks each have less than 8% usage among the respondents, so they are not shown in the graphs.</p>

<i>TODO: Look more into the profile of None.</i>

In [18]:
# The selected survey columns are relabelled with the short platform names below.
cols = df.columns[[33, 34, 35, 36, 37, 45]].tolist()
new_names = [
    "Kaggle", "Colab", "Azure",
    "Paperspace/Gradient", "Binder/ Jupyter Hub", "None",
]

# Aggregate the multiple-choice answers; 2017 is excluded from this chart.
hosted = get_data_multiple_answers(cols, new_names)
hosted = hosted.loc[hosted["Year"] != 2017]

# One table per gender; the Gender column itself is not exported.
hosted_man = hosted.loc[hosted["Gender"] == "Man"].drop(columns=["Gender"])
hosted_woman = hosted.loc[hosted["Gender"] == "Woman"].drop(columns=["Gender"])

# CSVs are written to the working directory, where the D3 script fetches them.
hosted_man.to_csv("hosted_man.csv", index=False)
hosted_woman.to_csv("hosted_woman.csv", index=False)

# Copy the two illustrations of this chart into the working directory as well.
for image_name in ("man10.png", "lady10.png"):
    im = imageio.imread(f'../input/kaggle-data-science-survey-20172021/images/{image_name}')
    Image.fromarray(im).save(image_name)

# HTML/CSS scaffold for this chart: a themed wrapper (.all10) holding two empty
# containers (#map10-gents, #map10-ladies) that the JavaScript below fills with SVG.
# The hidden <img id="baseimg"> is read by that script to derive the assets URL.
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all10 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.all10 .svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 54%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all10">
	<center><h1 style="font-family: parklane">Evolution of the Surroundings</h1></center>
	<center><h3><i>- Percentage of respondents and their preferences (hosted notebooks) -</i></h3></center>
	<div class="row">
		<div id="map10-gents" class="svg-container"></div>
	</div>

	<div class="row">
		<div id="map10-ladies" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
// Tell RequireJS where to fetch D3 v6 and the d3-annotation plugin from.
require.config({
  // "d3src" is a path alias for the d3js.org host used by the map entries below.
  paths: {
    d3src: "https://d3js.org/",
  },
  // '*' applies the id -> location mapping to every requesting module.
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      // v1 micro-modules; presumably the dependencies d3-annotation asks for by name -- TODO confirm.
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Expose every d3-annotation export on the d3 object (d3.annotation, ...)
for (const exportName in d3Annotation) {
  d3[exportName] = d3Annotation[exportName];
}

// Assets URL set by Kaggle: take the resolved src of the hidden image and
// drop everything from the file name onwards
const baseImg = document.getElementById('baseimg');
const baseAssetsUrl = baseImg.src.replace(/man.png.*$/, '');

// Background picture of the wrapper (cannot be a relative URL in the CSS)
document.getElementsByClassName('all10')[0]
  .style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Drawing area inside the 950 x 500 viewBox
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 };
const width = 950 - (margin.LEFT + margin.RIGHT);
const height = 500 - (margin.TOP + margin.BOTTOM);
// Half-extent of the y domain and horizontal offset of the chart group
const chart_size = 70;
const x_global = 150;

/**
 * Draw a centred (silhouette-offset) stacked area chart into `svg` and attach
 * the hover cursor that reports the year and value under the pointer.
 *
 * Reads the enclosing constants height, width, chart_size and x_global.
 *
 * @param {Array<Object>} data - Rows as returned by d3.csv. A "Year" column is
 *   required; every column after the first becomes one stacked series (the
 *   first column is assumed to be "Year" -- TODO confirm against the CSV).
 * @param {Object} svg - D3 selection of the <svg> element to draw into.
 * @param {string} name - Heading drawn above the chart.
 */
function create_graph(data, svg, name){

  // Vertical offset shared by the heading, both axes and the streams
  const XY_axis = height/2-200

  svg.append("text")
  .attr("transform", `translate(${x_global-60}, ${XY_axis-10})`)
  .attr("font-size", "24px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text(name)

  // CSV values arrive as strings; Year must be numeric for the linear scale
  data.forEach(d => {
    d.Year = Number(d.Year)
  })

  // List of groups
  var keys = data.columns.slice(1)

  // Stacking
  var series = d3.stack()
    .keys(keys)
    .offset(d3.stackOffsetSilhouette)
    (data)

  // X Axis
  var x = d3.scaleLinear()
    .domain(d3.extent(data, d => d.Year))
    .range([0, width/1.5])

  // NOTE(review): .style("font-size", 15) has no unit; CSS may ignore it -- confirm.
  var xAxis = g => g
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisBottom(x)
            .tickSize(height/1.2).ticks(4).tickFormat(d3.format("d")))
    .call(g => g.select(".domain").remove())
    .call(g => g.selectAll(".tick line")
                .attr("stroke", "#3A3A3A")
                .attr('stroke-dasharray', '5 5'))
    .call(g => g.selectAll(".tick")
                .select('text')
                .attr('fill', "#3A3A3A")
                .style("font-size", 15))

  // Y Axis (only used for positioning; styled invisible through .axisHidden)
  var y = d3.scaleLinear()
  .domain([-chart_size, chart_size])
  .range([height/1.2, 0])
  svg.append("g")
  .attr("class", "axisHidden")
  .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisLeft(y).ticks(0))

  // Area Chart
  var area = d3.area()
  .x(d => x(d.data.Year))
  .y0(d => y(d[0]))
  .y1(d => y(d[1]))

  // Color
  const color = d3.scaleOrdinal()
    .domain(keys)
    .range(["#D67A83", "#D05257", "#D87F58", "#CE8E27",
     "#5F9571", "#12A6B6", "#74A5D2", "#226BBF", "#5D1D46"])

  // Show; data-label lets the hover handler identify the stream under the pointer
  const path = svg.append("g")
    .selectAll("path")
    .data(series)
    .join("path")
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
      .attr("data-label", d => d.key)
      .attr("fill", ({key}) => color(key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("d", area)
      .attr("opacity", 0.8)

  svg.append("g")
      .call(xAxis)

  // ==== Interactive ====
  const hover = (svg, path) => {

    // Cursor group: hidden until the pointer enters the svg
    const line = svg.append("g")
         .attr("display", "none");

    // One vertical line per series so the active one can take its color
    line.append("g")
        .selectAll("line")
        .data(series)
        .join("line")
        .attr("class", "cursor-line")
        .attr("fill", "#fff")
        .attr("stroke-width", 0.8)
        .attr("x1", 10)
        .attr("y1", height)
        .attr("x2", 10)
        .attr("y2", 10);

    line.append("text")
      .attr("class", "text-year")
      .attr("font-size", 14)
      .attr("x", 0)
      .attr("y", 26)
      .attr("transform", "rotate(-90 20 20) translate(0, -20)");

    line.append("g")
        .selectAll("text")
        .data(series)
        .join("text")
        .attr("class", "text-label")
        .attr("font-weight", 900)
        .attr("font-size", 17)
        .attr("y", height-10)
        .attr("x", -8)
        .attr("transform",
        "rotate(-90 10 460)");

    // One value text per data row; only the row of the hovered year gets text
    line.append("g")
        .selectAll("text")
        .data(data)
        .join("text")
        .attr("class", "text-value")
        .attr("font-weight", 700)
        .attr("font-size", 14)
        .attr("y", height-(height*0.94))
        .attr("x", 14);

    const mousemove = (event) => {

      event?.preventDefault();
      const pointer = d3.pointer(event);
      const label = d3.select(event.target).attr("data-label");

      // Not over a stream: restore the default look and hide the cursor
      if (label === null) {
        path.attr("opacity", 0.8);
        line.style("visibility", "hidden");
        return;
      }

      // The pointer is in svg coordinates while the x scale belongs to the
      // group translated by x_global: remove that offset before inverting,
      // then snap to the nearest integer year (truncation flipped too early)
      const year = Math.round(x.invert(pointer[0] - x_global));

      line.attr("transform", `translate(${pointer[0]}, 0)`)
          .style("visibility", "inherit");

      line.select(".text-year")
          .text(year);

      line.selectAll(".text-value")
        .text(d => d.Year === year ? d[label]+"%" : "");

      line.selectAll(".text-label")
        .style("visibility", "hidden")
        .filter(d => d.key === label)
        .style("visibility", "inherit")
        .attr("fill", d => color(d.key))
        .text(label);

      // "#fff0" = fully transparent white (the original "fff0" is not a color)
      line.selectAll(".cursor-line")
        .attr("stroke", "#fff0")
        .filter(d => d.key === label)
        .attr("stroke", d => color(d.key));

      // Dim every stream except the hovered one
      path.attr("opacity", d => d.key === label ? 0.8 : 0.3);
    }

    const mouseenter = () => {
      line.attr("display", null);
    }

    const mouseleave = () => {
      line.attr("display", "none");
    }

    svg
        .on("mousemove", mousemove)
        .on("mouseenter", mouseenter)
        .on("mouseleave", mouseleave)

  }

  svg.call(hover, path);
}

// ===== MEN =====
// Canvas for the gentlemen chart, drawn in a 950 x 500 viewBox
const gentsSvg = d3.select("#map10-gents")
  .append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Gentleman illustration
gentsSvg.append("image")
  .attr("xlink:href", baseAssetsUrl + "man10.png")
  .attr("x", "-6%")
  .attr("y", "13%")
  .attr("width", 380)
  .attr("height", 380)
  .style("opacity", 1);

// Annotation about the "None" answer
const gentsNote = {
  note: {
    title: "None",
    label: "Quite a lot of respondents don't use any of the hosted notebooks platforms.",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 120,
  dx: 30,
  dy: 35,
};

gentsSvg.append("g")
  .style('font-size', 17)
  .call(d3.annotation().annotations([gentsNote]));

// Draw the chart once the CSV has been fetched
d3.csv(baseAssetsUrl + "hosted_man.csv")
  .then(rows => create_graph(rows, gentsSvg, "Gentlemen"));

// ===== WOMEN =====
// Canvas for the ladies chart, drawn in a 950 x 500 viewBox
const ladiesSvg = d3.select("#map10-ladies")
  .append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Lady illustration
ladiesSvg.append("image")
  .attr("xlink:href", baseAssetsUrl + "lady10.png")
  .attr("x", "-12%")
  .attr("y", "14%")
  .attr("width", 430)
  .attr("height", 430)
  .style("opacity", 1);

// Annotation about Colab & Kaggle
const ladiesNote = {
  note: {
    title: "Colab & Kaggle",
    label: "Both are increasing in popularity for both genders.",
    wrap: 100,
    padding: 5,
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 300,
  dx: 30,
  dy: -35,
};

ladiesSvg.append("g")
  .style('font-size', 17)
  .call(d3.annotation().annotations([ladiesNote]));

// Draw the chart once the CSV has been fetched
d3.csv(baseAssetsUrl + "hosted_woman.csv")
  .then(rows => create_graph(rows, ladiesSvg, "Ladies"));

});
'''


h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: parklane">2.4 Special Sauces</h2>

<h2 style="font-family: times-new-roman">First Special Sauce: The Hardware</h2>

<p style="font-family: times-new-roman">There is a <b>smaller percentage of use for Workstations and Cloud Platforms</b> and an <b>increase in Personal Laptop's popularity</b>. </p>

<p style="font-family: times-new-roman">I believe this is the case because, as the overall percentage of respondents in 2021 consists of many more young adults (aged 18 to 24), they do not yet have the necessary budget and experience to work with heavy duty equipment, like specialized workstations and Cloud Platforms with online GPU or TPU.</p>

In [19]:
# --- Most-used computing platform: share of respondents per year and gender ---
cols = ["Year", "What is your gender? - Selected Choice",
        df.columns[47]]
# Work on an independent copy so the renames/assignments below never write
# into a view of `df` (this is what raised SettingWithCopyWarning before).
comp = df[cols].copy()

comp.columns = ["Year", "Gender", "CompPlatform"]
comp = comp[comp["Gender"].isin(["Man", "Woman"])].copy()

# The answer wording differs between survey editions: collapse every variant
# into one of three buckets (old_list[i] is replaced by new_list[i]).
old_list = ['A laptop',
            'A cloud computing platform (AWS, Azure, GCP, hosted notebooks, etc)',
            'A personal computer / desktop',
            'A deep learning workstation (NVIDIA GTX, LambdaLabs, etc)', 
            'A personal computer or laptop']
new_list = ["Personal Laptop", "Cloud Platform", "Personal Laptop",
            "Workstation", "Personal Laptop"]

comp["CompPlatform"] = comp["CompPlatform"].replace(old_list, new_list)

# Count answers per (Year, Gender, CompPlatform). The count column is named
# explicitly because its default name is pandas-version dependent
# (0 in pandas 1.x, "count" in pandas 2.x).
comp = comp.value_counts().reset_index(name="n_answers")
# `total` comes from an earlier cell; it must provide a "Count" column per
# (Year, Gender) -- presumably the number of respondents; confirm upstream.
comp = pd.merge(comp, total, on=["Year", "Gender"])
comp["perc"] = comp["n_answers"]/comp["Count"]
comp["perc"] = comp["perc"].apply(lambda x: round(x, 2))
comp = comp.drop(columns=["n_answers", "Count"])
comp = comp[~comp["CompPlatform"].isin(["None", "Other"])]


def _platform_share_wide(shares, gender):
    """Return one row per platform with one share column per survey year.

    Parameters
    ----------
    shares : DataFrame with columns Year, Gender, CompPlatform, perc.
    gender : value of the Gender column to keep ("Man" or "Woman").

    Returns
    -------
    DataFrame with columns group, value1, value2 (the names the D3 chart
    reads). Exactly two distinct years must be present, otherwise the
    column rename raises ValueError.
    """
    one_gender = shares[shares["Gender"] == gender].drop(columns=["Gender"])
    wide = pd.pivot(data=one_gender, index="CompPlatform",
                    columns="Year", values="perc").reset_index()
    wide.columns = ["group", "value1", "value2"]
    return wide


comp_man = _platform_share_wide(comp, "Man")
comp_woman = _platform_share_wide(comp, "Woman")

comp_man.to_csv("comp_man.csv", index=False)
comp_woman.to_csv("comp_woman.csv", index=False)

# Re-save the illustrations into the working directory: the JavaScript below
# loads them from the same base URL as the CSV files.
for img_name in ("man12.png", "lady12.png"):
    im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/' + img_name)
    Image.fromarray(im).save(img_name)

htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all12 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisGents path{
		stroke: #184E8B;
	}

	.axisGents line{
		stroke: #184E8B;
	}

	.axisGents text{
		stroke: #184E8B;
	}

	.annotation.gents text {
		fill: #184E8B;
	}

	.axisLadies path{
		stroke: #A81F40;
	}

	.axisLadies line{
		stroke: #A81F40;
	}

	.axisLadies text{
		stroke: #A81F40;
	}

	.annotation.ladies text {
		fill: #A81F40 ;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 1.3rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all12">
	<center><h1 style="font-family: parklane">Special Sauce: The Hardware</h1></center>
	<center><h2>Most often used Computing Platform</h2></center>
	<div class="row">
		<div id="map12" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all12 = document.getElementsByClassName('all12')[0];
$all12.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 10, RIGHT: 10, TOP: 10, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const color_2021 = "#D05257"
const color_2020 = "#226BBF"

const svg = d3.select("#map12").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Splits the content of every selected <text> element into <tspan> lines:
// words are added to the current line until its rendered length exceeds
// `width`, then the overflowing word starts a new line. Line spacing is 0.5em.
function wrap(text, width) {
  const LINE_HEIGHT_EM = 0.5

  text.each(function () {
    const label = d3.select(this)
    // reversed so that pop() hands the words back in reading order
    const pending = label.text().split(/\s+/).reverse()
    const yAttr = label.attr("y")
    const baseDy = parseFloat(label.attr("dy"))

    // every line is a tspan anchored at x=0 and at the original y
    const startLine = (dyValue) =>
      label.append("tspan").attr("x", 0).attr("y", yAttr).attr("dy", dyValue)

    label.text(null)
    let span = startLine(baseDy + "em")
    let current = []
    let lineIndex = 0

    // the loop also stops on an empty word, exactly like `while (word = pop())`
    for (let word = pending.pop(); word; word = pending.pop()) {
      current.push(word)
      span.text(current.join(" "))
      if (span.node().getComputedTextLength() > width) {
        // too long: take the word back and open a fresh line with it
        current.pop()
        span.text(current.join(" "))
        current = [word]
        lineIndex += 1
        span = startLine(`${lineIndex * LINE_HEIGHT_EM + baseDy}em`).text(word)
      }
    }
  })
}


svg.append('line')
.style("stroke", "black")
.style("stroke-width", 1)
    .attr("x1", 480)
    .attr("y1", 150)
    .attr("x2", 480)
    .attr("y2", 900)

// LEGEND

// Legend: one circle + label pair per survey year, drawn above the panels.
const keys = ["2020", "2021"]
// Explicit domain so the year -> colour mapping does not depend on the order
// in which color() happens to be called first.
const color = d3.scaleOrdinal()
  .domain(keys)
  .range([color_2020, color_2021])

// Shared marker styling; also read by create_lolipop further down.
const opacity = 0.7
const stroke_width = 3
const r = 30

// "graph" matches no existing element, so the join creates one <g> per key.
var legend = svg.selectAll("graph")
  .data(keys)
  .join("g")
  .attr("transform", `translate(${300}, ${height-height*0.9})`)

legend
  .append("circle")
    .attr("cx", (d, i) => (i%2 === 0 ? 110 : 250))
    .attr("cy", -40) 
    .attr("r", r)
    .attr("opacity", opacity)
    .style("stroke", "black")
    .style("stroke-width", stroke_width)
    .style("fill", function(d){ return color(d)})

legend
  .append("text")
    .attr("x", (d, i) => (i%2 === 0 ? 150 : 290))
    .attr("y", -40) 
    .attr("font-size", "21px")
    .style("fill", function(d){ return color(d)})
    .text(function(d){ return d})
    // "left" is not a valid text-anchor value (start | middle | end);
    // "start" is the valid spelling of the same left-aligned rendering.
    .attr("text-anchor", "start")
    .attr("font-weight", 700)
    .style("alignment-baseline", "middle")


// Global Y axis
const y = d3.scaleBand()
.range([ 0, height/1.3 ])
.domain(["Cloud Platform", "Personal Laptop", "Workstation"])
.padding(1);
svg.append("g")
.attr("transform", `translate(${110}, ${height/3-140})`)
.call(d3.axisLeft(y).tickSize(0))
.selectAll("text")
.call(wrap, y.bandwidth())
.attr("y", -10)
.attr("font-size", "16px")
.attr("font-weight", 600)

// LOLIPOP FUNCTION
// Draws one dumbbell panel on the shared `svg`: a percentage x axis, a grey
// connector per row between value1 and value2, and one circle for each value.
//   data               rows with `group`, `value1`, `value2`
//   x_global, y_global pixel offset of the panel
//   color_2020/2021    fill of the value1 / value2 circles
//   limit1, limit2     x-axis domain
//   axisClass          CSS class set on the axis group
// Rows are placed vertically with the outer band scale `y`.
function create_lolipop(data, x_global, y_global, 
  color_2020, color_2021, limit1, limit2, axisClass){

  const panelOrigin = `translate(${x_global}, ${y_global})`
  const xScale = d3.scaleLinear()
    .domain([limit1, limit2])
    .range([ 0, width/2.7])

  // accessors reused by the connectors and by both circle layers
  const xOf = field => d => xScale(d[field])
  const yOf = d => y(d.group)

  // X axis, formatted as whole percentages
  svg.append("g")
    .attr("class", axisClass)
    .attr("transform", `translate(${x_global}, ${height/1.02})`)
    .call(d3.axisBottom(xScale).tickFormat(d3.format(".0%")))
    .selectAll("text")
      .attr("font-size", "16px")
      .attr("font-weight", 300)

  // Connector between the two values of each group
  svg.selectAll("myline")
    .data(data)
    .join("line")
      .attr("transform", panelOrigin)
      .attr("x1", xOf("value1"))
      .attr("x2", xOf("value2"))
      .attr("y1", yOf)
      .attr("y2", yOf)
      .attr("stroke", "grey")
      .attr("stroke-width", "1px")

  // One circle layer per value column ("mycircle" matches nothing, so every
  // row enters and a new circle is appended each time).
  const drawDots = (field, fill) =>
    svg.selectAll("mycircle")
      .data(data)
      .join("circle")
        .attr("transform", panelOrigin)
        .attr("cx", xOf(field))
        .attr("cy", yOf)
        .attr("r", r)
        .attr("opacity", opacity)
        .style("fill", fill)
        .style("stroke", "black")
        .style("stroke-width", stroke_width)

  drawDots("value1", color_2020)
  drawDots("value2", color_2021)
}


// === Men ===
d3.csv(baseAssetsUrl + "comp_man.csv").then(function(data){

  const x_global = 110
  const y_global = height/3-140
  const limit1 = 0.03
  const limit2 = 0.83
  const axisClass = "axisGents"

  svg.append("text")
  .attr("transform", `translate(${x_global+180}, ${y_global+50})`)
  .attr("font-size", "30px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Gentlemen")

  // Men
  svg.append("image")
  .attr("xlink:href", baseAssetsUrl+"man12.png")
      .attr("x", x_global+50)
      .attr("y", y_global-70)
      .attr("width", 250)
      .attr("height", 250)
      .style("opacity", 1)

  create_lolipop(data, x_global, y_global, color_2020, 
    color_2021, limit1, limit2, axisClass)

  });

// === Women ===
d3.csv(baseAssetsUrl + "comp_woman.csv").then(function(data){

  const x_global = 520
  const y_global = height/3-140
  const limit1 = 0.02
  const limit2 = 0.83
  const axisClass = "axisLadies"

  svg.append("text")
  .attr("transform", `translate(${x_global+180}, ${y_global+50})`)
  .attr("font-size", "30px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Ladies")

  // Women
  svg.append("image")
  .attr("xlink:href", baseAssetsUrl+"lady12.png")
      .attr("x", x_global-135)
      .attr("y", y_global-520)
      .attr("width", 600)
      .attr("height", 600)
      .style("opacity", 1)

  create_lolipop(data, x_global, y_global, color_2020, 
    color_2021, limit1, limit2, axisClass)

  });


// Annot
const annot = [
  {
    note: { 
      label: "Have been decreasing in popularity in 2021.",
      title: "Cloud Platforms & Workstations",
      wrap: 180, 
      padding: 5, 
    },
    className: "gents",
    connector: { end: "arrow" },
    color: ["#ffffff"],
    x: 150,
    y: 370,
    dy: 20,
    dx: 20
  },
  {
  note: { 
    label: "More and more respondents are using personal laptops (this change is affected by the shift in respondent's age).",
    title: "Personal Laptop",
    wrap: 180, 
    padding: 5, 
  },
  className: "ladies",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 830,
  y: 530,
  dy: 20,
  dx: -20
}]

svg.append("g")
  .style('font-size', 19)
  .call(d3.annotation()
          .annotations(annot))

});
'''


# Render the HTML scaffold first: the script below looks up elements that
# this markup creates (#baseimg, .all12, #map12).
# NOTE(review): `h` is not read again in this cell.
h = display(HTML(htmlt))
# Wrap the D3 script and hand it to the notebook front end for execution.
# NOTE(review): `py_display` is imported outside this view -- presumably
# IPython.display; confirm.
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: times-new-roman">Second Special Sauce: The Power</h2>

<p style="font-family: times-new-roman">Regarding the power, we can observe that:</p>

* <p style="font-family: times-new-roman">After an increase in 2020, <b>GPU is losing popularity</b> in 2021 for both genders, while <b>the rise of TPU</b> is more and more visible. The number of respondents who say they've <b>used TPU more than once increased</b> in 2021 vs 2019, so we should get used to hearing more about TPU in the future!</p>
* <p style="font-family: times-new-roman">🎩Men are more fond of TPU than 💃women.</p>
* <p style="font-family: times-new-roman">Interestingly enough, the None category increased in popularity as well, as people on the platform say they have never used either GPU or TPU. In 2021, there are more 💃ladies (58%) than 🎩gents (~50%) who prefer not to use any special hardware.</p>

In [20]:
# === Specialized Hardware ===
cols = list(df.columns[48:53])
new_names = ["CPU", "GPU", "TPU", "None", "Other"]

hardware = get_data_multiple_answers(cols, new_names)
# Keep only the GPU / TPU / None answers. A non-inplace drop leaves the object
# returned by the helper untouched.
hardware = hardware.drop(columns=["CPU", "Other"])
# 2017 and 2018 are excluded from this chart.
hardware = hardware[~hardware["Year"].isin([2017, 2018])]

hardware_man = hardware[hardware["Gender"]=="Man"].drop(columns=["Gender"])
hardware_woman = hardware[hardware["Gender"]=="Woman"].drop(columns=["Gender"])

hardware_man.to_csv("hardware_man.csv", index=False)
hardware_woman.to_csv("hardware_woman.csv", index=False)

# === TPU Usage ===
cols = ["Year", "What is your gender? - Selected Choice",
        df.columns[53]]
# Independent copy: the column rename and the assignment below must not write
# into a view of `df` (this is what raised SettingWithCopyWarning before).
tpu = df[cols].copy()

tpu.columns = ["Year", "Gender", "TPU"]
tpu = tpu[tpu["Gender"].isin(["Man", "Woman"])].copy()

# Unify the wording used across survey years; the numeric prefix makes the
# categories sort in order of increasing usage.
old_list = ['2-5 times', 'Never', 'More than 25 times', 
            'Once', '6-25 times', '6-24 times', '> 25 times']
new_list = ['3. 2-5 times', '1. Never', '5. > 25 times', 
            '2. Once', '4. 6-25 times', '4. 6-25 times', '5. > 25 times']

tpu["TPU"] = tpu["TPU"].replace(old_list, new_list)
# Name the count column explicitly: its default name is pandas-version
# dependent (0 in pandas 1.x, "count" in pandas 2.x).
tpu = tpu.value_counts().reset_index(name="n_answers")

tpu = pd.pivot(tpu, index=["Year", "TPU"],
               columns="Gender", values="n_answers").reset_index()
# Rename by label rather than by position so the male/female columns cannot be
# swapped silently if the pivot column order ever changes.
tpu = tpu.rename(columns={"Year": "year", "TPU": "tpu",
                          "Man": "male", "Woman": "female"})
tpu = tpu[["year", "tpu", "male", "female"]].rename_axis(columns=None)

tpu.to_csv("tpu.csv", index=False)

# Re-save the illustrations into the working directory: the JavaScript below
# loads them from the same base URL as the CSV files.
for img_name in ("man11.png", "lady11.png"):
    im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/' + img_name)
    Image.fromarray(im).save(img_name)

htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all11 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all11">
	<center><h1 style="font-family: parklane">Special Sauce: The Power</h1></center>
	<center><h3><i>- The Special Hardware they like to use -</i></h3></center>
	<label for="filter11" style="font-size: 25px">Select Year Here:</label>
	<select id="filter11" style="font-size: 20px">
		<option value="2021">2021</option>
		<option value="2020">2020</option>
		<option value="2019">2019</option>
	  </select>
	<div class="row">
		<div id="map11" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all11 = document.getElementsByClassName('all11')[0];
$all11.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const centreSpacing = 100

const svg = d3.select("#map11").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man11.png")
    .attr("x", "-8%")
    .attr("y", "53%")
    .attr("width", 380)
    .attr("height", 380)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady11.png")
.attr("x", "63%")
.attr("y", "53%")
.attr("width", 420)
.attr("height", 420)
.style("opacity", 1)

// Gradient
const defs = svg.append('defs');

const bgGradient1 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient4')

bgGradient1
  .append('stop')
  .attr('stop-color', '#AB6130')
  .attr('offset', '0%');
bgGradient1
  .append('stop')
  .attr('stop-color', '#873D0B')
  .attr('offset', '100%');

const bgGradient2 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient5')

bgGradient2
  .append('stop')
  .attr('stop-color', '#5B8CB5')
  .attr('offset', '0%');
bgGradient2
  .append('stop')
  .attr('stop-color', '#87afcf')
  .attr('offset', '100%');

// ------ DEFAULTS ------
// Initialize scales
const y = d3.scaleBand()
.range([height/2.7, 0])
.padding(0.1)

const x = d3.scaleLinear()
.range([0, (width - centreSpacing) / 2-150])
const xAxis = svg.append("g")
  .attr('transform', 'translate(165,' + (height-10) + ')')

const xReverse = d3.scaleLinear()
.range([0, (width - centreSpacing) / 2-150])
const xAxisReverse = svg.append("g")
  .attr('transform', 'translate(515,' + (height-10) + ')')


// Male + Female + Labels objects
var gM = svg.append("g")
  .attr("transform", 
  "translate(" + 20 + "," + height/1.65 + ")");

var gF = svg.append("g")
  .attr('transform',
    'translate(' +
      (margin.LEFT + (width - centreSpacing) / 2 + centreSpacing) +
      "," +
      height/1.65 +
      ")");

var gLabels = svg
.append('g')
.attr(
  'transform',
  'translate(' +
    (margin.LEFT + (width - centreSpacing) / 2 + 
    ',' + margin.TOP + ')'));


// === UPDATE ===
// Redraws the TPU-usage pyramid for one survey year.
//   data: rows of tpu.csv already filtered to a single year; each row has
//         `tpu` (category label) and `male` / `female` counts, which are
//         converted to numbers in place here.
// Uses the scales, axis groups and <g> containers created above
// (y, x, xReverse, xAxis, xAxisReverse, gM, gF, gLabels).
function update(data) {

  data.forEach(d => {
    d.female = Number(d.female)
    d.male = Number(d.male)
  })

  // Scales: both sides share one maximum so bar lengths are comparable
  y.domain(data.map(d => d.tpu))

  const maxVal = d3.max(data, d => d3.max([d.male, d.female]))

  x.domain([0, maxVal])
  xReverse.domain([0, maxVal])

  // Male: bars are anchored at the centre and grow to the left
  gM.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', d => (width - centreSpacing) / 2 - x(d.male))
    .attr('y', d => y(d.tpu))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.male))
    .style('fill', 'url(#bg-gradient4)');


  // Female: bars start at the centre and grow to the right
  gF.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', 0)
    .attr('y', d => y(d.tpu))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.female))
    .style('fill', 'url(#bg-gradient5)');


  // Category labels. They are selected by class so that this join never
  // touches the title below (the previous untyped 'text' join removed or
  // re-bound the title node on every update).
  gLabels.selectAll('text.tpu-label')
    .data(data)
    .join('text')
    .attr('class', 'tpu-label')
    .attr('x', centreSpacing / 2-45)
    .attr('y', d => y(d.tpu) + y.bandwidth() +500)
    .style('font-weight', 600)
    .text((d, i) => d.tpu);

  // Title: a single-datum join creates it on the first call and reuses the
  // same node afterwards instead of appending a new one per update.
  gLabels.selectAll('text.tpu-title')
    .data([null])
    .join('text')
    .attr('class', 'tpu-title')
    .text('Times you used TPU')
    .attr('x', centreSpacing / 2-90)
    .attr('y', height/2+40)
    .style('font-size', '22px')
    .style('font-weight', 600);


  // Axis Update
  // The male bars grow leftwards, so their axis needs the reversed range:
  // zero next to the centre, the maximum at the outer (left) end. With the
  // unreversed scale the ticks did not match the bar lengths.
  // NOTE(review): the axis group origin is set outside this function; check
  // that it lines up with the bar baseline.
  const xMaleAxis = x.copy().range(x.range().slice().reverse())

  xAxis
    .transition()
    .duration(1000)
    .call(d3.axisBottom(xMaleAxis).ticks(3))
    .selectAll("text")
      .attr("font-size", "15px");

  xAxisReverse
    .transition()
    .duration(1000)
    .call(d3.axisBottom(xReverse).ticks(3))
    .selectAll("text")
      .attr("font-size", "15px");
}


// === Years Programming ===
d3.csv(baseAssetsUrl + "tpu.csv").then(function(dataload){

  // default view
  var data = dataload.filter(function(d) { 
    var sq = d3.select("#filter11").property("value");
    return d.year === sq;
  });

  update(data)


  // on change
  d3.select("#filter11").on("change", function() {

    var data = dataload.filter(function(d) { 
      var sq = d3.select("#filter11").property("value");
      return d.year === sq;
    });

    update(data);
  });

});


// Draws one stacked-bar panel on the shared `svg`: one bar per row of `data`
// (keyed by `Year`), stacked over `subgroups`, with a hover tooltip.
//   data       rows with a `Year` field and one field per subgroup
//   subgroups  names of the fields to stack
//   x_global   horizontal pixel offset of the panel
//   y_range    upper bound of the y domain
//   color      scale mapping a subgroup name to its fill
function create_stacked_bar(data, subgroups, x_global, y_range, color) {

  // bars and the hidden y axis share this origin
  const plotOrigin = `translate(${x_global}, ${height/2.5-350})`

  // X axis: one band per row
  const xBand = d3.scaleBand()
      .domain(data.map(d => d.Year))
      .range([0, width/2.5])
      .padding([0.2])
  svg.append("g")
    .attr("transform", `translate(${x_global}, ${height/2.2})`)
    .call(d3.axisBottom(xBand).tickSizeOuter(0))
    .selectAll("text")
      .attr("y", "10")
      .attr("x", "0")
      .attr("font-size", "15px")
      .attr("font-weight", 300)
      .attr("text-anchor", "middle")

  // Y axis: drawn without ticks; the axisHidden class styles its lines
  const yLinear = d3.scaleLinear()
    .domain([0, y_range])
    .range([ height/2.3, 0]);
  svg.append("g")
    .attr("class", "axisHidden")
    .attr("transform", plotOrigin)
    .call(d3.axisLeft(yLinear).ticks(0))
    .selectAll("text")
      .attr("font-size", "11px")
      .attr("font-weight", 300);

  // One series per subgroup, each holding a [lower, upper] pair per row
  const layers = d3.stack().keys(subgroups)(data)

  // One <g> per series (classed "myRect <key>"), one <rect> per row inside it
  const bars = svg.append("g")
    .selectAll("g")
    .data(layers)
    .join("g")
      .attr("transform", plotOrigin)
      .attr("fill", d => color(d.key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("class", d => "myRect " + d.key ) 
    .selectAll("rect")
    .data(d => d)
    .join("rect")
      .attr("x", d => xBand(d.data.Year))
      .attr("y", d => yLinear(d[1]))
      .attr("height", d => yLinear(d[0]) - yLinear(d[1]))
      .attr("width", xBand.bandwidth())

  // Tooltip element, appended to <body> and hidden until a bar is hovered
  const tooltip = d3.select("body").append("div")
    .attr("class", "tooltip-donut")
    .style("opacity", 0);

  // Hover in: dim every series, restore the hovered one, show its value
  function showSeries(event, dt) {
    // the series key lives on the parent <g> of the hovered rect
    const seriesKey = d3.select(this.parentNode).datum().key

    d3.selectAll(".myRect").style("opacity", 0.2)
    d3.selectAll("."+seriesKey).style("opacity",1)

    tooltip.transition()
        .duration(50)
        .style("opacity", 1);

    tooltip.html("Hardware: " + seriesKey + "<br>" + "Perc: " + dt.data[seriesKey] + "%")
        .style("left", (event.pageX) + "px")
        .style("top", (event.pageY-30) + "px");
  }

  // Hover out: restore every series and hide the tooltip
  function resetSeries() {
    d3.selectAll(".myRect")
        .style("opacity",1) 

    tooltip.transition()
        .duration('50')
        .style("opacity", 0);
    svg.selectAll('.divergence').remove()
  }

  bars
    .on('mouseenter', showSeries)
    .on('mouseleave', resetSeries)

}


// === Preferences ===
d3.csv(baseAssetsUrl + "hardware_man.csv").then(function(data){

  const x_global = 50
  const y_range = 120
  const subgroups = data.columns.slice(1)
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#873D0B", "#AB6130", "#B78A6E"]);

  svg.append("text")
  .attr("y", "4%")
  .attr("x", "25%")
  .attr("font-size", "23px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Gentlemen Preferences")

  create_stacked_bar(data, subgroups, x_global, y_range, color)
  
});

d3.csv(baseAssetsUrl + "hardware_woman.csv").then(function(data){

  const x_global = 500
  const y_range = 120
  const subgroups = data.columns.slice(1)
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#4984BB", "#87AFCF", "#AABED2"]);

  svg.append("text")
  .attr("y", "4%")
  .attr("x", "70%")
  .attr("font-size", "23px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Ladies Preferences")

  create_stacked_bar(data, subgroups, x_global, y_range, color)
  
});

});
'''


# Render the HTML scaffold first: the script below looks up elements that
# this markup creates (#baseimg, .all11, #filter11, #map11).
# NOTE(review): `h` is not read again in this cell.
h = display(HTML(htmlt))
# Wrap the D3 script and hand it to the notebook front end for execution.
# NOTE(review): `py_display` is imported outside this view -- presumably
# IPython.display; confirm.
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

In [21]:
# # Save to W&B
# run = wandb.init(project='kaggle-survey-2021', name='power_eda', 
#                  config=CONFIG, anonymous="allow")
# man, woman = wb_prep_data(hardware)
# create_wandb_plot(x_data=man["Category"], 
#                   y_data=man["Count"],
#                   x_name="Special Hardware Pref", y_name="Count", 
#                   title="Gentlemen: Special Hardware Preferences", log="hardw1", plot="bar")
# create_wandb_plot(x_data=woman["Category"], 
#                   y_data=woman["Count"],
#                   x_name="Special Hardware Pref", y_name="Count",
#                   title="Ladies: Special Hardware Preferences", log="hard2", plot="bar")
# wandb.finish()

<div class="alert simple-alert" style="font-family: times-new-roman">
  <p style="font-family: times-new-roman"><center>👀 We now know that the majority of respondents are Students and Data Scientists, although 💃ladies do prefer a Data Analyst job, vs the 🎩gents who would rather go for Data Engineer. We also know they love and use Python the most, while starting to lose interest in R. Jupyter Notebooks are losing interest vs Visual Studio, which has been skyrocketing in 2021. In terms of special sauces, Personal Laptops are most used, as the pool of respondents is younger, therefore have less experience & resources. An interesting trend is the rise of TPU and the apparent fall of GPU, seen in both men and women, although 🎩gents use much more special hardware than 💃ladies do.</center></p>
</div>

<h1 style="font-family: parklane">3. Music at "the Wireless"</h1>

> <p style="font-family: times-new-roman">📻 Fun fact - <b>"the Wireless"</b> started being popular in the 1920s and it soon became an interesting point of attraction in households, especially for the youth. The elder and more "conservative" people (who had lived most of their lives in the second half of the 1800s) were reluctant to use it, saying "it's just a passing phase". If you haven't guessed it, "the wireless" was to be and it's now named ... <b>the radio</b>. At that time, you had to have it installed by a professional and you would usually hear some news, live presidential/royal speeches and music (most popular genres at the time were blues, jazz, swing, ragtime etc.).</p>

<center><img src="https://i.imgur.com/83PXDpU.gif" width=800></center>

<p style="font-family: times-new-roman"><i>Note: Inspiration from the TV series "Downton Abbey".</i></p>

<h2 style="font-family: parklane">3.1 "Jazzy" Visualizations</h2>

<p style="font-family: times-new-roman">...</p>

In [22]:
# Visualization-library answers, selected by column position.
# NOTE(review): position 60 is skipped and `new_names` has no entry for it --
# confirm which answer option that column holds and that omitting it is intended.
cols = list(df.columns[[54, 55, 56, 57, 58, 59, 61, 62, 63]])
new_names = ["Matplotlib", "Seaborn", "Plotly", "Ggplot", 
             "Shiny", "D3 js", "Bokeh", "Geoplotlib",
             "Leaflet/Folium"]

# `get_data_multiple_answers` is defined in an earlier cell; its result is
# used below with "Year" and "Gender" columns.
viz = get_data_multiple_answers(cols, new_names)
# Rows for 2017 are excluded from this chart.
viz = viz[viz["Year"]!=2017]

# One CSV per gender, read by the D3 script of this cell.
viz_man = viz[viz["Gender"] == "Man"].drop(columns=["Gender"])
viz_woman = viz[viz["Gender"] == "Woman"].drop(columns=["Gender"])

viz_man.to_csv("viz_man.csv", index=False)
viz_woman.to_csv("viz_woman.csv", index=False)

# Re-save the two illustrations of this section into the working directory,
# next to the CSV files written above.
im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/man13.png')
Image.fromarray(im).save("man13.png")

im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/lady13.png')
Image.fromarray(im).save("lady13.png")

htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all13 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.all13 .svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 54%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all13">
	<center><h1 style="font-family: parklane">Jazzy Visualizations</h1></center>
	<center><h3><i>- Percentage of respondents and their preferences (visualization libraries) -</i></h3></center>
	<div class="row">
		<div id="map13-gents" class="svg-container"></div>
	</div>

	<div class="row">
		<div id="map13-ladies" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all13 = document.getElementsByClassName('all13')[0];
$all13.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 500 - margin.TOP - margin.BOTTOM
const chart_size = 120
const x_global = 150

function create_graph(data, svg, name){

  const XY_axis = height/2-200

  svg.append("text")
  .attr("transform", `translate(${x_global-60}, ${XY_axis-10})`)
  .attr("font-size", "24px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text(name)

  data.forEach(d => {
    d.Year = Number(d.Year)
  })

  // List of groups
  var keys = data.columns.slice(1)

  // Stacking
  var series = d3.stack()
    .keys(keys)
    .offset(d3.stackOffsetSilhouette)
    (data)

  // X Axis
  var x = d3.scaleLinear()
    .domain(d3.extent(data, d => d.Year))
    .range([0, width/1.5])

  var xAxis = g => g
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisBottom(x)
            .tickSize(height/1.2).ticks(4).tickFormat(d3.format("d")))
    .call(g => g.select(".domain").remove())
    .call(g => g.selectAll(".tick line")
                .attr("stroke", "#3A3A3A")
                .attr('stroke-dasharray', '5 5'))
    .call(g => g.selectAll(".tick")
                .select('text')
                .attr('fill', "#3A3A3A")
                .style("font-size", 15))

  // Y Axis
  var y = d3.scaleLinear()
  .domain([-chart_size, chart_size])
  .range([height/1.2, 0])
  svg.append("g")
  .attr("class", "axisHidden")
  .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisLeft(y).ticks(0))

  // Area Chart
  var area = d3.area()
  .x(d => x(d.data.Year))
  .y0(d => y(d[0]))
  .y1(d => y(d[1]))

  // Color
  // One fill colour per stacked key, assigned in key order.
  // A second palette (this one reversed) used to be passed to an earlier
  // .range() call on the same scale; it was immediately overridden by this
  // one and never used, so it has been dropped.
  const color = d3.scaleOrdinal()
    .domain(keys)
    .range(["#5D1D46", "#226BBF", "#74A5D2", "#12A6B6",
            "#5F9571", "#CE8E27", "#D87F58", "#D05257", "#D67A83"])

  // Show
  const path = svg.append("g")
    .selectAll("path")
    .data(series)
    .join("path")
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
      .attr("data-label", d => d.key)
      .attr("fill", ({key}) => color(key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("d", area)
      .attr("opacity", 0.8)

  svg.append("g")
      .call(xAxis)

  // ==== Interactive ====
  const hover = (svg, path) => {
  
    const line = svg.append("g")
         .attr("display", "none");
  
    line.append("g")
        .selectAll("line")
        .data(series)
        .join("line")
        .attr("class", "cursor-line")
        .attr("fill", "#fff")
        .attr("stroke-width", 0.8)
        .attr("x1", 10)
        .attr("y1", height)
        .attr("x2", 10)
        .attr("y2", 10);

    line.append("text")
      .attr("class", "text-year")
      .attr("font-size", 14)
      .attr("x", 0)
      .attr("y", 26)
      .attr("transform", "rotate(-90 20 20) translate(0, -20)");
    
    line.append("g")
        .selectAll("text")
        .data(series)
        .join("text")
        .attr("class", "text-label")
        .attr("font-weight", 900)
        .attr("font-size", 17)
        .attr("y", height-10)
        .attr("x", -8)
        .attr("transform", 
        "rotate(-90 10 460)");

    line.append("g")
        .selectAll("text")
        .data(data)
        .join("text")
        .attr("class", "text-value")
        .attr("font-weight", 700)
        .attr("font-size", 14)
        .attr("y", height-(height*0.94))
        .attr("x", 14);

    // Pointer handler for the whole svg: moves the cursor guide to the pointer
    // and shows the year, the hovered stream's name and its value for that year.
    // The hovered stream is identified by the "data-label" attribute that the
    // area paths carry; any other target counts as "not over a stream".
    const mousemove = (event) => {

      event?.preventDefault();
      const pointer = d3.pointer(event);
      const label = d3.select(event.target).attr("data-label");

      // Not over a stream: restore every area to its resting opacity, hide the
      // guide and stop, so no "undefined%" text is ever written.
      if (label === null) {
        path.attr("opacity", 0.8);
        line.style("visibility", "hidden");
        return;
      }

      // NOTE(review): pointer[0] is relative to the svg the handler is bound
      // to, while the x scale's range starts at 0 and the chart itself is drawn
      // at translate(x_global, ...). The inverted value therefore includes that
      // horizontal offset. Kept as in the original - confirm that the year
      // shown lines up with the tick under the cursor.
      const year = parseInt(x.invert(pointer[0]));

      line.attr("transform", `translate(${pointer[0]}, 0)`)
          .style("visibility", "inherit");

      line.select(".text-year")
        .text(year);

      // One text node exists per data row; only the row of the current year
      // shows its value for the hovered stream.
      line.selectAll(".text-value")
        .text(d => d.Year === year ? d[label] + "%" : "");

      // Hide every stream name, then reveal and colour the hovered one.
      line.selectAll(".text-label")
        .style("visibility", "hidden")
        .filter(d => d.key === label)
        .style("visibility", "inherit")
        .attr("fill", color(label))
        .text(label);

      // Same pattern for the guide lines: transparent by default ("#fff0" is
      // fully transparent white), hovered stream's colour for the match.
      line.selectAll(".cursor-line")
        .attr("stroke", "#fff0")
        .filter(d => d.key === label)
        .attr("stroke", color(label));

      // Dim every stream except the hovered one.
      path.attr("opacity", d => d.key === label ? 0.8 : 0.3);
    }

    const mouseenter = () => {
      line.attr("display", null);
    }
  
    const mouseleave = () => {
      line.attr("display", "none");
    }

    svg
        .on("mousemove", mousemove)
        .on("mouseenter", mouseenter)
        .on("mouseleave", mouseleave)

  }

  svg.call(hover, path);
}

// ===== MEN =====
const svg1 = d3.select("#map13-gents").append("svg")
// .attr("style", "outline: thin solid red;")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Gentleman
svg1.append("image")
.attr("xlink:href", baseAssetsUrl+"man13.png")
    .attr("x", "-6%")
    .attr("y", "13%")
    .attr("width", 380)
    .attr("height", 380)
    .style("opacity", 1)

// Comment
const annot1 = [
  {
  note: { 
    label: "Remain the most popular libraries out of all, with trend increase from 2018 to present.",
    title: "Matplotlib/ Seaborn/ Plotly",
    wrap: 100, 
    padding: 5, 
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 300,
  dy: -35,
  dx: +30
}]

svg1.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot1))

d3.csv(baseAssetsUrl + "viz_man.csv").then(function(data){

  const name = "Gentlemen"
  create_graph(data, svg1, name)

});

// ===== WOMEN =====
const svg2 = d3.select("#map13-ladies").append("svg")
// .attr("style", "outline: thin solid red;")
.attr("preserveAspectRatio", "xMinYMin meet")
.attr("viewBox", "0 0 950 500")
.classed("svg-content", true)
.attr("transform",
  `translate(${margin.LEFT}, ${margin.TOP})`)

// Lady
svg2.append("image")
.attr("xlink:href", baseAssetsUrl+"lady13.png")
.attr("x", "-22%")
.attr("y", "9%")
.attr("width", 460)
.attr("height", 460)
.style("opacity", 1)

// Comment
const annot2 = [
  {
  note: { 
    label: "All these libraries remain yet to be discovered (lack of interest or lack of awareness?).",
    title: "Shiny/ D3/ Bokeh/ Geo",
    wrap: 100, 
    padding: 5, 
  },
  className: "color",
  connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 755,
  y: 130,
  dy: +35,
  dx: +30
}]

svg2.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot2))

d3.csv(baseAssetsUrl + "viz_woman.csv").then(function(data){

  const name = "Ladies"
  create_graph(data, svg2, name)

});

});
'''


h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: parklane">3.2 Machine Learning on the "Blues"</h2>

<h2 style="font-family: times-new-roman">I. The Algorithms</h2>

In [23]:
# ===== Years been doing ML =====
# Builds ml_yrs.csv: for every (year, ML-experience bucket) the share of men and
# of women who picked that bucket, relative to the per-year/per-gender totals
# held in `total` (merged on Year/Gender, denominator column "Count").
cols = ["Year", "What is your gender? - Selected Choice",
        df.columns[66]]

# Work on explicit copies so the column assignment further down writes to an
# independent frame instead of a slice of `df` (avoids SettingWithCopyWarning).
ml_yrs = df[cols].copy()
ml_yrs.columns = ["Year", "Gender", "MachineLearning"]
ml_yrs = ml_yrs[ml_yrs["Gender"].isin(["Man", "Woman"])].copy()

# Raw answer wordings and the harmonised, sortable bucket each one maps to
# (old_list[i] -> new_list[i]).
old_list = ['5-10 years', 'Under 1 year',
       'I do not use machine learning methods', '10-20 years',
       '2-3 years', '1-2 years', '4-5 years', '3-4 years',
       '20 or more years', '< 1 years', '10-15 years', '20+ years',
       'I have never studied machine learning but plan to learn in the future',
       '< 1 year',
       'I have never studied machine learning and I do not plan to']

new_list = ["7. 5-10 years", "2. <1 year",
            "1. Never", "8. 10+ years", 
            "4. 2-3 years", "3. 1-2 years", "6. 4-5 years", "5. 3-4 years",
            "8. 10+ years", "2. <1 year", "8. 10+ years", "8. 10+ years",
            "1. Never",
            "2. <1 year",
            "1. Never"]

ml_yrs["MachineLearning"] = ml_yrs["MachineLearning"].replace(old_list, new_list)

# Count respondents per (Year, Gender, bucket). Naming the count column
# explicitly keeps this independent of the pandas version (the default name of
# the value_counts() column is 0 in older releases and "count" in newer ones).
ml_yrs = ml_yrs.value_counts().reset_index(name="n_answers")

ml_yrs = pd.merge(ml_yrs, total, on=["Year", "Gender"])
ml_yrs["perc"] = ml_yrs["n_answers"] / ml_yrs["Count"]
ml_yrs = ml_yrs.drop(columns=["n_answers", "Count"])

# One row per (year, bucket) with a column per gender. The genders are renamed
# by label rather than by position so "Man" can never end up as "female".
ml_yrs = pd.pivot(ml_yrs, index=["Year", "MachineLearning"],
               columns="Gender", values="perc").reset_index()
ml_yrs = ml_yrs.rename(columns={"Year": "year", "MachineLearning": "ml",
                                "Man": "male", "Woman": "female"})
ml_yrs = ml_yrs[["year", "ml", "male", "female"]]

ml_yrs.to_csv("ml_yrs.csv", index=False)

# ===== ML algorithms =====
# Survey columns 84..92 are passed to get_data_multiple_answers together with
# the short names below; the result is split by gender and written to one CSV
# per gender for the stacked bar charts.
new_names = ["TreesOrForests", "GradientBoostingMachines", "BayesianApproaches",
             "EvolutionaryApproaches", "DenseNeuralNets", "CNNs",
             "GANs", "RNNs", "Transformers"]
cols = df.columns[84:93].tolist()

algorithm = get_data_multiple_answers(cols, new_names)

# 2017 and 2018 are left out of this chart
algorithm = algorithm.loc[~algorithm["Year"].isin([2017, 2018])]

# Same filter for both genders; the Gender column is dropped once it has been
# used to split the frame.
algorithm_man, algorithm_woman = (
    algorithm.loc[algorithm["Gender"] == gender].drop(columns="Gender")
    for gender in ("Man", "Woman")
)

algorithm_man.to_csv("algorithm_man.csv", index=False)
algorithm_woman.to_csv("algorithm_woman.csv", index=False)

# Re-save the man14 / lady14 illustrations from the input dataset into the
# working directory: the chart's JavaScript loads them by file name
# (baseAssetsUrl + "man14.png" / "lady14.png").
im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/man14.png')
Image.fromarray(im).save("man14.png")

im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/lady14.png')
Image.fromarray(im).save("lady14.png")

htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all14 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 100%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all14">
	<center><h1 style="font-family: parklane">Machine Learning: Historic</h1></center>
	<label for="filter14" style="font-size: 25px">Select Year Here:</label>
	<select id="filter14" style="font-size: 20px">
		<option value="2021">2021</option>
		<option value="2020">2020</option>
		<option value="2019">2019</option>
		<option value="2018">2018</option>
	</select>
	<div class="row">
		<div id="map14" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all14 = document.getElementsByClassName('all14')[0];
$all14.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 900 - margin.TOP - margin.BOTTOM
const centreSpacing = 100

const svg = d3.select("#map14").append("svg")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 900")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Gentleman
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"man14.png")
    .attr("x", "-12%")
    .attr("y", "-3%")
    .attr("width", 440)
    .attr("height", 440)
    .style("opacity", 1)

// Lady
svg.append("image")
.attr("xlink:href", baseAssetsUrl+"lady14.png")
.attr("x", "74%")
.attr("y", "1%")
.attr("width", 440)
.attr("height", 440)
.style("opacity", 1)

// Gradient
const defs = svg.append('defs');

const bgGradient1 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient14')

bgGradient1
  .append('stop')
  .attr('stop-color', '#927366')
  .attr('offset', '0%');
bgGradient1
  .append('stop')
  .attr('stop-color', '#633725')
  .attr('offset', '100%');

const bgGradient2 = defs
  .append('linearGradient')
  .attr('id', 'bg-gradient15')

bgGradient2
  .append('stop')
  .attr('stop-color', '#728551')
  .attr('offset', '0%');
bgGradient2
  .append('stop')
  .attr('stop-color', '#aab697')
  .attr('offset', '100%');

// ------ DEFAULTS ------
// Initialize scales
const y = d3.scaleBand()
.range([height/2.7, 0])
.padding(0.1)

const x = d3.scaleLinear()
.range([0, (width - centreSpacing) / 2-150])
const xAxis = svg.append("g")
  .attr('transform', 'translate(165,' + (height-530) + ')')

const xReverse = d3.scaleLinear()
.range([0, (width - centreSpacing) / 2-150])
const xAxisReverse = svg.append("g")
  .attr('transform', 'translate(515,' + (height-530) + ')')


// Male + Female + Labels objects
var gM = svg.append("g")
  .attr("transform", 
  "translate(" + 20 + "," + margin.TOP + ")");

var gF = svg.append("g")
  .attr('transform',
    'translate(' +
      (margin.LEFT + (width - centreSpacing) / 2 + centreSpacing) +
      "," +
      margin.TOP +
      ")");

var gLabels = svg
.append('g')
.attr(
  'transform',
  'translate(' +
    (margin.LEFT + (width - centreSpacing) / 2 + 
    ',' + margin.TOP + ')'));


// === UPDATE ===
// Redraws the mirrored "years doing Machine Learning" bar chart.
//   data - the rows of ml_yrs.csv for one year; every row carries `ml` (the
//          experience bucket) plus `male` and `female` shares, which arrive as
//          strings and are converted to numbers in place.
// The function is called once on load and again on every change of the year
// dropdown, so everything it draws is created through data joins and can be
// re-run without leaving duplicates behind.
function update(data) {

  data.forEach(d => {
    d.female = Number(d.female)
    d.male = Number(d.male)
  })

  // Scales
  y.domain(data.map(d => d.ml))

  // Both halves share one maximum so the two sides stay comparable
  const maxVal = d3.max(data, d => d3.max([d.male, d.female]))

  x.domain([0, maxVal])
  xReverse.domain([0, maxVal])

  // Male: bars end at the centre gap and grow to the left
  gM.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', d => (width - centreSpacing) / 2 - x(d.male))
    .attr('y', d => y(d.ml))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.male))
    .style('fill', 'url(#bg-gradient14)');

  // Female: bars start at the centre gap and grow to the right
  gF.selectAll('rect')
    .data(data)
    .join('rect')
    .transition()
    .duration(1000)
    .attr('x', 0)
    .attr('y', d => y(d.ml))
    .attr('rx', 10)
    .attr('ry', 10)
    .attr('height', y.bandwidth())
    .attr('width', d => x(d.female))
    .style('fill', 'url(#bg-gradient15)');

  // Bucket labels in the centre gap. The selection is restricted to its own
  // class so the title below can never be picked up (and restyled or removed)
  // by this join.
  gLabels.selectAll('text.ml-label')
    .data(data)
    .join('text')
    .attr('class', 'ml-label')
    .attr('x', centreSpacing / 2-45)
    .attr('y', d => y(d.ml) + y.bandwidth() / 2+5)
    .style('font-weight', 600)
    .text(d => d.ml);

  // Title above the labels. Joined against a one-element array so it is
  // created on the first call and simply reused afterwards, instead of a new
  // <text> being appended on every update.
  gLabels.selectAll('text.ml-title')
    .data([null])
    .join('text')
    .attr('class', 'ml-title')
    .text('Years doing Machine Learning')
    .attr('x', centreSpacing / 2-135)
    .attr('y', -1)
    .style('font-size', '20px')
    .style('font-weight', 600);

  // Axis Update
  xAxis
    .transition()
    .duration(1000)
    .call(d3.axisBottom(x).ticks(3).tickFormat(d3.format(".0%")))
    .selectAll("text")
      .attr("font-size", "15px");

  xAxisReverse
    .transition()
    .duration(1000)
    .call(d3.axisBottom(xReverse).ticks(3).tickFormat(d3.format(".0%")))
    .selectAll("text")
      .attr("font-size", "15px");
}


// === Years doing Machine Learning ===
// Loads ml_yrs.csv once, draws the year currently selected in #filter14 and
// redraws whenever the selection changes.
d3.csv(baseAssetsUrl + "ml_yrs.csv").then(function(dataload){

  const yearSelect = d3.select("#filter14");

  // Rows whose `year` equals the dropdown's current value. Both sides of the
  // comparison are strings (CSV field vs. <select> value).
  const rowsForSelectedYear = () => {
    const chosen = yearSelect.property("value");
    return dataload.filter(row => row.year === chosen);
  };

  // default view
  update(rowsForSelectedYear());

  // on change
  yearSelect.on("change", () => {
    update(rowsForSelectedYear());
  });

});


// Draws one stacked bar chart into the shared `svg`: one bar per Year, one
// segment per subgroup, plus a hover tooltip.
//   data      - rows as returned by d3.csv; each has a `Year` field and one
//               field per subgroup
//   subgroups - the keys to stack, in stacking order
//   x_global  - horizontal offset of the chart inside the svg
//   y_range   - upper bound of the y domain
//   color     - scale mapping a subgroup key to its fill colour
// Each call appends its own tooltip <div> to <body>.
function create_stacked_bar(data, subgroups, x_global, y_range, color) {

  const groups = data.map(d => d.Year)

  // Add X axis
  const x = d3.scaleBand()
      .domain(groups)
      .range([0, width/2.5])
      .padding(0.2)
  svg.append("g")
    .attr("transform", `translate(${x_global}, ${height/1.01})`)
    .call(d3.axisBottom(x).tickSizeOuter(0))
    .selectAll("text")
      .attr("y", "10")
      .attr("x", "0")
      .attr("font-size", "15px")
      .attr("font-weight", 300)
      .attr("text-anchor", "middle")

  // Add Y axis (drawn without ticks and styled as hidden via .axisHidden)
  const y = d3.scaleLinear()
    .domain([0, y_range])
    .range([ height/2.3, 0]);
  svg.append("g")
  .attr("class", "axisHidden")
  .attr("transform", `translate(${x_global}, ${height/1.85})`)
    .call(d3.axisLeft(y).ticks(0))
    .selectAll("text")
      .attr("font-size", "11px")
      .attr("font-weight", 300);

  // Stack data: one series per subgroup, each holding [y0, y1] per row
  const stackedData = d3.stack()
    .keys(subgroups)
    (data)

  // Show the bars: one <g class="myRect <key>"> per series, one <rect> per year
  const bars = svg.append("g")
    .selectAll("g")
    .data(stackedData)
    .join("g")
    .attr("transform", `translate(${x_global}, ${height/1.85})`)
      .attr("fill", d => color(d.key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("class", d => "myRect " + d.key )
      .selectAll("rect")
      .data(d => d)
      .join("rect")
        .attr("x", d => x(d.data.Year))
        .attr("y", d => y(d[1]))
        .attr("height", d => y(d[0]) - y(d[1]))
        .attr("width", x.bandwidth())

  // Interactive
  const div = d3.select("body").append("div")
    .attr("class", "tooltip-donut")
    .style("opacity", 0);

  bars
    // MOUSE ON
    .on('mouseenter', function (event, dt) {
      // The series key lives on the parent <g>, not on the rect itself
      const subgroupName = d3.select(this.parentNode).datum().key;
      const subgroupValue = dt.data[subgroupName];

      // Dim every series in this figure, then restore the hovered key.
      // Selections are scoped to `svg` so that elements elsewhere on the page
      // that happen to share these class names are left alone.
      svg.selectAll(".myRect").style("opacity", 0.2)
      svg.selectAll("." + subgroupName).style("opacity", 1)

      // Makes the tooltip appear next to the pointer
      div.transition()
          .duration(50)
          .style("opacity", 1);

      div.html("Alg: " + subgroupName + "<br>" + "Perc: " + subgroupValue +'%')
          .style("left", (event.pageX) + "px")
          .style("top", (event.pageY-30) + "px");
    })
    // MOUSE LEAVE
    .on('mouseleave', function () {
      svg.selectAll(".myRect")
          .style("opacity", 1)

      // Tooltip disappears
      div.transition()
          .duration(50)
          .style("opacity", 0);
    })

}


// === Preferences ===
d3.csv(baseAssetsUrl + "algorithm_man.csv").then(function(data){

  const x_global = 50
  const y_range = 230
  const subgroups = data.columns.slice(1)
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#633725", "#734b3b", "#825f51",
            "#927366", "#a1877c", "#b19b92",
            "#c1afa8", "#d0c3be", "#e0d7d3"]);

  svg.append("text")
  .attr("y", "55%")
  .attr("x", "25%")
  .attr("font-size", "22px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Gentlemen: ML Algorithms Preferences")

  create_stacked_bar(data, subgroups, x_global, y_range, color)
  
});

d3.csv(baseAssetsUrl + "algorithm_woman.csv").then(function(data){

  const x_global = 500
  const y_range = 230
  const subgroups = data.columns.slice(1)
  const color = d3.scaleOrdinal()
    .domain(subgroups)
    .range(["#728551", "#809162", "#8e9d74",
            "#9caa85", "#aab697", "#b9c2a8",
            "#c7ceb9", "#d5dacb", "#e3e7dc"]);

  svg.append("text")
  .attr("y", "55%")
  .attr("x", "70%")
  .attr("font-size", "22px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text("Ladies: ML Algorithms Preferences")

  create_stacked_bar(data, subgroups, x_global, y_range, color)
  
});

});
'''


# Emit the HTML scaffold first: the script looks up elements defined in it
# (#baseimg, .all14, #map14, #filter14), then emit the JavaScript itself.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: times-new-roman">II. The Frameworks</h2>

<p style="font-family: times-new-roman">"None", "Other" and anything below 3% were excluded.</p>

In [24]:
# Survey columns 67..78 are passed to get_data_multiple_answers together with
# the framework names below; the result is split by gender and written to one
# CSV per gender for the stream graphs.
new_names = ["Scikit-learn", "TensorFlow", "Keras", "PyTorch", "Fast.ai",
             "MXNet", "XGBoost", "LightGBM", "CatBoost", "Prophet", "H2O",
             "Caret"]
cols = df.columns[67:79].tolist()

ml = get_data_multiple_answers(cols, new_names)

# 2017 is left out of this chart
ml = ml.loc[ml["Year"] != 2017]

# Same filter for both genders; the Gender column is dropped once it has been
# used to split the frame.
ml_man, ml_woman = (
    ml.loc[ml["Gender"] == gender].drop(columns="Gender")
    for gender in ("Man", "Woman")
)

ml_man.to_csv("ml_man.csv", index=False)
ml_woman.to_csv("ml_woman.csv", index=False)

# Re-save the man15 / lady15 illustrations from the input dataset into the
# working directory: the chart's JavaScript loads them by file name
# (baseAssetsUrl + "man15.png" / "lady15.png").
im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/man15.png')
Image.fromarray(im).save("man15.png")

im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/lady15.png')
Image.fromarray(im).save("lady15.png")

htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all15 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.all15 .svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 54%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all15">
	<center><h1 style="font-family: parklane">ML: The Frameworks</h1></center>
	<center><h3><i>- Percentage of respondents and their preferences -</i></h3></center>
	<div class="row">
		<div id="map15-gents" class="svg-container"></div>
	</div>

	<div class="row">
		<div id="map15-ladies" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Inject d3Annotation methods to d3
for (var key in d3Annotation) {
  d3[key] = d3Annotation[key];
}

// Get Assets URL set by Kaggle
const baseAssetsUrl = document.getElementById('baseimg').src.replace(/man.png.*$/, '');

const $all15 = document.getElementsByClassName('all15')[0];
$all15.style.backgroundImage = `url("${baseAssetsUrl}bkg.jpg")`;

// Set the SVG area
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 }
const width = 950 - margin.LEFT - margin.RIGHT
const height = 500 - margin.TOP - margin.BOTTOM
const chart_size = 120
const x_global = 150

function create_graph(data, svg, name){

  const XY_axis = height/2-200

  svg.append("text")
  .attr("transform", `translate(${x_global-60}, ${XY_axis-10})`)
  .attr("font-size", "24px")
  .attr("text-anchor", "middle")
  .attr("font-weight", 600)
  .text(name)

  data.forEach(d => {
    d.Year = Number(d.Year)
  })

  // List of groups
  var keys = data.columns.slice(1)

  // Stacking
  var series = d3.stack()
    .keys(keys)
    .offset(d3.stackOffsetSilhouette)
    (data)

  // X Axis
  var x = d3.scaleLinear()
    .domain(d3.extent(data, d => d.Year))
    .range([0, width/1.5])

  var xAxis = g => g
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisBottom(x)
            .tickSize(height/1.2).ticks(4).tickFormat(d3.format("d")))
    .call(g => g.select(".domain").remove())
    .call(g => g.selectAll(".tick line")
                .attr("stroke", "#3A3A3A")
                .attr('stroke-dasharray', '5 5'))
    .call(g => g.selectAll(".tick")
                .select('text')
                .attr('fill', "#3A3A3A")
                .style("font-size", 15))

  // Y Axis
  var y = d3.scaleLinear()
  .domain([-chart_size, chart_size])
  .range([height/1.2, 0])
  svg.append("g")
  .attr("class", "axisHidden")
  .attr("transform", `translate(${x_global}, ${XY_axis})`)
    .call(d3.axisLeft(y).ticks(0))

  // Area Chart
  var area = d3.area()
  .x(d => x(d.data.Year))
  .y0(d => y(d[0]))
  .y1(d => y(d[1]))

  // Color
  const color = d3.scaleOrdinal()
    .domain(keys)
     .range(["#D67A83", "#D05257", "#D87F58","#CE8E27",
     "#8FB79C", "#83AF92", "#588968", "#0B656F", "#74A5D2",
      "#226BBF", "#184E8B", "#BA3B8B"])

  // Show
  const path = svg.append("g")
    .selectAll("path")
    .data(series)
    .join("path")
    .attr("transform", `translate(${x_global}, ${XY_axis})`)
      .attr("data-label", d => d.key)
      .attr("fill", ({key}) => color(key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("d", area)
      .attr("opacity", 0.8)

  svg.append("g")
      .call(xAxis)

  // ==== Interactive ====
  const hover = (svg, path) => {
  
    const line = svg.append("g")
         .attr("display", "none");
  
    line.append("g")
        .selectAll("line")
        .data(series)
        .join("line")
        .attr("class", "cursor-line")
        .attr("fill", "#fff")
        .attr("stroke-width", 0.8)
        .attr("x1", 10)
        .attr("y1", height)
        .attr("x2", 10)
        .attr("y2", 10);

    line.append("text")
      .attr("class", "text-year")
      .attr("font-size", 14)
      .attr("x", 0)
      .attr("y", 26)
      .attr("transform", "rotate(-90 20 20) translate(0, -20)");
    
    line.append("g")
        .selectAll("text")
        .data(series)
        .join("text")
        .attr("class", "text-label")
        .attr("font-weight", 900)
        .attr("font-size", 17)
        .attr("y", height-10)
        .attr("x", -8)
        .attr("transform", 
        "rotate(-90 10 460)");

    line.append("g")
        .selectAll("text")
        .data(data)
        .join("text")
        .attr("class", "text-value")
        .attr("font-weight", 700)
        .attr("font-size", 14)
        .attr("y", height-(height*0.94))
        .attr("x", 14);

    // Pointer handler for the whole svg: moves the cursor guide to the pointer
    // and shows the year, the hovered stream's name and its value for that year.
    // The hovered stream is identified by the "data-label" attribute that the
    // area paths carry; any other target counts as "not over a stream".
    const mousemove = (event) => {

      event?.preventDefault();
      const pointer = d3.pointer(event);
      const label = d3.select(event.target).attr("data-label");

      // Not over a stream: restore every area to its resting opacity, hide the
      // guide and stop, so no "undefined%" text is ever written.
      if (label === null) {
        path.attr("opacity", 0.8);
        line.style("visibility", "hidden");
        return;
      }

      // NOTE(review): pointer[0] is relative to the svg the handler is bound
      // to, while the x scale's range starts at 0 and the chart itself is drawn
      // at translate(x_global, ...). The inverted value therefore includes that
      // horizontal offset. Kept as in the original - confirm that the year
      // shown lines up with the tick under the cursor.
      const year = parseInt(x.invert(pointer[0]));

      line.attr("transform", `translate(${pointer[0]}, 0)`)
          .style("visibility", "inherit");

      line.select(".text-year")
        .text(year);

      // One text node exists per data row; only the row of the current year
      // shows its value for the hovered stream.
      line.selectAll(".text-value")
        .text(d => d.Year === year ? d[label] + "%" : "");

      // Hide every stream name, then reveal and colour the hovered one.
      line.selectAll(".text-label")
        .style("visibility", "hidden")
        .filter(d => d.key === label)
        .style("visibility", "inherit")
        .attr("fill", color(label))
        .text(label);

      // Same pattern for the guide lines: transparent by default ("#fff0" is
      // fully transparent white), hovered stream's colour for the match.
      line.selectAll(".cursor-line")
        .attr("stroke", "#fff0")
        .filter(d => d.key === label)
        .attr("stroke", color(label));

      // Dim every stream except the hovered one.
      path.attr("opacity", d => d.key === label ? 0.8 : 0.3);
    }

    const mouseenter = () => {
      line.attr("display", null);
    }
  
    const mouseleave = () => {
      line.attr("display", "none");
    }

    svg
        .on("mousemove", mousemove)
        .on("mouseenter", mouseenter)
        .on("mouseleave", mouseleave)

  }

  svg.call(hover, path);
}

// ===== MEN =====
const svg1 = d3.select("#map15-gents").append("svg")
// .attr("style", "outline: thin solid red;")
  .attr("preserveAspectRatio", "xMinYMin meet")
  .attr("viewBox", "0 0 950 500")
  .classed("svg-content", true)
  .attr("transform",
   `translate(${margin.LEFT}, ${margin.TOP})`)

// Gentleman
svg1.append("image")
.attr("xlink:href", baseAssetsUrl+"man15.png")
    .attr("x", "-20%")
    .attr("y", "13%")
    .attr("width", 390)
    .attr("height", 390)
    .style("opacity", 1)

// Comment
// Single note rendered by d3.annotation() on the gentlemen's chart.
// x / y give the anchor point, dx / dy the offset of the note from it.
const annot1 = [
  {
  note: { 
    label: "There is no substantial increase or decrease in any of the frameworks for both men & women.",
    title: "Steady Trend",
    wrap: 100, 
    padding: 5, 
  },
  className: "color",
  // connector: { end: "arrow" },
  color: ["#ffffff"],
  x: 780,
  y: 350,
  dy: -1,
  dx: +1
}]

svg1.append("g")
  .style('font-size', 17)
  .call(d3.annotation()
          .annotations(annot1))

d3.csv(baseAssetsUrl + "ml_man.csv").then(function(data){

  const name = "Gentlemen"
  create_graph(data, svg1, name)

});

// ===== WOMEN =====
// Responsive SVG canvas for the ladies' chart, mounted inside #map15-ladies.
const svg2 = d3.select("#map15-ladies")
  .append("svg")
    .attr("preserveAspectRatio", "xMinYMin meet")
    .attr("viewBox", "0 0 950 500")
    .classed("svg-content", true)
    .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Lady illustration, loaded from the notebook's asset folder.
svg2.append("image")
    .attr("xlink:href", `${baseAssetsUrl}lady15.png`)
    .attr("x", "-17%")
    .attr("y", "12%")
    .attr("width", 450)
    .attr("height", 450)
    .style("opacity", 1);

// Text call-out rendered with d3-annotation (no connector arrow).
const annot2 = [{
  className: "color",
  color: ["#ffffff"],
  x: 780,
  y: 350,
  dx: 1,
  dy: -1,
  note: {
    title: "Less Usage",
    label: "Overall, ladies have smaller interest for frameworks than the gents do.",
    wrap: 100,
    padding: 5,
  },
}];

svg2.append("g")
    .style('font-size', 17)
    .call(d3.annotation().annotations(annot2));

// Load the women's data and draw the chart once the CSV has been parsed.
d3.csv(`${baseAssetsUrl}ml_woman.csv`).then((data) => {
  create_graph(data, svg2, "Ladies");
});

});
'''


# Emit the HTML markup held in `htmlt`, then wrap the script held in `js_t`
# in a Javascript display object and send it to the front end.
# NOTE(review): `display` returns None here unless called with display_id — confirm `h` is not needed.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<h2 style="font-family: parklane">3.3 Deep Learning "Swing"</h2>

<p style="font-family: times-new-roman">The overall percentages are still very small. An example architecture for each category is given in parentheses.</p>

['Year', 'Gender', 'ImageSegmentation (U-Net)',
       'Object detection (RetinaNet)', 'Image classification (ResNet)',
       'Generative Networks (GAN)']
       
['Year', 'Gender', 'EncoderDecorderModels (seq2seq)',
       'Contextualized embeddings (ELMo)',
       'Transformer Language Models (BERT)']

In [25]:
# ===== Computer Vision =====
# Columns 96..99 of the aggregated survey table are passed on as the four
# computer-vision answers named in `new_names`.
cols = list(df.columns[96:100])
new_names = ["Image segmentation (U-Net)", "Object detection (RetinaNet)", "Image classification (ResNet)",
             "Generative Networks (GAN)"]

cv = get_data_multiple_answers(cols, new_names)
# Drop the 2017 and 2018 rows.
# NOTE(review): presumably these questions were not asked in those years — confirm.
cv = cv[~cv["Year"].isin([2017, 2018])]
# Space-free identifiers: the front end uses the CSV headers as CSS class names
# and shows them in the tooltip.
cv.columns = ['Year', 'Gender', 'ImageSegmentation', 'ObjectDetection',
              'ImageClassification', 'GenerativeNetworks']

cv_man = cv[cv["Gender"] == "Man"].drop(columns=["Gender"])
cv_woman = cv[cv["Gender"] == "Woman"].drop(columns=["Gender"])

cv_man.to_csv("cv_man.csv", index=False)
cv_woman.to_csv("cv_woman.csv", index=False)

# ===== NLP =====
# Columns 103..105 are passed on as the three NLP answers named in `new_names`.
cols = list(df.columns[103:106])
new_names = ["Encoder-Decoder models (seq2seq)", "Contextualized embeddings (ELMo)",
             "Transformer Language Models (BERT)"]

nlp = get_data_multiple_answers(cols, new_names)
nlp = nlp[~nlp["Year"].isin([2017, 2018])]
# "EncoderDecoderModels" was previously misspelt "EncoderDecorderModels"; the
# header is displayed verbatim in the chart tooltip, so the typo was user-visible.
nlp.columns = ['Year', 'Gender', 'EncoderDecoderModels',
               'ContextualizedEmbeddings', 'TransformerLanguageModels']

nlp_man = nlp[nlp["Gender"] == "Man"].drop(columns=["Gender"])
nlp_woman = nlp[nlp["Gender"] == "Woman"].drop(columns=["Gender"])

nlp_man.to_csv("nlp_man.csv", index=False)
nlp_woman.to_csv("nlp_woman.csv", index=False)

# Re-save the illustrations into the working directory, next to the CSVs, so the
# front-end script can load them from the same asset base URL.
for image_name in ("man16.png", "lady16.png"):
    im = imageio.imread(f"../input/kaggle-data-science-survey-20172021/images/{image_name}")
    Image.fromarray(im).save(image_name)

# HTML scaffold for the "DL: The Visual & The Talk" figure.
# - Pulls in the Bootstrap stylesheet and defines the figure's CSS, scoped mostly under `.all16`.
# - The hidden <img id="baseimg"> exists only so the script can read its resolved `src`
#   and derive the base URL of the notebook's output files.
# - `#map16-cv` and `#map16-nlp` are the containers the D3 script appends its <svg> elements to.
# - The background image of `.all16` is set from JavaScript, not from this stylesheet.
htmlt = '''
<!-- Bootstrap -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Style -->
<style>
	.all16 {
		font-family: "Times New Roman", Times, serif;
		overflow-x: hidden;
		width: 98%;
		background-color:#e0c9a6;
		/* background-image : url("bkg.jpg"); changed via js */
	}
	.all16 .svg-container {
		display: inline-block;
		position: relative;
		width: 100%;
		padding-bottom: 54%;
		vertical-align: top;
		overflow: hidden;
	}
	.svg-content {
		display: inline-block;
		position: absolute;
		top: 0;
		left: 0;
	}

	.axisHidden path{
		stroke: #e0c9a6;
	}

	.axisHidden line{
		stroke: #e0c9a6;
	}

	.annotation.color text {
		fill: #000000;
	}

	@font-face {
		font-family: parklane;
		src: url(https://h4ks.net/ParkLaneNF.otf);
		}

	div.tooltip-donut {
		position: absolute;
		text-align: center;
		padding: .3rem;
		background: #FFFFFF;
		color: #000000;
		border: 1px solid #000000;
		border-radius: 8px;
		pointer-events: none;
		font-size: 2rem;
		z-index: 99999;
	}

</style>

<img id="baseimg" src="man.png" style="display:none" />
<div class="all16">
	<center><h1 style="font-family: parklane">DL: The Visual & The Talk</h1></center>
	<center><h3><i>- Percentage of respondents and their preferences (Computer Vision & NLP) -</i></h3></center>
	<div class="row">
		<div id="map16-cv" class="svg-container"></div>
	</div>

	<div class="row">
		<div id="map16-nlp" class="svg-container"></div>
	</div>
</div>
'''

js_t = '''
// RequireJS configuration for this cell.
// `paths` declares the alias `d3src` for the d3js.org host; `map` then binds the
// module ids requested below (for every requiring module, hence '*') to concrete
// scripts under that alias.
// NOTE(review): the d3-selection / d3-drag / d3-shape / d3-path / d3-dispatch
// entries are presumably the AMD dependencies that d3-annotation itself asks
// for — confirm before removing any of them.
require.config({
  paths: {
    d3src: "https://d3js.org/",
  },
  map: {
    '*': {
      'd3v6': 'd3src/d3.v6.min',
      'd3-selection': 'd3src/d3-selection.v1.min',
      'd3-drag': 'd3src/d3-drag.v1.min',
      'd3-shape': 'd3src/d3-shape.v1.min',
      'd3-path': 'd3src/d3-path.v1.min',
      'd3-dispatch': 'd3src/d3-dispatch.v1.min',
      // d3-annotation is fetched from cdnjs rather than from d3js.org.
      'd3-annotation': 'https://cdnjs.cloudflare.com/ajax/libs/d3-annotation/2.5.1/d3-annotation.min.js',
    }
  }
});

require(["d3v6", "d3-annotation"], function(d3, d3Annotation) {

// Copy every d3-annotation export onto the d3 namespace so that the rest of
// the script can call e.g. d3.annotation().
for (const exportName in d3Annotation) {
  d3[exportName] = d3Annotation[exportName];
}

// The hidden #baseimg element resolves "man.png" against the notebook's output
// location; stripping the file name leaves the base URL of all saved assets.
const baseImgElement = document.getElementById('baseimg');
const baseAssetsUrl = baseImgElement.src.replace(/man.png.*$/, '');

// Point the figure's background at the asset folder.
const $all16 = document.getElementsByClassName('all16')[0];
$all16.style.backgroundImage = 'url("' + baseAssetsUrl + 'bkg.jpg")';

// Drawing area: the 950x500 viewBox minus the margins.
const margin = { LEFT: 20, RIGHT: 40, TOP: 15, BOTTOM: 0 };
const width = 950 - (margin.LEFT + margin.RIGHT);
const height = 500 - (margin.TOP + margin.BOTTOM);


// STACKED CHART
/**
 * Draw one stacked bar chart (one bar per `Year` value) into `svg` and attach
 * hover behaviour: the hovered series is highlighted and a tooltip shows its value.
 *
 * Reads `width` and `height` from the enclosing scope.
 *
 * @param svg       d3 selection of the <svg> element to draw into.
 * @param data      rows as parsed by d3.csv; each row has a `Year` field plus one field per subgroup.
 * @param subgroups array of column names to stack, in stacking order.
 * @param x_global  horizontal offset of this chart inside the svg (viewBox units).
 * @param y_range   upper bound of the y-axis domain.
 * @param color     scale mapping a subgroup name to a fill colour.
 * @param flag      short label prefixed to the tooltip text.
 */
function create_stacked_bar(svg, data, subgroups, x_global,
   y_range, color, flag) {

  const groups = data.map(d => d.Year)

  // Add X axis
  const x = d3.scaleBand()
      .domain(groups)
      .range([0, width/2.3])
      .padding(0.2)   // padding takes a number; the array form only worked through coercion
  svg.append("g")
    .attr("transform", `translate(${x_global}, ${height/1.05})`)
    .call(d3.axisBottom(x).tickSizeOuter(0))
    .selectAll("text")
      .attr("y", "10")
      .attr("x", "0")
      .attr("font-size", "15px")
      .attr("font-weight", 300)
      .attr("text-anchor", "middle")

  // Add Y axis (drawn without ticks and styled by the "axisHidden" class)
  const y = d3.scaleLinear()
    .domain([0, y_range])
    .range([ height/1.05, 0]);
  svg.append("g")
    .attr("class", "axisHidden")
    .attr("transform", `translate(${x_global}, ${height-500})`)
    .call(d3.axisLeft(y).ticks(0))
    .selectAll("text")
      .attr("font-size", "11px")
      .attr("font-weight", 300);

  // Stack the data: one series per subgroup, one [y0, y1] pair per row
  const stackedData = d3.stack()
    .keys(subgroups)
    (data)

  // Show the bars: one <g> per subgroup (classed "myRect <subgroup>"),
  // one <rect> per year inside it
  const bars = svg.append("g")
    .selectAll("g")
    .data(stackedData)
    .join("g")
      .attr("transform", `translate(${x_global}, ${height-500})`)
      .attr("fill", d => color(d.key))
      .attr("stroke", "black")
      .attr("stroke-width", 0.2)
      .attr("class", d => "myRect " + d.key )
      .selectAll("rect")
      .data(d => d)
      .join("rect")
        .attr("x", d => x(d.data.Year))
        .attr("y", d => y(d[1]))
        .attr("height", d => y(d[0]) - y(d[1]))
        .attr("width", x.bandwidth())

  // Tooltip element, appended to <body> so it can be positioned with page coordinates
  const tooltip = d3.select("body").append("div")
    .attr("class", "tooltip-donut")
    .style("opacity", 0);

  bars
    // MOUSE ON
    .on('mouseenter', function (event, dt) {
      // The subgroup name lives on the parent <g>'s datum (the stacked series)
      const subgroupName = d3.select(this.parentNode).datum().key;
      const subgroupValue = dt.data[subgroupName];

      // Dim every series in THIS svg, then restore the hovered one.
      // Selections are scoped to `svg` so that hovering here no longer changes
      // the opacity of ".myRect" elements belonging to other charts on the page.
      svg.selectAll(".myRect").style("opacity", 0.2)
      svg.selectAll("." + subgroupName).style("opacity", 1)

      // Fade the tooltip in and place it next to the pointer
      tooltip.transition()
          .duration(50)
          .style("opacity", 1);

      tooltip.html(flag + ": " + subgroupName + "<br>" + "Perc: " + subgroupValue +'%')
          .style("left", (event.pageX) + "px")
          .style("top", (event.pageY-30) + "px");
    })
    // MOUSE LEAVE
    .on('mouseleave', function () {
      svg.selectAll(".myRect")
          .style("opacity", 1)

      // Fade the tooltip out
      tooltip.transition()
          .duration(50)
          .style("opacity", 0);
    })

}

// ===== Computer Vision =====
// Responsive canvas for the computer-vision row, mounted inside #map16-cv.
const svg1 = d3.select("#map16-cv")
  .append("svg")
    .attr("preserveAspectRatio", "xMinYMin meet")
    .attr("viewBox", "0 0 950 500")
    .classed("svg-content", true)
    .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Row title, rotated by -90 degrees.
svg1.append("text")
    .attr("transform", "rotate(-90)")
    .attr("x", "-27%")
    .attr("y", "8%")
    .attr("text-anchor", "middle")
    .attr("font-family", "parklane")
    .attr("font-size", "30px")
    .text("Computer Vision");

// Ladies illustration
svg1.append("image")
    .attr("xlink:href", `${baseAssetsUrl}lady16.png`)
    .attr("x", "68%")
    .attr("y", "13%")
    .attr("width", 210)
    .attr("height", 210)
    .style("opacity", 1);

// One entry per chart in this row: the CSV to load, the heading and its x
// position, the chart's horizontal offset and the fill colours of its series.
const cvPanels = [
  {
    file: "cv_man.csv",
    heading: "Gentlemen",
    headingX: "28%",
    offsetX: 80,
    palette: ["#284a5d", "#5f7384", "#90a6b3", "#595052"],
  },
  {
    file: "cv_woman.csv",
    heading: "Ladies",
    headingX: "73%",
    offsetX: 500,
    palette: ["#8a3223", "#aa5549", "#bf897a", "#c26e4c"],
  },
];

for (const panel of cvPanels) {
  d3.csv(baseAssetsUrl + panel.file).then((data) => {
    // Every column after the first ("Year") is one stacked series.
    const subgroups = data.columns.slice(1);
    const color = d3.scaleOrdinal()
      .domain(subgroups)
      .range(panel.palette);

    svg1.append("text")
        .attr("x", panel.headingX)
        .attr("y", "10%")
        .attr("text-anchor", "middle")
        .attr("font-size", "22px")
        .attr("font-weight", 600)
        .text(panel.heading);

    // y domain is capped at 80 for both computer-vision charts.
    create_stacked_bar(svg1, data, subgroups, panel.offsetX, 80, color, "CV");
  });
}



// ===== NLP =====
// Responsive canvas for the NLP row, mounted inside #map16-nlp.
const svg2 = d3.select("#map16-nlp")
  .append("svg")
    .attr("preserveAspectRatio", "xMinYMin meet")
    .attr("viewBox", "0 0 950 500")
    .classed("svg-content", true)
    .attr("transform", `translate(${margin.LEFT}, ${margin.TOP})`);

// Gents illustration
svg2.append("image")
    .attr("xlink:href", `${baseAssetsUrl}man16.png`)
    .attr("x", "6%")
    .attr("y", "12%")
    .attr("width", 220)
    .attr("height", 220)
    .style("opacity", 1);

// Row title, rotated by -90 degrees.
svg2.append("text")
    .attr("transform", "rotate(-90)")
    .attr("x", "-27%")
    .attr("y", "8%")
    .attr("text-anchor", "middle")
    .attr("font-family", "parklane")
    .attr("font-size", "30px")
    .text("Natural Lang. Processing");

// One entry per chart in this row: the CSV to load, the heading and its x
// position, the chart's horizontal offset and the fill colours of its series.
const nlpPanels = [
  {
    file: "nlp_man.csv",
    heading: "Gentlemen",
    headingX: "28%",
    offsetX: 80,
    palette: ["#284a5d", "#5f7384", "#90a6b3"],
  },
  {
    file: "nlp_woman.csv",
    heading: "Ladies",
    headingX: "73%",
    offsetX: 500,
    palette: ["#8a3223", "#aa5549", "#bf897a"],
  },
];

for (const panel of nlpPanels) {
  d3.csv(baseAssetsUrl + panel.file).then((data) => {
    // Every column after the first ("Year") is one stacked series.
    const subgroups = data.columns.slice(1);
    const color = d3.scaleOrdinal()
      .domain(subgroups)
      .range(panel.palette);

    svg2.append("text")
        .attr("x", panel.headingX)
        .attr("y", "10%")
        .attr("text-anchor", "middle")
        .attr("font-size", "22px")
        .attr("font-weight", 600)
        .text(panel.heading);

    // y domain is capped at 50 for both NLP charts.
    create_stacked_bar(svg2, data, subgroups, panel.offsetX, 50, color, "NLP");
  });
}

});
'''


# Emit the HTML scaffold first (it contains the #map16-cv / #map16-nlp containers
# and the hidden #baseimg element the script looks up), then send the D3 script.
# NOTE(review): `display` returns None here unless called with display_id — confirm `h` is not needed.
h = display(HTML(htmlt))
j = py_display.Javascript(js_t)
py_display.display_javascript(j)

<div class="alert simple-alert" style="font-family: times-new-roman">
  <p style="font-family: times-new-roman"><center>👀 ...</center></p>
</div>

<h1 style="font-family: parklane">4. ...</h1>

<p style="font-family: times-new-roman">...</p>

<h2 style="font-family: parklane">4.1 ...</h2>

In [26]:
# Peek at the employer-related questions: industry, company size,
# size of the data science team, and whether the employer uses ML.
df.columns[[108, 109, 110, 111]]

Index(['In what industry is your current employer/contract (or your most recent employer if retired)? - Selected Choice',
       'What is the size of the company where you are employed?',
       'Approximately how many individuals are responsible for data science workloads at your place of business?',
       'Does your current employer incorporate machine learning methods into their business?'],
      dtype='object')

In [27]:
# Peek at the multi-select columns of the question
# "Select any activities that make up an important part of your role at work".
df.columns[113:120]

Index(['Select any activities that make up an important part of your role at work: (Select all that apply) - Selected Choice - Build and/or run the data infrastructure that my business uses for storing, analyzing, and operationalizing data',
       'Select any activities that make up an important part of your role at work: (Select all that apply) - Selected Choice - Build prototypes to explore applying machine learning to new areas',
       'Select any activities that make up an important part of your role at work: (Select all that apply) - Selected Choice - Build and/or run a machine learning service that operationally improves my product or workflows',
       'Select any activities that make up an important part of your role at work: (Select all that apply) - Selected Choice - Experimentation and iteration to improve existing ML models',
       'Select any activities that make up an important part of your role at work: (Select all that apply) - Selected Choice - Do research that adva

In [28]:
# Peek at the yearly-compensation and ML / cloud-spending questions.
df.columns[[120, 121]]

Index(['What is your current yearly compensation (approximate $USD)?', 'Approximately how much money have you (or your team) spent on machine learning and/or cloud computing services at home (or at work) in the past 5 years (approximate $USD)?'], dtype='object')

<div class="alert simple-alert" style="font-family: times-new-roman">
  <p style="font-family: times-new-roman"><center>👀 ...</center></p>
</div>

<h1 style="font-family: parklane">5. Spreading the Knowledge</h1>

<p style="font-family: times-new-roman">...</p>

<h2 style="font-family: parklane">5.1 ...</h2>

In [29]:
# # ML Experiments (into ML area!!!)
# df.columns[183:193]

# # Analysis community share
# df.columns[193:203]
# # DS Courses
# df.columns[203:215]
# # Favorite Media Sources
# df.columns[216:228]

<h1 style="font-family: times-new-roman">⌛ Work in Progress</h1>

In [30]:
# cols = list(df.columns[[33, 34, 35, 36, 37, 45]])
# new_names = ["Kaggle", "Colab", "Azure", "Paperspace/Gradient",
#              "Binder/ Jupyter Hub", "None"]

# hosted = get_data_multiple_answers(cols, new_names)
# hosted = hosted[hosted["Year"]!=2017]

# hosted_man = hosted[hosted["Gender"] == "Man"].drop(columns=["Gender"])
# hosted_woman = hosted[hosted["Gender"] == "Woman"].drop(columns=["Gender"])

# hosted_man.to_csv("hosted_man.csv", index=False)
# hosted_woman.to_csv("hosted_woman.csv", index=False)

In [31]:
# # Save usa.png img as well
# im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/man8.png')
# Image.fromarray(im).save("man8.png")

# im = imageio.imread('../input/kaggle-data-science-survey-20172021/images/lady8.png')
# Image.fromarray(im).save("lady8.png")

# htmlt = '''

# '''

# js_t = '''
# '''


# h = display(HTML(htmlt))
# j = py_display.Javascript(js_t)
# py_display.display_javascript(j)

> <p style="font-family: times-new-roman">My <a href="https://wandb.ai/andrada/kaggle-survey-2021?workspace=user-andrada">W&B Dashboard</a> is growing:</p>
<center><img src="https://i.imgur.com/EcQQ3IN.gif" width=800></center>

<center><img src="https://i.imgur.com/cUQXtS7.png"></center>

<h1 style="font-family: parklane">My Specs</h1>

* <p style="font-family: times-new-roman">🖥 Z8 G4: Workstation</p>
* <p style="font-family: times-new-roman">💾 2 CPUs & 96GB Memory</p>
* <p style="font-family: times-new-roman">🎮 NVIDIA Quadro RTX 8000</p>
* <p style="font-family: times-new-roman">💻 Zbook Studio G7 on the go</p>