In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load the data and filter needed columns

In [3]:
df = pd.read_csv('/content/drive/MyDrive/datasets/stackoverflow_survey_2020/survey_results_public.csv')
desiredColumn = ['MainBranch', 'ConvertedComp', 'Country', 'DatabaseDesireNextYear', 'DatabaseWorkedWith', 'DevType', 'EdLevel', 'Employment', 'LanguageDesireNextYear', 'LanguageWorkedWith', 'MiscTechDesireNextYear', 'MiscTechWorkedWith', 'NEWLearn', 'PlatformDesireNextYear', 'PlatformWorkedWith', 'UndergradMajor', 'WebframeDesireNextYear', 'WebframeWorkedWith', 'WorkWeekHrs', 'YearsCode', 'YearsCodePro']
df = df[desiredColumn]
df.head(20)

Unnamed: 0,MainBranch,ConvertedComp,Country,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWLearn,PlatformDesireNextYear,PlatformWorkedWith,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WorkWeekHrs,YearsCode,YearsCodePro
0,I am a developer by profession,,Germany,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Once a year,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,50.0,36,27.0
1,I am a developer by profession,,United Kingdom,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Once a year,iOS;Kubernetes;Linux;MacOS,iOS,"Computer science, computer engineering, or sof...",,,,7,4.0
2,I code primarily as a hobby,,Russian Federation,,,,,,Objective-C;Python;Swift,Objective-C;Python;Swift,,,Once a decade,,,,,,,4,
3,I am a developer by profession,,Albania,,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,,,,,Once a year,,,"Computer science, computer engineering, or sof...",,,40.0,7,4.0
4,"I used to be a developer by profession, but no...",,United States,MySQL;PostgreSQL,MySQL;PostgreSQL;Redis;SQLite,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Java;Ruby;Scala,HTML/CSS;Ruby;SQL,Ansible;Chef,Ansible,Once a year,Docker;Google Cloud Platform;Heroku;Linux;Windows,AWS;Docker;Linux;MacOS;Windows,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,,15,8.0
5,I am a developer by profession,,Germany,,,"Designer;Developer, front-end;Developer, mobile","Secondary school (e.g. American high school, G...",Employed full-time,HTML/CSS;Java;JavaScript,HTML/CSS;Java;JavaScript,,,Once a year,Android,Android;Docker;WordPress,,React.js,,,6,4.0
6,I am a developer by profession,,India,,,"Developer, back-end;Developer, front-end;Devel...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,C#;HTML/CSS;PHP,C#;HTML/CSS;PHP,,,Every few months,,,"Computer science, computer engineering, or sof...",,,,6,4.0
7,I am a developer by profession,116000.0,United States,MongoDB,MariaDB;MySQL;Redis,"Developer, back-end;Developer, desktop or ente...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,JavaScript,Python;SQL,Unity 3D,Ansible,Once a year,iOS;Slack Apps and Integrations,Docker,"Computer science, computer engineering, or sof...",Django;React.js;Vue.js,Flask,39.0,17,13.0
8,I am a developer by profession,,Tunisia,,,"Developer, full-stack","Professional degree (JD, MD, etc.)","Independent contractor, freelancer, or self-em...",Python;Rust,HTML/CSS;JavaScript;PHP,,,Once a year,,WordPress,"Computer science, computer engineering, or sof...",Angular.js,jQuery,50.0,6,4.0
9,I am a developer by profession,32315.0,United Kingdom,Microsoft SQL Server,Microsoft SQL Server,"Database administrator;Developer, full-stack;D...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,HTML/CSS;Java;JavaScript;Python;R;SQL,HTML/CSS;Java;JavaScript;Python;SQL,Pandas;TensorFlow,Pandas,Every few months,Android;Linux;Raspberry Pi;Windows,Android;Linux;Raspberry Pi;Windows,Mathematics or statistics,Flask;jQuery,Flask;jQuery,36.0,8,4.0


### Filter only developer profession

In [4]:
df = df[df['MainBranch'] == 'I am a developer by profession']
df['MainBranch'].unique()

array(['I am a developer by profession'], dtype=object)

### Create functions to convert list to dummies



In [5]:
def add_list_to_df(df, arr, prefix):
  '''
  INPUT:
  df - target df to be added
  arr - list of string to be added to dataframe
  prefix - column prefix

  OUTPUT:
  new_df - dataframe with added dummies column
  '''

  append_dict = {}
  for col_name in arr:
    col_name = prefix + '_' + col_name
    
    #add new column in dataframe
    if col_name not in list(df.columns):
      df[col_name] = np.zeros(df.shape[0])
    
    append_dict[col_name] = 1
  
  #add new row
  df = df.append(append_dict, ignore_index=True, verify_integrity=True)

  return df

In [6]:
def separate_column_list(df, col_name, list_sep, prefix=None):
  '''
  INPUT:
  df - pandas dataframe
  col_name - name of column which contain list that want to be separated
  list_sep - character used to separate object in the list
  prefix - prefix for the new column

  OUTPUT:
  new_df - new dataframe with separated column only
  '''
  if prefix == None:
    prefix = col_name
  
  target_df = df[col_name]
  new_df = pd.DataFrame()

  for i in range(df.shape[0]):
    raw_str = target_df.iat[i]
    if type(raw_str) != str:
      continue
    str_list = raw_str.split(list_sep)
    new_df = add_list_to_df(new_df, str_list, prefix)
  
  return new_df

### List of columns that will be splitted into dummy

In [7]:
dummified_columns = ['DatabaseDesireNextYear', 'DatabaseWorkedWith', 'DevType',
  'LanguageDesireNextYear', 'LanguageWorkedWith', 'MiscTechDesireNextYear',
  'MiscTechWorkedWith', 'PlatformDesireNextYear', 'PlatformWorkedWith', 'WebframeDesireNextYear',
  'WebframeWorkedWith']

### slice database for trial (comment this line when you are ready)

In [8]:
df = df.iloc[:100,:]

In [9]:
for column in dummified_columns:
  df = pd.concat([df.drop(axis=1, columns=column), separate_column_list(df, column, ';')],
                  axis=1)

In [10]:
df.head(20)

Unnamed: 0,MainBranch,ConvertedComp,Country,EdLevel,Employment,NEWLearn,UndergradMajor,WorkWeekHrs,YearsCode,YearsCodePro,DatabaseDesireNextYear_Microsoft SQL Server,DatabaseDesireNextYear_MongoDB,DatabaseDesireNextYear_Firebase,DatabaseDesireNextYear_PostgreSQL,DatabaseDesireNextYear_Cassandra,DatabaseDesireNextYear_Elasticsearch,DatabaseDesireNextYear_MariaDB,DatabaseDesireNextYear_Redis,DatabaseDesireNextYear_SQLite,DatabaseDesireNextYear_Oracle,DatabaseDesireNextYear_MySQL,DatabaseDesireNextYear_DynamoDB,DatabaseDesireNextYear_Couchbase,DatabaseWorkedWith_Elasticsearch,DatabaseWorkedWith_Microsoft SQL Server,DatabaseWorkedWith_Oracle,DatabaseWorkedWith_MariaDB,DatabaseWorkedWith_MySQL,DatabaseWorkedWith_Redis,DatabaseWorkedWith_Firebase,DatabaseWorkedWith_MongoDB,DatabaseWorkedWith_PostgreSQL,DatabaseWorkedWith_SQLite,DatabaseWorkedWith_IBM DB2,DatabaseWorkedWith_DynamoDB,DatabaseWorkedWith_Cassandra,"DevType_Developer, desktop or enterprise applications","DevType_Developer, full-stack","DevType_Developer, mobile",DevType_Designer,...,PlatformWorkedWith_Raspberry Pi,PlatformWorkedWith_AWS,PlatformWorkedWith_Heroku,PlatformWorkedWith_Google Cloud Platform,PlatformWorkedWith_Kubernetes,PlatformWorkedWith_MacOS,PlatformWorkedWith_Arduino,PlatformWorkedWith_Slack Apps and Integrations,PlatformWorkedWith_Microsoft Azure,PlatformWorkedWith_IBM Cloud or Watson,WebframeDesireNextYear_ASP.NET Core,WebframeDesireNextYear_React.js,WebframeDesireNextYear_Django,WebframeDesireNextYear_Vue.js,WebframeDesireNextYear_Angular.js,WebframeDesireNextYear_Flask,WebframeDesireNextYear_jQuery,WebframeDesireNextYear_Angular,WebframeDesireNextYear_ASP.NET,WebframeDesireNextYear_Gatsby,WebframeDesireNextYear_Express,WebframeDesireNextYear_Spring,WebframeDesireNextYear_Ruby on Rails,WebframeDesireNextYear_Laravel,WebframeDesireNextYear_Symfony,WebframeWorkedWith_ASP.NET,WebframeWorkedWith_ASP.NET Core,WebframeWorkedWith_Flask,WebframeWorkedWith_jQuery,WebframeWorkedWith_Angular,WebframeWorkedWith_Angular.js,WebframeWorkedWith_Django,WebframeWorkedWith_React.js,WebframeWorkedWith_Vue.js,WebframeWorkedWith_Gatsby,WebframeWorkedWith_Spring,WebframeWorkedWith_Express,WebframeWorkedWith_Ruby on Rails,WebframeWorkedWith_Symfony,WebframeWorkedWith_Laravel
0,I am a developer by profession,,Germany,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",Once a year,"Computer science, computer engineering, or sof...",50.0,36.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,I am a developer by profession,,United Kingdom,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Once a year,"Computer science, computer engineering, or sof...",,7.0,4.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,,,,,,,,,,,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,I am a developer by profession,,Albania,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,Once a year,"Computer science, computer engineering, or sof...",40.0,7.0,4.0,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,,,,,,,,,,,1.0,,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,,,,,1.0,,,,0.0,0.0,0.0,1.0,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,I am a developer by profession,,Germany,"Secondary school (e.g. American high school, G...",Employed full-time,Once a year,,,6.0,4.0,,,,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,1.0,,0.0,0.0,0.0,,1.0,,,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,1.0,,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,1.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,I am a developer by profession,,India,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Every few months,"Computer science, computer engineering, or sof...",,6.0,4.0,,,,1.0,,1.0,,1.0,1.0,0.0,0.0,0.0,0.0,,1.0,,,,,,,1.0,,0.0,0.0,0.0,,1.0,1.0,,...,,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,1.0,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7,I am a developer by profession,116000.0,United States,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Once a year,"Computer science, computer engineering, or sof...",39.0,17.0,13.0,,1.0,,,,,1.0,,,0.0,0.0,0.0,0.0,1.0,,,1.0,1.0,1.0,,,,1.0,0.0,0.0,0.0,1.0,,,,...,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,1.0,,,,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,,,,1.0,,,,1.0,,0.0,0.0,0.0,0.0,0.0,0.0
8,I am a developer by profession,,Tunisia,"Professional degree (JD, MD, etc.)","Independent contractor, freelancer, or self-em...",Once a year,"Computer science, computer engineering, or sof...",50.0,6.0,4.0,,,,1.0,1.0,,,,,0.0,0.0,0.0,0.0,,1.0,,1.0,,,,,,,0.0,0.0,0.0,,,,1.0,...,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,,1.0,,,,,,1.0,0.0,0.0,0.0,0.0,0.0,1.0,,,1.0,,,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
9,I am a developer by profession,32315.0,United Kingdom,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,Every few months,Mathematics or statistics,36.0,8.0,4.0,,,,,,,1.0,,,1.0,0.0,0.0,0.0,,1.0,,1.0,1.0,,,,,,1.0,0.0,0.0,1.0,,,,...,,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,,1.0,1.0,,,,,,,,1.0,0.0,0.0,0.0,0.0,,1.0,,,1.0,1.0,,,,,0.0,0.0,0.0,0.0,0.0
