In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load the data and filter needed columns

In [3]:
df = pd.read_csv('/content/drive/MyDrive/datasets/stackoverflow_survey_2020/survey_results_public.csv')
desiredColumn = ['MainBranch', 'ConvertedComp', 'Country', 'DatabaseDesireNextYear', 'DatabaseWorkedWith', 'DevType', 'EdLevel', 'Employment', 'LanguageDesireNextYear', 'LanguageWorkedWith', 'MiscTechDesireNextYear', 'MiscTechWorkedWith', 'NEWLearn', 'PlatformDesireNextYear', 'PlatformWorkedWith', 'UndergradMajor', 'WebframeDesireNextYear', 'WebframeWorkedWith', 'WorkWeekHrs', 'YearsCode', 'YearsCodePro']
df = df[desiredColumn]
df.head()

Unnamed: 0,MainBranch,ConvertedComp,Country,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWLearn,PlatformDesireNextYear,PlatformWorkedWith,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WorkWeekHrs,YearsCode,YearsCodePro
0,I am a developer by profession,,Germany,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Once a year,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,50.0,36,27.0
1,I am a developer by profession,,United Kingdom,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Once a year,iOS;Kubernetes;Linux;MacOS,iOS,"Computer science, computer engineering, or sof...",,,,7,4.0
2,I code primarily as a hobby,,Russian Federation,,,,,,Objective-C;Python;Swift,Objective-C;Python;Swift,,,Once a decade,,,,,,,4,
3,I am a developer by profession,,Albania,,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,,,,,Once a year,,,"Computer science, computer engineering, or sof...",,,40.0,7,4.0
4,"I used to be a developer by profession, but no...",,United States,MySQL;PostgreSQL,MySQL;PostgreSQL;Redis;SQLite,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Java;Ruby;Scala,HTML/CSS;Ruby;SQL,Ansible;Chef,Ansible,Once a year,Docker;Google Cloud Platform;Heroku;Linux;Windows,AWS;Docker;Linux;MacOS;Windows,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,,15,8.0


### Filter only developer profession

In [4]:
df = df[df['MainBranch'] == 'I am a developer by profession']
df['MainBranch'].unique()

array(['I am a developer by profession'], dtype=object)

### Create functions to convert list to dummies



In [13]:
def add_list_to_df(df, arr, prefix):
  '''
  INPUT:
  df - target df to be added
  arr - list of string to be added to dataframe
  prefix - column prefix

  OUTPUT:
  new_df - dataframe with added dummies column
  '''

  append_dict = {}
  for col_name in arr:
    col_name = prefix + '_' + col_name
    
    #add new column in dataframe
    if col_name not in list(df.columns):
      df[col_name] = np.zeros(df.shape[0])
    
    append_dict[col_name] = 1
  
  #add new row
  df = df.append(append_dict, ignore_index=True)

  return df
    
    
    



In [7]:
def separate_column_list(df, col_name, list_sep, prefix=None):
  '''
  INPUT:
  df - pandas dataframe
  col_name - name of column which contain list that want to be separated
  list_sep - character used to separate object in the list
  prefix - prefix for the new column

  OUTPUT:
  new_df - new dataframe with separated column only
  '''
  if prefix == None:
    prefix = col_name
  
  target_df = df[col_name]
  new_df = pd.DataFrame()

  for i in range(df.shape[0]):
    raw_str = target_df.iat[i]
    if type(raw_str) != str:
      continue
    str_list = raw_str.split(list_sep)
    new_df = add_list_to_df(new_df, str_list, prefix)
  
  return new_df
    
    
      


In [14]:
df = pd.concat([df.drop(axis=1, columns='DatabaseWorkedWith'), separate_column_list(df, 'DatabaseWorkedWith', ';')],
               axis=1)
df

Unnamed: 0,MainBranch,ConvertedComp,Country,DatabaseDesireNextYear,DevType,EdLevel,Employment,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWLearn,PlatformDesireNextYear,PlatformWorkedWith,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WorkWeekHrs,YearsCode,YearsCodePro,DatabaseWorkedWith_Elasticsearch,DatabaseWorkedWith_Microsoft SQL Server,DatabaseWorkedWith_Oracle,DatabaseWorkedWith_MariaDB,DatabaseWorkedWith_MySQL,DatabaseWorkedWith_Redis,DatabaseWorkedWith_Firebase,DatabaseWorkedWith_MongoDB,DatabaseWorkedWith_PostgreSQL,DatabaseWorkedWith_SQLite,DatabaseWorkedWith_IBM DB2,DatabaseWorkedWith_DynamoDB,DatabaseWorkedWith_Cassandra,DatabaseWorkedWith_Couchbase
0,I am a developer by profession,,Germany,Microsoft SQL Server,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Once a year,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,50.0,36,27,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,I am a developer by profession,,United Kingdom,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Once a year,iOS;Kubernetes;Linux;MacOS,iOS,"Computer science, computer engineering, or sof...",,,,7,4,,,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,I am a developer by profession,,Albania,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,,,,,Once a year,,,"Computer science, computer engineering, or sof...",,,40.0,7,4,,,,,,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
4,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,1.0,,,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64154,I am a developer by profession,38759.0,Japan,,"Developer, front-end","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,,,,,Web development or web design,,,40.0,5,Less than 1 year,,,,,,,,,,,,,,
64155,I am a developer by profession,,Australia,,,,Employed full-time,,,,,,,,,,,,,,,,,,,,,,,,,,,
64157,I am a developer by profession,,Philippines,Oracle;SQLite,,,Employed full-time,HTML/CSS;Java;JavaScript,HTML/CSS;Java;JavaScript,,,,Android;Arduino;Linux;Windows;WordPress,Android;Arduino;Linux;Windows;WordPress,,React.js,React.js,,,,,,,,,,,,,,,,,
64158,I am a developer by profession,,Philippines,,"Developer, back-end;Developer, desktop or ente...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,,,,,"Information systems, information technology, o...",,,,6,Less than 1 year,,,,,,,,,,,,,,
