In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Read the Python developer survey responses into `py_df`.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference within a column.
py_filename = '../data/2020_sharing_data_outside.csv'
py_df = pd.read_csv(filepath_or_buffer=py_filename, low_memory=False)

In [3]:
# Preview the first five rows of the freshly loaded survey data.
py_df.head(n=5)

Unnamed: 0,is.python.main,other.lang.None,other.lang.Java,other.lang.JavaScript,other.lang.C/C++,other.lang.PHP,other.lang.C#,other.lang.Ruby,other.lang.Bash / Shell,other.lang.Objective-C,...,job.role.Technical support,job.role.Data analyst,job.role.Business analyst,job.role.Team lead,job.role.Product manager,job.role.CIO / CEO / CTO,job.role.Systems analyst,job.role.Other,age,country.live
0,Yes,,,,,,,,Bash / Shell,,...,,,Business analyst,,,,,,30–39,
1,Yes,,Java,JavaScript,,,C#,,,,...,,,,,,,,,21–29,India
2,Yes,,,,C/C++,,,,Bash / Shell,,...,Technical support,Data analyst,,Team lead,,,,,30–39,United States
3,Yes,,,JavaScript,,,,,Bash / Shell,,...,,,,,,,,,,
4,Yes,,Java,JavaScript,C/C++,,,,Bash / Shell,,...,,,,,,,,,21–29,Italy


In [None]:
# Rename some columns

# Columns that hold a single answer per respondent. Their names contain dots
# but must NOT be split; they are all filed under the 'general' group.
general_columns = ['age',
                   'are.you.datascientist',
                   'company.size',
                   'country.live',
                   'employment.status',
                   'first.learn.about.main.ide',
                   'how.often.use.main.ide',
                   'is.python.main',
                   'main.purposes',
                   'missing.features.main.ide',
                   'nps.main.ide',
                   'python.version.most',
                   'python.years',
                   'python2.version.most',
                   'python3.version.most',
                   'several.projects',
                   'team.size',
                   'use.python.most',
                   'years.of.coding'
                  ]

# Use the function `pd.MultiIndex.from_tuples` to create the multi-index,
# and then reassign it back to `df.columns`.

def column_multi_name(column_name):
    """Map a flat survey column name to a (group, item) tuple.

    Parameters
    ----------
    column_name : str
        Flat column label such as 'age' or 'other.lang.Java'.

    Returns
    -------
    tuple of (str, str)
        ('general', column_name) when the name is listed in `general_columns`
        or contains no '.' at all; otherwise the name split at its LAST '.',
        e.g. 'other.lang.Java' -> ('other.lang', 'Java').
    """
    if column_name in general_columns:
        return ('general', column_name)

    # rpartition never raises on a missing separator, unlike unpacking the
    # result of rsplit('.', 1), which fails with ValueError for dot-less names.
    group, separator, item = column_name.rpartition('.')
    if not separator:
        # No dot to split on: treat the column as a stand-alone question.
        return ('general', column_name)
    return (group, item)
    
# Replace the flat column labels with a two-level (group, item) MultiIndex.
# The guard keeps this cell safe to re-run: once the columns are already a
# MultiIndex their labels are tuples, which column_multi_name cannot split.
if not isinstance(py_df.columns, pd.MultiIndex):
    py_df.columns = pd.MultiIndex.from_tuples(
        [column_multi_name(one_column_name) for one_column_name in py_df.columns]
    )

In [5]:
# Preview the first five rows now that the columns are a two-level MultiIndex.
py_df.head(n=5)

Unnamed: 0_level_0,general,other.lang,other.lang,other.lang,other.lang,other.lang,other.lang,other.lang,other.lang,other.lang,...,job.role,job.role,job.role,job.role,job.role,job.role,job.role,job.role,general,general
Unnamed: 0_level_1,is.python.main,None,Java,JavaScript,C/C++,PHP,C#,Ruby,Bash / Shell,Objective-C,...,Technical support,Data analyst,Business analyst,Team lead,Product manager,CIO / CEO / CTO,Systems analyst,Other,age,country.live
0,Yes,,,,,,,,Bash / Shell,,...,,,Business analyst,,,,,,30–39,
1,Yes,,Java,JavaScript,,,C#,,,,...,,,,,,,,,21–29,India
2,Yes,,,,C/C++,,,,Bash / Shell,,...,Technical support,Data analyst,,Team lead,,,,,30–39,United States
3,Yes,,,JavaScript,,,,,Bash / Shell,,...,,,,,,,,,,
4,Yes,,Java,JavaScript,C/C++,,,,Bash / Shell,,...,,,,,,,,,21–29,Italy


In [6]:
# Sort the columns, such that they're in alphabetical order.
# sort_index(axis=1) orders the (group, item) labels lexicographically and,
# unlike selecting with a sorted list of labels, never selects a duplicated
# label more than once.
py_df = py_df.sort_index(axis=1)

In [7]:
# Preview the first five rows after the columns have been sorted.
py_df.head(n=5)

Unnamed: 0_level_0,bigdata,bigdata,bigdata,bigdata,bigdata,bigdata,bigdata,bigdata,bigdata,bigdata,...,web.frameworks,web.frameworks,web.frameworks,web.frameworks,web.frameworks,web.frameworks,web.frameworks,web.frameworks,web.frameworks,web.frameworks
Unnamed: 0_level_1,Apache Beam,Apache Flink,Apache Hadoop/MapReduce,Apache Hive,Apache Kafka,Apache Samza,Apache Spark,Apache Tez,ClickHouse,Dask,...,Django,Falcon,FastAPI,Flask,Hug,None,Other,Pyramid,Tornado,web2py
0,Apache Beam,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,Tornado,
2,,,,,,,,,,,...,,,,Flask,,,,,,
3,,,,,,,,,,,...,Django,,,,,,,,,
4,,,,,,,,,,,...,,,,Flask,,,,,,


In [72]:
# What are the 10 most popular IDEs used for editing Python?
# Keep only respondents who use Python, whether as their main language
# or as a secondary one.
mask = py_df[('general', 'is.python.main')].isin(
    ['Yes', 'No, I use Python as a secondary language']
)

py_df.loc[mask, ('ide', 'main')].value_counts().iloc[:10]

(ide, main)
VS Code                         8010
PyCharm Professional Edition    5144
PyCharm Community Edition       3815
Vim                             2176
Sublime Text                    1201
Jupyter Notebook                1167
Atom                             784
Other                            711
Emacs                            635
Spyder                           580
Name: count, dtype: int64

In [71]:
# Same question, restricted to respondents who use Python as their main language.
mask = py_df[('general', 'is.python.main')].eq('Yes')

py_df.loc[mask, ('ide', 'main')].value_counts().iloc[:10]

(ide, main)
VS Code                         6770
PyCharm Professional Edition    4633
PyCharm Community Edition       3282
Vim                             1834
Sublime Text                    1065
Jupyter Notebook                1038
Atom                             668
Other                            594
Emacs                            525
Spyder                           501
Name: count, dtype: int64

In [9]:
# Which 10 other programming languages are most commonly used by Python developers?
# Each column under 'other.lang' is non-null only for respondents who picked
# that option, so counting non-null cells per column gives its popularity.
# NOTE(review): the catch-all 'Other' column is counted like a language and
# shows up in the recorded top 10 — confirm that is intended.
(
    py_df.loc[:, 'other.lang']
    .count()
    .sort_values(ascending=False)
    .iloc[:10]
)

JavaScript      16662
HTML/CSS        15469
Bash / Shell    13793
SQL             13391
C/C++           11623
Java             8109
C#               4460
PHP              4060
TypeScript       3717
Other            3592
dtype: int64

In [10]:
# What were the 10 most common countries from which survey participants came?
# value_counts() already sorts by descending frequency, so the first ten
# entries are the answer.
(
    py_df.loc[:, ('general', 'country.live')]
    .value_counts()
    .iloc[:10]
)

(general, country.live)
United States         3975
India                 2800
Germany               1807
China                 1155
United Kingdom        1110
France                1078
Russian Federation     935
Other country          880
Brazil                 812
Canada                 644
Name: count, dtype: int64

In [11]:
# According to the Python survey, what proportion of Python developers have each level of experience?
# normalize=True turns the counts into fractions of the non-missing answers.
(
    py_df.loc[:, ('general', 'python.years')]
    .value_counts(normalize=True)
)

(general, python.years)
3–5 years           0.284272
Less than 1 year    0.239542
1–2 years           0.224834
6–10 years          0.154939
11+ years           0.096413
Name: proportion, dtype: float64

In [12]:
# Which country has the greatest number of Python developers with 11+ years of experience?
# Filter the 'general' columns down to the 11+ years respondents, count them
# per country, and keep the single largest group.
(
    py_df['general']
    .loc[py_df[('general', 'python.years')].eq('11+ years')]
    .groupby('country.live')['python.years']
    .count()
    .sort_values(ascending=False)
    .iloc[:1]
)

country.live
United States    691
Name: python.years, dtype: int64

In [13]:
# Proportion of Python devs per country with 11+ years experience.
# Step 1: keep just the two relevant 'general' columns and count all
# respondents per country (the denominator of the proportion).
country_experience = py_df['general'].loc[:, ['country.live', 'python.years']]
all_per_country = country_experience.loc[:, 'country.live'].value_counts()

In [14]:
# Display the number of respondents per country (all experience levels).
all_per_country

country.live
United States         3975
India                 2800
Germany               1807
China                 1155
United Kingdom        1110
                      ... 
Kazakhstan              36
Dominican Republic      34
Uruguay                 33
Armenia                 31
Uzbekistan              31
Name: count, Length: 76, dtype: int64

In [15]:
# Step 2: count, per country, only the respondents reporting '11+ years'
# of Python experience (the numerator of the proportion).
expert_per_country = (
    country_experience
    .loc[country_experience['python.years'].eq('11+ years'), 'country.live']
    .value_counts()
)

In [16]:
# Display the per-country count of respondents with '11+ years' of Python experience.
expert_per_country

country.live
United States     691
Germany           308
United Kingdom    207
France            166
Canada             94
                 ... 
Pakistan            1
Tunisia             1
Nigeria             1
Ghana               1
Egypt               1
Name: count, Length: 70, dtype: int64

In [17]:
# Step 3: share of 11+ years developers per country. The division aligns the
# two Series on country name; countries missing from either side become NaN
# and are dropped before taking the ten largest shares.
(
    expert_per_country
    .div(all_per_country)
    .sort_values(ascending=False)
    .dropna()
    .iloc[:10]
)

country.live
Norway            0.265432
Ireland           0.225490
Australia         0.225420
Belgium           0.225108
Slovenia          0.224490
New Zealand       0.197917
Sweden            0.194030
Finland           0.190141
United Kingdom    0.186486
Austria           0.186170
Name: count, dtype: float64

In [18]:
# Read the Stack Overflow survey responses into `so_df`.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference within a column.
so_filename = '../data/so_2021_survey_results.csv'
so_df = pd.read_csv(filepath_or_buffer=so_filename, low_memory=False)

In [19]:
# Preview the first five rows of the Stack Overflow survey data.
so_df.head(n=5)

Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,
2,3,"I am not primarily a developer, but I write co...","Student, full-time",Russian Federation,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,...,18-24 years old,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,
3,4,I am a developer by profession,Employed full-time,Austria,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,
4,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",United Kingdom of Great Britain and Northern I...,,England,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0,...,25-34 years old,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,


한국, 영국, 미국의 이름을 이후에 불러오는 OECD 데이터에서 사용된 국가명과 통일시킨다.

In [20]:
# Map the survey's spellings for Korea, the United States and the United
# Kingdom onto the shorter names, so they match the country names used in
# the OECD data loaded later.
to_replace_dict = {
    'South Korea': 'Korea',
    'Republic of Korea': 'Korea',
    'United States of America': 'United States',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
}
so_df['Country'] = so_df['Country'].replace(to_replace_dict)

In [21]:
# Show the average salary for different types of employment.
# Contractors and freelancers like to say that they earn more than full-time employees.
# What does the data here show us?
# NOTE(review): a mean is sensitive to extreme values; in the recorded output
# the 'I prefer not to say' group averages above 1.4 million.
(
    so_df
    .groupby('Employment')['ConvertedCompYearly']
    .mean()
    .sort_values(ascending=False)
    .dropna()                    # groups with no reported salary have a NaN mean
    .map('{:,.2f}'.format)       # render as strings with thousands separators
)

Employment
I prefer not to say                                     1,455,643.25
Employed full-time                                        121,369.67
Independent contractor, freelancer, or self-employed      107,433.97
Retired                                                    69,533.25
Employed part-time                                         41,136.12
Name: ConvertedCompYearly, dtype: object

In [22]:
# From here on, display floats with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [23]:
# Look at just the three columns used by the pivot tables below.
so_df.loc[:, ['Country', 'EdLevel', 'ConvertedCompYearly']]

Unnamed: 0,Country,EdLevel,ConvertedCompYearly
0,Slovakia,"Secondary school (e.g. American high school, G...",62268.00
1,Netherlands,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",
2,Russian Federation,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",
3,Austria,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",
4,United Kingdom,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",
...,...,...,...
83434,United States,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",160500.00
83435,Benin,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",3960.00
83436,United States,"Secondary school (e.g. American high school, G...",90000.00
83437,Canada,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",816816.00


In [24]:
# Create a pivot table in which the index contains countries,
# the columns are education levels,
# and the cells contain the average salary for each education level per country.
# (pivot_table aggregates with the mean when no aggfunc is given.)
pd.pivot_table(
    so_df,
    index='Country',
    columns='EdLevel',
    values='ConvertedCompYearly',
)

EdLevel,"Associate degree (A.A., A.S., etc.)","Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Other doctoral degree (Ph.D., Ed.D., etc.)",Primary/elementary school,"Professional degree (JD, MD, etc.)","Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",Some college/university study without earning a degree,Something else
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Afghanistan,,30288.00,10176704.00,,,,100.00,,
Albania,,19152.86,80127.62,,,5298.00,19890.00,22884.00,128522.00
Algeria,,21770.67,15052.57,,,12912.00,,6288.00,
Andorra,,94045.50,22056.00,146981.00,,,,,
Angola,,31500.00,,,,,18678.00,6904.00,
...,...,...,...,...,...,...,...,...,...
"Venezuela, Bolivarian Republic of...",,30108.77,28680.00,,7200.00,14833.29,10200.00,17720.57,
Viet Nam,7827.00,18463.11,50599.80,2592.00,10479.00,30000.00,,18866.19,
Yemen,,5628.67,,,,,,,
Zambia,,40173.00,4908.00,,,,4482.00,12105.33,8184.00


In [25]:
# Load the CSV file with OECD data.
# The file is read without a header row: the first field is named 'abbrev'
# and the second, 'Country', becomes the index.
oecd_filename = '../data/oecd_locations.csv'
oecd_df = pd.read_csv(
    oecd_filename,
    header=None,
    names=['abbrev', 'Country'],
    index_col='Country',
)

In [26]:
# Display the OECD lookup table: indexed by country name, with one `abbrev` column.
oecd_df

Unnamed: 0_level_0,abbrev
Country,Unnamed: 1_level_1
Australia,AUS
Austria,AUT
Belgium,BEL
Canada,CAN
Denmark,DNK
Finland,FIN
France,FRA
Germany,DEU
Hungary,HUN
Italy,ITA


In [27]:
# Preview `so_df` indexed by country. The result is not assigned, so `so_df`
# itself keeps its original index.
so_df.set_index(keys='Country')

Unnamed: 0_level_0,ResponseId,MainBranch,Employment,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,YearsCodePro,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Slovakia,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.00
Netherlands,2,I am a student who is learning to code,"Student, full-time",,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7,,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,
Russian Federation,3,"I am not primarily a developer, but I write co...","Student, full-time",,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,,...,18-24 years old,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,
Austria,4,I am a developer by profession,Employed full-time,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,
United Kingdom,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",,England,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17,10,...,25-34 years old,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
United States,83435,I am a developer by profession,Employed full-time,Texas,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",6,5,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,I have a concentration and/or memory disorder ...,Appropriate in length,Easy,160500.00
Benin,83436,I am a developer by profession,"Independent contractor, freelancer, or self-em...",,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",4,2,...,18-24 years old,Man,No,Straight / Heterosexual,Black or of African descent,None of the above,None of the above,Appropriate in length,Easy,3960.00
United States,83437,I am a developer by profession,Employed full-time,New Jersey,,"Secondary school (e.g. American high school, G...",11 - 17 years,School,10,4,...,25-34 years old,Man,No,,White or of European descent,None of the above,None of the above,Appropriate in length,Neither easy nor difficult,90000.00
Canada,83438,I am a developer by profession,Employed full-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,Online Courses or Certification;Books / Physic...,5,3,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,I have a mood or emotional disorder (e.g. depr...,Appropriate in length,Neither easy nor difficult,816816.00


In [28]:
# Attach the survey rows to each OECD country by matching on the country-name
# index (join defaults to a left join, made explicit here).
oecd_df.join(so_df.set_index('Country'), how='left')

Unnamed: 0_level_0,abbrev,ResponseId,MainBranch,Employment,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Australia,AUS,210,"I am not primarily a developer, but I write co...",Employed full-time,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,School;Friend or family member,4,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,96874.00
Australia,AUS,743,I am a developer by profession,Employed part-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",5 - 10 years,"Other online resources (ex: videos, blogs, etc...",11,...,18-24 years old,Man,No,Bisexual,White or of European descent,None of the above,I have an anxiety disorder,Too short,Easy,16050.00
Australia,AUS,992,None of these,"Student, full-time",,,"Secondary school (e.g. American high school, G...",11 - 17 years,Coding Bootcamp;Other online resources (ex: vi...,,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent;Southeast Asian;B...,None of the above,Prefer not to say,Appropriate in length,Easy,
Australia,AUS,1594,I am a developer by profession,Employed full-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",5 - 10 years,Coding Bootcamp;Other online resources (ex: vi...,24,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,40086.00
Australia,AUS,1779,I am a developer by profession,Employed full-time,,,Some college/university study without earning ...,25 - 34 years,Coding Bootcamp,2,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Too long,Neither easy nor difficult,60132.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Israel,ISR,82948,I am a developer by profession,Employed full-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,Coding Bootcamp;School;Online Courses or Certi...,7,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent;Middle Eastern,None of the above,None of the above,Appropriate in length,Easy,84048.00
Israel,ISR,83072,I am a developer by profession,Employed part-time,,,Some college/university study without earning ...,11 - 17 years,"Other online resources (ex: videos, blogs, etc...",5,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent;Middle Eastern,None of the above,I have a concentration and/or memory disorder ...,Appropriate in length,Easy,84048.00
Israel,ISR,83075,I am a developer by profession,Employed full-time,,,Something else,11 - 17 years,Coding Bootcamp;Other online resources (ex: vi...,5,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,I have a concentration and/or memory disorder ...,Appropriate in length,Easy,
Israel,ISR,83244,I am a developer by profession,Employed full-time,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Other (please specify):,36,...,45-54 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,119076.00


In [29]:
# Create this pivot table again, only including countries in our OECD subset.
# Joining onto `oecd_df` keeps only the countries listed there; pivot_table
# then averages salaries per (country, education level).
pd.pivot_table(
    oecd_df.join(so_df.set_index('Country')),
    index='Country',
    columns='EdLevel',
    values='ConvertedCompYearly',
)

EdLevel,"Associate degree (A.A., A.S., etc.)","Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Other doctoral degree (Ph.D., Ed.D., etc.)",Primary/elementary school,"Professional degree (JD, MD, etc.)","Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",Some college/university study without earning a degree,Something else
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Australia,117049.64,180794.16,106794.32,150234.96,153327.5,108725.0,158931.28,127063.53,231987.17
Austria,43623.38,66096.13,77645.66,74783.17,86877.0,38915.33,47438.83,53906.05,45772.53
Belgium,35664.0,68474.7,88580.41,80832.44,11342349.25,71000.2,43224.13,88006.29,27035.5
Brazil,25347.42,47681.23,42056.01,43123.21,7880.0,25449.67,15072.89,39978.02,20288.71
Canada,87930.35,140668.24,144733.35,102989.35,73787.83,82953.4,180584.81,155090.08,60795.17
Denmark,80217.33,80430.94,115751.85,102785.19,91056.57,114144.89,90411.74,85131.35,95479.86
Finland,282353.67,69381.34,74360.98,61508.25,77832.0,83016.0,59424.32,89255.56,35024.0
France,54394.89,65062.38,94585.73,140402.86,34181.0,78342.27,53981.81,55119.45,42884.04
Germany,98530.52,102751.23,110611.98,108718.46,74943.67,97330.14,82865.84,95737.77,109333.99
Hungary,51041.0,48756.45,46771.71,52833.6,48100.0,9852.0,33440.8,37102.89,28911.0


In [30]:
# Show the OECD/survey join again for reference before the per-degree questions.
oecd_df.join(so_df.set_index('Country'), how='left')

Unnamed: 0_level_0,abbrev,ResponseId,MainBranch,Employment,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Australia,AUS,210,"I am not primarily a developer, but I write co...",Employed full-time,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,School;Friend or family member,4,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,96874.00
Australia,AUS,743,I am a developer by profession,Employed part-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",5 - 10 years,"Other online resources (ex: videos, blogs, etc...",11,...,18-24 years old,Man,No,Bisexual,White or of European descent,None of the above,I have an anxiety disorder,Too short,Easy,16050.00
Australia,AUS,992,None of these,"Student, full-time",,,"Secondary school (e.g. American high school, G...",11 - 17 years,Coding Bootcamp;Other online resources (ex: vi...,,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent;Southeast Asian;B...,None of the above,Prefer not to say,Appropriate in length,Easy,
Australia,AUS,1594,I am a developer by profession,Employed full-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",5 - 10 years,Coding Bootcamp;Other online resources (ex: vi...,24,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,40086.00
Australia,AUS,1779,I am a developer by profession,Employed full-time,,,Some college/university study without earning ...,25 - 34 years,Coding Bootcamp,2,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Too long,Neither easy nor difficult,60132.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Israel,ISR,82948,I am a developer by profession,Employed full-time,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,Coding Bootcamp;School;Online Courses or Certi...,7,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent;Middle Eastern,None of the above,None of the above,Appropriate in length,Easy,84048.00
Israel,ISR,83072,I am a developer by profession,Employed part-time,,,Some college/university study without earning ...,11 - 17 years,"Other online resources (ex: videos, blogs, etc...",5,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent;Middle Eastern,None of the above,I have a concentration and/or memory disorder ...,Appropriate in length,Easy,84048.00
Israel,ISR,83075,I am a developer by profession,Employed full-time,,,Something else,11 - 17 years,Coding Bootcamp;Other online resources (ex: vi...,5,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,I have a concentration and/or memory disorder ...,Appropriate in length,Easy,
Israel,ISR,83244,I am a developer by profession,Employed full-time,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Other (please specify):,36,...,45-54 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,119076.00


In [31]:
# In which of these countries does someone with an associate degree earn the most?
# Build the country x education-level table of mean salaries, pick the
# associate-degree column, and rank countries from highest to lowest.
(
    pd.pivot_table(
        oecd_df.join(so_df.set_index('Country')),
        index='Country',
        columns='EdLevel',
        values='ConvertedCompYearly',
    )
    .loc[:, 'Associate degree (A.A., A.S., etc.)']
    .sort_values(ascending=False)
)

Country
Finland          282,353.67
United States    206,568.32
Israel           146,420.90
Japan            143,196.83
United Kingdom   139,154.85
Australia        117,049.64
Germany           98,530.52
Canada            87,930.35
Denmark           80,217.33
France            54,394.89
Hungary           51,041.00
Austria           43,623.38
Italy             36,427.94
Belgium           35,664.00
Korea             26,408.67
Brazil            25,347.42
Name: Associate degree (A.A., A.S., etc.), dtype: float64

In [32]:
# In which of them does someone with a doctoral degree earn the most?
# Build the country x education-level table of mean salaries, pick the
# doctoral-degree column, and rank countries from highest to lowest.
(
    pd.pivot_table(
        oecd_df.join(so_df.set_index('Country')),
        index='Country',
        columns='EdLevel',
        values='ConvertedCompYearly',
    )
    .loc[:, 'Other doctoral degree (Ph.D., Ed.D., etc.)']
    .sort_values(ascending=False)
)

Country
United States    208,656.94
Japan            157,239.40
Australia        150,234.96
France           140,402.86
Israel           131,812.62
United Kingdom   123,226.40
Germany          108,718.46
Canada           102,989.35
Denmark          102,785.19
Italy             93,490.78
Belgium           80,832.44
Austria           74,783.17
Finland           61,508.25
Hungary           52,833.60
Brazil            43,123.21
Korea             41,919.00
Name: Other doctoral degree (Ph.D., Ed.D., etc.), dtype: float64

In [33]:
# Keep only the rows of `so_df` whose `LanguageHaveWorkedWith` is not `NaN`.
# Note: this rebinds `so_df`, so every later cell sees the filtered frame.
so_df = so_df[so_df['LanguageHaveWorkedWith'].notna()]

In [35]:
# Keep only the rows of `so_df` in which Python is included
# as a commonly used language (`LanguageHaveWorkedWith`).
# NOTE(review): this is a substring match, so any entry whose text contains
# 'Python' is kept — confirm no other listed language name contains it.
so_df = so_df[so_df['LanguageHaveWorkedWith'].str.contains('Python')]

In [36]:
# Drop the rows of `so_df` whose `YearsCode` is `NaN`.
so_df = so_df.dropna(axis='index', subset=['YearsCode'])

In [41]:
# Replace the two textual answers with numeric stand-ins (0 and 51) so that
# the whole `YearsCode` column can be converted to integers afterwards.
so_df.loc[so_df['YearsCode'].eq('Less than 1 year'), 'YearsCode'] = 0
so_df.loc[so_df['YearsCode'].eq('More than 50 years'), 'YearsCode'] = 51

In [42]:
# Convert `YearsCode` to integers now that it no longer holds text answers.
so_df = so_df.astype({'YearsCode': int})

In [51]:
# Create a new column in `so_df`, called `experience`, which will categorize the values in `YearsCode`.
# pd.cut uses right-closed intervals by default, so the edges below produce
# (-1, 0], (0, 2], (2, 5], (5, 10] and (10, 100].
# NOTE(review): these labels use an ASCII hyphen ('1-2 years'), while the
# `python.years` values shown earlier from py_df appear to use an en dash
# ('1–2 years') — confirm before aligning the two surveys by label.
so_df['experience'] = pd.cut(
    x=so_df['YearsCode'],
    bins=[-1, 0, 2, 5, 10, 100],
    labels=[
        'Less than 1 year',
        '1-2 years',
        '3-5 years',
        '6-10 years',
        '11+ years',
    ],
)

In [52]:
# According to the Stack Overflow survey, what proportion of Python developers
# have each level of experience?
so_df.loc[:, 'experience'].value_counts(normalize=True)

experience
11+ years          0.37
6-10 years         0.32
3-5 years          0.22
1-2 years          0.07
Less than 1 year   0.02
Name: proportion, dtype: float64

In [50]:
# Show the block of 'other.lang' columns from the Python survey
# (one column per language option).
py_df.loc[:, 'other.lang']

Unnamed: 0,Bash / Shell,C#,C/C++,Clojure,CoffeeScript,Go,Groovy,HTML/CSS,Java,JavaScript,...,PHP,Perl,R,Ruby,Rust,SQL,Scala,Swift,TypeScript,Visual Basic
0,Bash / Shell,,,,,,,,,,...,,,,,,,,,,
1,,C#,,,,,,,Java,JavaScript,...,,,R,,,SQL,,,TypeScript,
2,Bash / Shell,,C/C++,,,,,,,,...,,,,,,,,,,
3,Bash / Shell,,,,,,,HTML/CSS,,JavaScript,...,,,,,,SQL,,,,
4,Bash / Shell,,C/C++,,,,,HTML/CSS,Java,JavaScript,...,,,,,,SQL,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54457,Bash / Shell,,C/C++,,,,,,,,...,,,R,,,,,,,
54458,Bash / Shell,,,,,,,HTML/CSS,,JavaScript,...,,,,,,,,,,
54459,Bash / Shell,,,,,Go,,HTML/CSS,,JavaScript,...,PHP,,,,,SQL,,,TypeScript,
54460,Bash / Shell,,C/C++,,,,,HTML/CSS,,JavaScript,...,PHP,,,,,SQL,,,,
