In [8]:
import pandas as pd
import numpy as np

In [9]:
# Strings listed here are parsed as NaN by read_csv.
na_vals = ['NA', 'Missing']

# Survey answers, indexed by the 'Respondent' column.
df = pd.read_csv(
    'data/survey_results_public.csv',
    index_col='Respondent',
    na_values=na_vals,
)

# Schema table, indexed by the 'Column' column so question text can be looked up by name.
schema_df = pd.read_csv(
    'data/survey_results_schema.csv',
    index_col='Column',
)

In [3]:
# Raise both display limits to 85 so wide/long frames are shown without truncation
# up to that size.
display_limits = {'display.max_columns': 85, 'display.max_rows': 85}
for option_name, limit in display_limits.items():
    pd.set_option(option_name, limit)

In [4]:
# (row count, column count) of the loaded survey frame.
df.shape

(88883, 84)

In [5]:
# Per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 88883 entries, 1 to 88863
Data columns (total 84 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   MainBranch              88331 non-null  object 
 1   Hobbyist                88883 non-null  object 
 2   OpenSourcer             88883 non-null  object 
 3   OpenSource              86842 non-null  object 
 4   Employment              87181 non-null  object 
 5   Country                 88751 non-null  object 
 6   Student                 87014 non-null  object 
 7   EdLevel                 86390 non-null  object 
 8   UndergradMajor          75614 non-null  object 
 9   EduOther                84260 non-null  object 
 10  OrgSize                 71791 non-null  object 
 11  DevType                 81335 non-null  object 
 12  YearsCode               87938 non-null  object 
 13  Age1stCode              87634 non-null  object 
 14  YearsCodePro            74331 non-null

In [6]:
# Column labels available in the survey frame.
df.columns

Index(['MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource', 'Employment',
       'Country', 'Student', 'EdLevel', 'UndergradMajor', 'EduOther',
       'OrgSize', 'DevType', 'YearsCode', 'Age1stCode', 'YearsCodePro',
       'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney', 'MgrWant', 'JobSeek',
       'LastHireDate', 'LastInt', 'FizzBuzz', 'JobFactors', 'ResumeUpdate',
       'CurrencySymbol', 'CurrencyDesc', 'CompTotal', 'CompFreq',
       'ConvertedComp', 'WorkWeekHrs', 'WorkPlan', 'WorkChallenge',
       'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev', 'CodeRevHrs', 'UnitTests',
       'PurchaseHow', 'PurchaseWhat', 'LanguageWorkedWith',
       'LanguageDesireNextYear', 'DatabaseWorkedWith',
       'DatabaseDesireNextYear', 'PlatformWorkedWith',
       'PlatformDesireNextYear', 'WebFrameWorkedWith',
       'WebFrameDesireNextYear', 'MiscTechWorkedWith',
       'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',
       'BlockchainOrg', 'BlockchainIs', 'BetterLife', 'ITperson', 

In [7]:
# Frequency of each distinct answer in the Hobbyist column.
df['Hobbyist'].value_counts()

Yes    71257
No     17626
Name: Hobbyist, dtype: int64

In [8]:
# Look up the question text stored in the schema for the 'MgrIdiot' column.
schema_df.loc['MgrIdiot', 'QuestionText']

'How confident are you that your manager knows what they’re doing?'

In [9]:
# Boolean mask selecting respondents whose Country is one of `countries`.
# NOTE(review): the name `high_salary` does not match what is computed here
# (a country membership test; no salary column is involved) — consider renaming.
countries = ['United States', 'India']
high_salary = df['Country'].isin(countries)

In [10]:
# Country values for the rows selected by the mask.
df.loc[high_salary, 'Country']

Respondent
4        United States
8                India
10               India
13       United States
15               India
             ...      
83862            India
84299            India
85642    United States
86012            India
88282    United States
Name: Country, Length: 30010, dtype: object

In [11]:
# Mask of rows whose LanguageWorkedWith text contains 'Python';
# na=False makes missing answers count as "no match" instead of NaN.
filt = df['LanguageWorkedWith'].str.contains('Python', na=False)
# Show the language strings for the matching rows only.
df.loc[filt, 'LanguageWorkedWith']

Respondent
1                          HTML/CSS;Java;JavaScript;Python
2                                      C++;HTML/CSS;Python
4                                      C;C++;C#;Python;SQL
5              C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA
8        Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
                               ...                        
84539    Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
85738      Bash/Shell/PowerShell;C++;Python;Ruby;Other(s):
86566      Bash/Shell/PowerShell;HTML/CSS;Python;Other(s):
87739             C;C++;HTML/CSS;JavaScript;PHP;Python;SQL
88212                           HTML/CSS;JavaScript;Python
Name: LanguageWorkedWith, Length: 36443, dtype: object

In [12]:
# Preview Hobbyist recoded from 'Yes'/'No' to booleans.
# The mapped Series is only displayed; it is not assigned back to df.
df['Hobbyist'].map({'Yes':True, 'No':False})

Respondent
1         True
2        False
3         True
4        False
5         True
         ...  
88377     True
88601    False
88802    False
88816    False
88863     True
Name: Hobbyist, Length: 88883, dtype: bool

In [4]:
# Order rows by Country (A-Z), and within each country by ConvertedComp from
# highest to lowest. The sorted frame replaces df.
df = df.sort_values(
    by=['Country', 'ConvertedComp'],
    ascending=[True, False],
)

In [5]:
# Ten largest ConvertedComp values, with their Respondent index labels.
df['ConvertedComp'].nlargest(10)

Respondent
25983    2000000.0
87896    2000000.0
22013    2000000.0
28243    2000000.0
72732    2000000.0
78151    2000000.0
80200    2000000.0
52132    2000000.0
75561    2000000.0
32250    2000000.0
Name: ConvertedComp, dtype: float64

In [6]:
# Look up the question text stored in the schema for the 'FizzBuzz' column.
schema_df.loc['FizzBuzz', 'QuestionText']

'Have you ever been asked to solve FizzBuzz in an interview?'

In [16]:
# Median of ConvertedComp (missing values are skipped by default).
df['ConvertedComp'].median()

57287.0

In [17]:
# Median of every numeric column. numeric_only=True is needed on pandas >= 2.0,
# where the default no longer silently drops non-numeric columns and the call
# raises on the text columns instead.
df.median(numeric_only=True)

CompTotal        62000.0
ConvertedComp    57287.0
WorkWeekHrs         40.0
CodeRevHrs           4.0
Age                 29.0
dtype: float64

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,CompTotal,ConvertedComp,WorkWeekHrs,CodeRevHrs,Age
count,55945.0,55823.0,64503.0,49790.0,79210.0
mean,551901400000.0,127110.7,42.127197,5.084308,30.336699
std,73319260000000.0,284152.3,37.28761,5.513931,9.17839
min,0.0,0.0,1.0,0.0,1.0
25%,20000.0,25777.5,40.0,2.0,24.0
50%,62000.0,57287.0,40.0,4.0,29.0
75%,120000.0,100000.0,44.75,6.0,35.0
max,1e+16,2000000.0,4850.0,99.0,99.0


In [19]:
# Number of non-null ConvertedComp entries.
df['ConvertedComp'].count()

55823

In [20]:
# Frequency of each distinct answer in the Hobbyist column.
df['Hobbyist'].value_counts()

Yes    71257
No     17626
Name: Hobbyist, dtype: int64

In [21]:
# Frequency of each distinct SocialMedia answer across all respondents.
df['SocialMedia'].value_counts()

Reddit                      14374
YouTube                     13830
WhatsApp                    13347
Facebook                    13178
Twitter                     11398
Instagram                    6261
I don't use social media     5554
LinkedIn                     4501
WeChat 微信                     667
Snapchat                      628
VK ВКонта́кте                 603
Weibo 新浪微博                     56
Youku Tudou 优酷                 21
Hello                          19
Name: SocialMedia, dtype: int64

In [22]:
# Same breakdown as relative frequencies (fractions of the non-null answers).
df['SocialMedia'].value_counts(normalize=True)

Reddit                      0.170233
YouTube                     0.163791
WhatsApp                    0.158071
Facebook                    0.156069
Twitter                     0.134988
Instagram                   0.074150
I don't use social media    0.065777
LinkedIn                    0.053306
WeChat 微信                   0.007899
Snapchat                    0.007437
VK ВКонта́кте               0.007141
Weibo 新浪微博                  0.000663
Youku Tudou 优酷              0.000249
Hello                       0.000225
Name: SocialMedia, dtype: float64

In [23]:
# Number of respondents per country, most frequent first.
df['Country'].value_counts()

United States                       20949
India                                9061
Germany                              5866
United Kingdom                       5737
Canada                               3395
                                    ...  
North Korea                             1
Saint Vincent and the Grenadines        1
Niger                                   1
Chad                                    1
Brunei Darussalam                       1
Name: Country, Length: 179, dtype: int64

In [7]:
# Group respondents by country. A scalar key (rather than a one-element list)
# keeps group labels as plain strings, so get_group('India') below works without
# the length-1-tuple form that newer pandas versions expect for list keys.
country = df.groupby('Country')
# Rows belonging to one group; selects the same rows as filtering on
# df['Country'] == 'India'.
country.get_group('India')

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
22013,I am a developer by profession,Yes,Never,The quality of OSS and closed source software ...,Employed full-time,India,No,"Professional degree (JD, MD, etc.)","A natural science (ex. biology, chemistry, phy...",Taken an online course in programming or softw...,2-9 employees,Academic researcher;Data scientist or machine ...,3,26,3,Very satisfied,Very satisfied,,,,I am not interested in new job opportunities,3-4 years ago,"Write code by hand (e.g., on a whiteboard);Com...",No,Financial performance or funding status of the...,"Something else changed (education, award, medi...",USD,United States dollar,1500000.0,Weekly,2000000.0,50.0,There's no schedule or spec; I work on what se...,Inadequate access to necessary tools;Meetings;...,All or almost all the time (I'm full-time remote),Home,A little below average,"Yes, because I see value in code review",10.0,"Yes, it's part of our process",Developers typically have the most influence o...,I have a great deal of influence,Assembly;Bash/Shell/PowerShell;C;C++;Java;Java...,Python;Swift,Cassandra;Microsoft SQL Server;Oracle,Cassandra,Android;Arduino;Google Cloud Platform;IBM Clou...,IBM Cloud or Watson;Windows,jQuery,,TensorFlow,TensorFlow,Visual Studio;Visual Studio Code,Windows,Development,Implementing our own cryptocurrency,Useful across many domains and could change ma...,Yes,Yes,No,YouTube,Online,Screen Name,,A few times per week,Find answers to specific questions,1-2 times per week,Stack Overflow was much faster,60+ minutes,Yes,I have never participated in Q&A on Stack Over...,Yes,Yes,"Yes, definitely",A lot more welcome now than last year,Tech articles written by other developers;Indu...,,Man,No,Straight / Heterosexual,,Yes,Too long,Easy
28243,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",India,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,2-9 employees,"Developer, back-end;Developer, full-stack;Deve...",25,9,14,Neither satisfied nor dissatisfied,Neither satisfied nor dissatisfied,,,,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles;Interview ...,Yes,Financial performance or funding status of the...,I heard about a job opportunity (from a recrui...,USD,United States dollar,2000000.0,Yearly,2000000.0,20.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Not eno...,All or almost all the time (I'm full-time remote),Office,Far above average,"Yes, because I see value in code review",,,,,C++;C#;Java;JavaScript;Objective-C;Python,C;C++;C#,PostgreSQL;SQLite,SQLite,Android;AWS;iOS;MacOS;Windows,Android;Linux,,,Node.js;Pandas;Unity 3D,Unity 3D;Unreal Engine,Android Studio;Coda;PyCharm;Visual Studio;Visu...,MacOS,I do not use containers,,An irresponsible use of resources,No,Yes,No,I don't use social media,In real life (in person),Username,2008,Daily or almost daily,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,60+ minutes,Yes,Multiple times per day,Yes,"No, and I don't know what those are","Yes, definitely",A lot less welcome now than last year,Tech meetups or events in your area,,,,Straight / Heterosexual,,Yes,Too short,Easy
72732,"I am not primarily a developer, but I write co...",No,Less than once a month but more than once per ...,"OSS is, on average, of LOWER quality than prop...",,India,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Contributed to open source software,,,4,20,5,Slightly satisfied,Slightly satisfied,,,,I am actively looking for a job,More than 4 years ago,Write any code,No,Remote work options;Opportunities for professi...,I was preparing for a job search,USD,United States dollar,80000.0,Weekly,2000000.0,40.0,There is a schedule and/or spec (made by me or...,Lack of support from management;Meetings;Time ...,Less than once per month / Never,Office,A little above average,"Yes, because I see value in code review",10.0,"Yes, it's part of our process",Not sure,I have little or no influence,Java;Python;SQL,,MongoDB;Microsoft SQL Server;MySQL;Oracle;Post...,MySQL,Android;Windows,AWS;Docker,,Angular/Angular.js;React.js,,,Eclipse;Notepad++;PyCharm,Windows,Testing,,,Yes,Yes,No,WhatsApp,Neither,,2017,A few times per month or weekly,Get a sense of belonging to the developer comm...,1-2 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,A few times per month or weekly,Yes,"No, and I don't know what those are",Not sure,A lot less welcome now than last year,Tech articles written by other developers;Tech...,,Man,No,,,Yes,Too long,Easy
16793,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",India,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"Just me - I am a freelancer, sole proprietor, ...",Academic researcher;Data or business analyst;D...,14,14,6,Neither satisfied nor dissatisfied,Neither satisfied nor dissatisfied,,,,I am actively looking for a job,1-2 years ago,"Write any code;Write code by hand (e.g., on a ...",Yes,Office environment or company culture;Opportun...,I heard about a job opportunity (from a recrui...,INR,Indian rupee,106673250.0,Yearly,1000000.0,50.0,There is a schedule and/or spec (made by me or...,Distracting work environment;Non-work commitme...,It's complicated,Office,Far above average,"Yes, because I see value in code review",20.0,,,,Assembly;Bash/Shell/PowerShell;C;C++;C#;Dart;G...,Bash/Shell/PowerShell;C;C++;Go;HTML/CSS;Java;J...,Cassandra;Couchbase;DynamoDB;Elasticsearch;Fir...,Cassandra;Couchbase;DynamoDB;Elasticsearch;Fir...,Android;Arduino;AWS;Docker;Google Cloud Platfo...,Android;Arduino;AWS;Docker;Google Cloud Platfo...,Angular/Angular.js;ASP.NET;Django;Drupal;Expre...,Angular/Angular.js;ASP.NET;Django;Flask;jQuery...,Apache Spark;Flutter;Hadoop;.NET;.NET Core;Nod...,Apache Spark;Hadoop;Pandas;TensorFlow;Torch/Py...,Android Studio;Atom;Eclipse;Emacs;IntelliJ;IPy...,Linux-based,Development;Testing;Production;Outside of work...,,Useful across many domains and could change ma...,Yes,Also Yes,What?,LinkedIn,Online,Handle,2009,Multiple times per day,Find answers to specific questions;Learn how t...,More than 10 times per week,Stack Overflow was much faster,0-10 minutes,Yes,A few times per week,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are","Yes, definitely",Somewhat more welcome now than last year,Tech articles 
written by other developers;Indu...,27.0,Man,No,Straight / Heterosexual,South Asian,Yes,Appropriate in length,Neither easy nor difficult
52561,I am a developer by profession,Yes,Never,"OSS is, on average, of LOWER quality than prop...",Employed full-time,India,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"5,000 to 9,999 employees","Developer, back-end;Developer, desktop or ente...",12,11,9,Slightly satisfied,Slightly satisfied,Somewhat confident,No,No,"I’m not actively looking, but I am open to new...",3-4 years ago,"Write any code;Write code by hand (e.g., on a ...",No,"Languages, frameworks, and other technologies ...","Something else changed (education, award, medi...",INR,Indian rupee,2000000.0,Weekly,1000000.0,40.0,There's no schedule or spec; I work on what se...,Being tasked with non-development work;Distrac...,A few days each month,Office,A little above average,"Yes, because I was told to do so",2.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have some influence,C#;JavaScript,C#;JavaScript,DynamoDB;Microsoft SQL Server;SQLite,DynamoDB;Microsoft SQL Server,AWS;Docker;Linux;Windows,AWS;Docker;Linux;Windows,Angular/Angular.js;ASP.NET;Express;React.js,Angular/Angular.js;ASP.NET;Express;React.js,.NET;.NET Core;Node.js,.NET;.NET Core;Node.js,Visual Studio;Visual Studio Code,Windows,Development;Testing;Production;Outside of work...,Not at all,Useful for immutable record keeping outside of...,No,Also Yes,Yes,YouTube,Online,UserID,2009,Daily or almost daily,Find answers to specific questions;Contribute ...,1-2 times per week,Stack Overflow was much faster,0-10 minutes,Yes,A few times per month or weekly,Yes,"No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,31.0,Man,No,Straight / Heterosexual,South Asian,Yes,Too long,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77339,,Yes,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,India,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Another engineering discipline (ex. civil, ele...",Taken an online course in programming or softw...,"1,000 to 4,999 employees",,1,27,1,,,Somewhat confident,Yes,Yes,,,,,,,,,,,,,,,,,,,,,,,Other(s):,Python;SQL,,,,,,,,,,Linux-based,I do not use containers,,,Yes,Yes,No,YouTube,Online,UserID,2019,Less than once per month or monthly,Find answers to specific questions;Learn how t...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I didn't know that Stack Overflow had a jo...","No, I've heard of them, but I am not part of a...","Yes, somewhat",Not applicable - I did not use Stack Overflow ...,Tech articles written by other developers;Indu...,,,,,,,,
79795,,Yes,Less than once a month but more than once per ...,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,India,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,500 to 999 employees,"Developer, QA or test",6,17,5,,,Somewhat confident,No,Not sure,,,,,,,,,,,,,,,,,,,,,,,Bash/Shell/PowerShell;Python;SQL;VBA,,,,,,,,Apache Spark;Chef;Puppet,,PyCharm;Vim,Linux-based,Development;Testing;Production;Outside of work...,Not at all,,No,Yes,What?,Instagram,In real life (in person),Username,2018,A few times per month or weekly,Find answers to specific questions,Less than once per week,They were about the same,,Yes,I have never participated in Q&A on Stack Over...,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Somewhat more welcome now than last year,Tech meetups or events in your area;Courses on...,,Man,No,Straight / Heterosexual,,No,Too long,Difficult
83862,,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,India,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)",,Participated in a hackathon,,Data or business analyst;Student,1,18,Less than 1 year,,,Very confident,Not sure,Yes,,,,,,,,,,,,,,,,,,,,,,,Assembly;C;C++;HTML/CSS;Java;JavaScript;Object...,,MySQL,DynamoDB;Elasticsearch;MongoDB,Android;AWS;Google Cloud Platform;WordPress,IBM Cloud or Watson,Laravel,Angular/Angular.js;Laravel;Vue.js,,Node.js,Android Studio;Atom;IntelliJ;Komodo;NetBeans;N...,Windows,I do not use containers,,Useful across many domains and could change ma...,No,Yes,What?,Twitter,Online,UserID,2012,A few times per month or weekly,Find answers to specific questions;Learn how t...,Less than once per week,Stack Overflow was much faster,0-10 minutes,Yes,I have never participated in Q&A on Stack Over...,Yes,"No, and I don't know what those are","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,24.0,Man,No,Straight / Heterosexual,,Yes,Too long,Neither easy nor difficult
84299,,Yes,Never,The quality of OSS and closed source software ...,Employed full-time,India,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,Taken an online course in programming or softw...,100 to 499 employees,"Developer, back-end;Developer, front-end;Devel...",12,25,12,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python;Swift;TypeScript,HTML/CSS;Java;JavaScript,MongoDB;Redis;SQLite,MongoDB;Redis,,,Angular/Angular.js;Express;jQuery;React.js;Oth...,Express;React.js;Other(s):,Node.js,Node.js,IntelliJ;Notepad++;Visual Studio Code;Xcode,Windows,,,,Yes,"Fortunately, someone else has that title",What?,LinkedIn,,,2011,A few times per month or weekly,Find answers to specific questions;Contribute ...,Less than once per week,Stack Overflow was much faster,60+ minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","Yes, somewhat",Somewhat more welcome now than last year,,,,,,,,,


In [25]:
# SocialMedia answer counts restricted to the 'United States' group.
country.get_group('United States')['SocialMedia'].value_counts()

Reddit                      5700
Twitter                     3468
Facebook                    2844
YouTube                     2463
I don't use social media    1851
Instagram                   1652
LinkedIn                    1020
WhatsApp                     609
Snapchat                     326
WeChat 微信                     93
VK ВКонта́кте                  9
Weibo 新浪微博                     8
Hello                          2
Youku Tudou 优酷                 1
Name: SocialMedia, dtype: int64

In [26]:
# SocialMedia counts computed per country; the result is indexed by
# (Country, SocialMedia). Only the first 50 entries are shown.
country['SocialMedia'].value_counts().head(50)

Country              SocialMedia             
Afghanistan          Facebook                     15
                     YouTube                       9
                     I don't use social media      6
                     WhatsApp                      4
                     Instagram                     1
                     LinkedIn                      1
                     Twitter                       1
Albania              WhatsApp                     18
                     Facebook                     16
                     Instagram                    13
                     YouTube                      10
                     Twitter                       8
                     LinkedIn                      7
                     Reddit                        6
                     I don't use social media      4
                     Snapchat                      1
                     WeChat 微信                     1
Algeria              YouTube                      42


In [27]:
# Slice the per-country SocialMedia counts down to the 'India' entries.
country['SocialMedia'].value_counts().loc['India']

SocialMedia
WhatsApp                    2990
YouTube                     1820
LinkedIn                     955
Facebook                     841
Instagram                    822
Twitter                      542
Reddit                       473
I don't use social media     250
Snapchat                      23
Hello                          5
WeChat 微信                      5
VK ВКонта́кте                  4
Youku Tudou 优酷                 2
Weibo 新浪微博                     1
Name: SocialMedia, dtype: int64

In [28]:
# Median ConvertedComp per country, then pick the 'United States' value.
country['ConvertedComp'].median().loc['United States']

110000.0

In [29]:
# Median and mean ConvertedComp per country in one pass; show the 'India' row.
country['ConvertedComp'].agg(['median', 'mean']).loc['India']

median    10080.000000
mean      28057.664916
Name: India, dtype: float64

In [30]:
# Count respondents from India whose language list mentions Python.
filt = df['Country'] == 'India'
# Select rows and column in a single .loc call instead of chaining
# df.loc[filt][col]; na=False treats missing answers as "no match" so the
# result is a plain boolean Series before summing.
df.loc[filt, 'LanguageWorkedWith'].str.contains('Python', na=False).sum()

3105

In [31]:
# Demonstration: a SeriesGroupBy has no `.str` accessor, so this call fails.
# Catch and print the error so a top-to-bottom run of the notebook is not
# interrupted by the expected exception.
try:
    country['LanguageWorkedWith'].str.contains('Python')
except AttributeError as exc:
    print(f'{type(exc).__name__}: {exc}')

AttributeError: 'SeriesGroupBy' object has no attribute 'str'

In [32]:
# For each country's LanguageWorkedWith values, count how many mention Python.
country['LanguageWorkedWith'].apply(
    lambda languages: languages.str.contains('Python').sum()
)

Country
Afghanistan                              8
Albania                                 23
Algeria                                 40
Andorra                                  0
Angola                                   2
                                        ..
Venezuela, Bolivarian Republic of...    28
Viet Nam                                78
Yemen                                    3
Zambia                                   4
Zimbabwe                                14
Name: LanguageWorkedWith, Length: 179, dtype: int64

In [33]:
# Goal: compute the percentage of respondents from each country who have worked with Python.

In [36]:
# Respondent count per country (denominator for the percentage computed later).
TotalResp = df['Country'].value_counts()
TotalResp

United States                       20949
India                                9061
Germany                              5866
United Kingdom                       5737
Canada                               3395
                                    ...  
North Korea                             1
Saint Vincent and the Grenadines        1
Niger                                   1
Chad                                    1
Brunei Darussalam                       1
Name: Country, Length: 179, dtype: int64

In [37]:
# Per-country count of respondents whose language list mentions Python.
WorkedPython = country['LanguageWorkedWith'].apply(
    lambda languages: languages.str.contains('Python').sum()
)
WorkedPython

Country
Afghanistan                              8
Albania                                 23
Algeria                                 40
Andorra                                  0
Angola                                   2
                                        ..
Venezuela, Bolivarian Republic of...    28
Viet Nam                                78
Yemen                                    3
Zambia                                   4
Zimbabwe                                14
Name: LanguageWorkedWith, Length: 179, dtype: int64

In [40]:
# Combine respondent totals and Python-user counts into one frame, aligned on
# the country index. pandas >= 2.0 names value_counts() results 'count', so the
# name is pinned to 'Country' explicitly; the later column rename keys on that
# label and would otherwise silently do nothing.
python_df = pd.concat([TotalResp.rename('Country'), WorkedPython], axis='columns')
python_df.head(25)

Unnamed: 0,Country,LanguageWorkedWith
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
France,2391,1054
Brazil,1948,767
Poland,1922,751
Australia,1903,790
Netherlands,1852,767


In [45]:
# Give the two count columns descriptive labels.
python_df = python_df.rename(
    columns={
        'Country': 'TotalRespondents',
        'LanguageWorkedWith': 'WorkedOnPython',
    }
)

In [46]:
# Display the per-country summary frame.
python_df

Unnamed: 0,TotalRespondents,WorkedOnPython
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
...,...,...
North Korea,1,0
Saint Vincent and the Grenadines,1,0
Niger,1,1
Chad,1,0


In [64]:
# Share of each country's respondents who reported Python, expressed as a
# percentage rounded to two decimals.
python_df['PctWorkedOnPython'] = (WorkedPython / TotalResp).mul(100).round(2)
python_df

Unnamed: 0,TotalRespondents,WorkedOnPython,PctWorkedOnPython
United States,20949,10083,48.13
India,9061,3105,34.27
Germany,5866,2451,41.78
United Kingdom,5737,2384,41.55
Canada,3395,1558,45.89
...,...,...,...
North Korea,1,0,0.00
Saint Vincent and the Grenadines,1,0,0.00
Niger,1,1,100.00
Chad,1,0,0.00


In [59]:
# 25 countries with the highest Python percentage.
# Countries with very few respondents can reach 100% from a single answer.
python_df.sort_values(by = 'PctWorkedOnPython', ascending = False).head(25)

Unnamed: 0,TotalRespondents,WorkedOnPython,PctWorkedOnPython
Sao Tome and Principe,1,1,100.0
Niger,1,1,100.0
Timor-Leste,1,1,100.0
Dominica,1,1,100.0
Turkmenistan,7,6,85.71
Mauritania,7,5,71.43
Guyana,3,2,66.67
Guinea,3,2,66.67
Bahamas,3,2,66.67
Uganda,72,47,65.28


In [56]:
# Summary row for the 'United States' index label.
python_df.loc['United States']

TotalRespondents     20949.000000
WorkedOnPython       10083.000000
PctWorkedOnPython       48.131176
Name: United States, dtype: float64

In [66]:
python_df.to_csv('PythonSOF.csv') # Write the summary frame (including its index) to a CSV file in the working directory.

In [13]:
# YearsCode is still stored as text at this point, so taking the mean fails.
# Catch and show the error so a top-to-bottom run of the notebook is not
# interrupted; if the column has already been converted, show the mean.
try:
    years_code_mean = df['YearsCode'].mean()
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')
else:
    print(years_code_mean)

TypeError: can only concatenate str (not "int") to str

In [24]:
# List the distinct values stored in YearsCode.
df['YearsCode'].unique()

array(['4', nan, '3', '16', '13', '6', '8', '12', '2', '5', '17', '10',
       '14', '35', '7', 0, '30', '9', '26', '40', '19', '15', '20', '28',
       '25', '1', '22', '11', '33', '50', '41', '18', '34', '24', '23',
       '42', '27', '21', '36', '32', '39', '38', '31', '37', 51, '29',
       '44', '45', '48', '46', '43', '47', '49'], dtype=object)

In [23]:
# Map the two textual answers onto numeric stand-ins (0 and 51) so the column
# can be cast to a number afterwards. The result is assigned back rather than
# calling replace(inplace=True) on df['YearsCode']: that form operates on an
# intermediate object and leaves df unchanged under pandas Copy-on-Write.
df['YearsCode'] = df['YearsCode'].replace(
    {'Less than 1 year': 0, 'More than 50 years': 51}
)

In [25]:
# Convert YearsCode to a floating-point column (float keeps NaN for missing answers).
df['YearsCode'] = df['YearsCode'].astype('float64')

In [26]:
# Mean of YearsCode once the column is numeric (missing values are skipped by default).
df['YearsCode'].mean()

11.662114216834588

In [27]:
# Median of YearsCode (missing values are skipped by default).
df['YearsCode'].median()

9.0