### 1. Getting started

In [1]:
import pandas as pd

In [58]:
df = pd.read_csv("./data/survey_results_public.csv", index_col='ResponseId')
schema_df = pd.read_csv("./data/survey_results_schema.csv", index_col = 'qname')

In [3]:
pd.set_option('display.max_columns',85)     #setting max col limit
pd.set_option('display.max_rows',85)     #setting max row limit

In [59]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 83439 entries, 1 to 83439
Data columns (total 47 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   MainBranch                    83439 non-null  object 
 1   Employment                    83323 non-null  object 
 2   Country                       83439 non-null  object 
 3   US_State                      14920 non-null  object 
 4   UK_Country                    4418 non-null   object 
 5   EdLevel                       83126 non-null  object 
 6   Age1stCode                    83243 non-null  object 
 7   LearnCode                     82963 non-null  object 
 8   YearsCode                     81641 non-null  object 
 9   YearsCodePro                  61216 non-null  object 
 10  DevType                       66484 non-null  object 
 11  OrgSize                       60726 non-null  object 
 12  Currency                      61080 non-null  object 
 13  C

In [60]:
schema_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 48 entries, S0 to SurveyEase
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   qid         48 non-null     object
 1   question    48 non-null     object
 2   force_resp  48 non-null     bool  
 3   type        48 non-null     object
 4   selector    48 non-null     object
dtypes: bool(1), object(4)
memory usage: 1.9+ KB


In [6]:
schema_df.head(10)

Unnamed: 0,qid,qname,question,force_resp,type,selector
0,QID16,S0,"<div><span style=""font-size:19px;""><strong>Hel...",False,DB,TB
1,QID12,MetaInfo,Browser Meta Info,False,Meta,Browser
2,QID1,S1,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
3,QID2,MainBranch,Which of the following options best describes ...,True,MC,SAVR
4,QID24,Employment,Which of the following best describes your cur...,False,MC,MAVR
5,QID6,Country,"Where do you live? <span style=""font-weight: b...",True,MC,DL
6,QID7,US_State,<p>In which state or territory of the USA do y...,False,MC,DL
7,QID9,UK_Country,In which part of the United Kingdom do you liv...,False,MC,DL
8,QID190,S2,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
9,QID25,EdLevel,Which of the following best describes the high...,False,MC,SAVR


### 2. DataFrame and Series : Basics

In [10]:
df['Country'].value_counts()

United States of America                                15288
India                                                   10511
Germany                                                  5625
United Kingdom of Great Britain and Northern Ireland     4475
Canada                                                   3012
                                                        ...  
Saint Kitts and Nevis                                       1
Dominica                                                    1
Saint Vincent and the Grenadines                            1
Tuvalu                                                      1
Papua New Guinea                                            1
Name: Country, Length: 181, dtype: int64

In [15]:
df.loc[[i for i in range(10)],'Country']

0                                             Slovakia
1                                          Netherlands
2                                   Russian Federation
3                                              Austria
4    United Kingdom of Great Britain and Northern I...
5                             United States of America
6                             United States of America
7                                             Malaysia
8                                                India
9                                               Sweden
Name: Country, dtype: object

In [18]:
df.loc[0:10,'Country':'YearsCode']

Unnamed: 0,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode
0,Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,
1,Netherlands,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0
2,Russian Federation,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",
3,Austria,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,
4,United Kingdom of Great Britain and Northern I...,,England,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0
5,United States of America,Georgia,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",
6,United States of America,New Hampshire,,"Secondary school (e.g. American high school, G...",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",3.0
7,Malaysia,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,School;Online Courses or Certification,4.0
8,India,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",18 - 24 years,Coding Bootcamp,6.0
9,Sweden,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,School,7.0


In [19]:
df.loc[0:10,'Country':'YearsCode'].describe()

Unnamed: 0,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode
count,11,2,2,11,11,10,7
unique,9,2,1,3,3,10,6
top,United Kingdom of Great Britain and Northern I...,Georgia,England,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,Coding Bootcamp;Other online resources (ex: vi...,7
freq,2,1,2,6,8,1,2


### 3. Indexes

In [28]:
schema_df.set_index('qname',inplace=True)

In [31]:
schema_df.loc[['Country','Age1stCode']]

Unnamed: 0_level_0,qid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Country,QID6,"Where do you live? <span style=""font-weight: b...",True,MC,DL
Age1stCode,QID149,At what age did you write your first line of c...,False,MC,MAVR


In [37]:
schema_df.sort_index(ascending=False, inplace=True)

In [38]:
schema_df

Unnamed: 0_level_0,qid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
YearsCodePro,QID34,"NOT including education, how many years have y...",False,MC,DL
YearsCode,QID32,"Including any education, how many years have y...",False,MC,DL
Webframe,QID264,Which <strong>web frameworks </strong><span st...,False,Matrix,Likert
US_State,QID7,<p>In which state or territory of the USA do y...,False,MC,DL
UK_Country,QID9,In which part of the United Kingdom do you liv...,False,MC,DL
Trans,QID153,Do you identify as transgender?,False,MC,MAVR
ToolsTech,QID275,Which <strong>tools</strong> have you done ext...,False,Matrix,Likert
SurveyLength,QID132,How do you feel about the length of the survey...,False,MC,MAVR
SurveyEase,QID133,How easy or difficult was this survey to compl...,False,MC,MAVR
Sexuality,QID136,"Which of the following describe you, if any? P...",False,MC,MAVR


### 4. Filtering

In [70]:
countries = ['United States of America', 'India', 'Germany']
filt = (df['CompTotal']>7000) & (df['Country'].isin(countries)) & (df['Gender'].str.contains('M', na=False))

In [73]:
filt

ResponseId
1        False
2        False
3        False
4        False
5        False
         ...  
83435     True
83436    False
83437    False
83438    False
83439    False
Length: 83439, dtype: bool

In [72]:
df.loc[filt,['MainBranch', 'YearsCode','Country','Sexuality', 'Gender']]

Unnamed: 0_level_0,MainBranch,YearsCode,Country,Sexuality,Gender
ResponseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13,I am a developer by profession,15,Germany,Straight / Heterosexual,Man
25,I am a developer by profession,26,Germany,Straight / Heterosexual,Man
36,I am a developer by profession,19,Germany,Straight / Heterosexual,Man
37,I am a developer by profession,8,United States of America,Straight / Heterosexual,Man
38,I am a developer by profession,20,United States of America,Straight / Heterosexual,Man
...,...,...,...,...,...
83417,I am a developer by profession,34,United States of America,Straight / Heterosexual,Man
83425,I am a developer by profession,17,Germany,Straight / Heterosexual,Man
83428,I am a developer by profession,13,United States of America,Straight / Heterosexual,Man
83431,I am a developer by profession,23,United States of America,Straight / Heterosexual,Man
