In [1]:
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer","Doe", "Doe"],
    "email": ["CoreyMSchafer@gmail.com","JaneDoe@email.com","JohnDoe@email.com"]
}

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [5]:
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [6]:
# set email as index
# since email is used as index it appears in bold

df.set_index('email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [7]:
# but if you don't set it inplace = True, panda will not permanently do so
# which is actually nice, as it will allows us to experiment without
# permanently changing the original data

df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [8]:
# if you want it to be permanent

df.set_index('email',inplace = True)

In [12]:
# so if you run it again it will already reflect email as index
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [13]:
#check the indexes
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [14]:
# check information for corey

df.loc['CoreyMSchafer@gmail.com']

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [15]:
# now email can be used as index,
# and if you want to get the last name then

df.loc['CoreyMSchafer@gmail.com','last']

'Schafer'

In [17]:
# now you will get an error using zero as index
# because email is already our index

df.loc[0]

KeyError: 0

In [18]:
# if you want to use integer location
# use the iloc

df.iloc[0]

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [21]:
# if you want to reset changes
df.reset_index(inplace=True)
df

Unnamed: 0,index,email,first,last
0,0,CoreyMSchafer@gmail.com,Corey,Schafer
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


In [23]:
df = pd.read_csv('/Users/jean110284/Desktop/Everything/MJUPython/survey_results_public.csv')
schema_df = pd.read_csv('/Users/jean110284/Desktop/Everything/MJUPython/survey_results_schema.csv')

In [25]:
df.head(2)

Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,


In [30]:
# we are using numerical/default index
# but if we look at ResponseId column, it is actually a unique id
# we can make respondent id as index 
# add ResponseId as index while reading in the csv file

df = pd.read_csv('/Users/jean110284/Desktop/Everything/MJUPython/survey_results_public.csv',index_col='ResponseId')

In [31]:
df.head(2)

Unnamed: 0_level_0,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,YearsCodePro,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
ResponseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,


In [33]:
# first respondent

df.loc[1]

MainBranch                                         I am a developer by profession
Employment                      Independent contractor, freelancer, or self-em...
Country                                                                  Slovakia
US_State                                                                      NaN
UK_Country                                                                    NaN
EdLevel                         Secondary school (e.g. American high school, G...
Age1stCode                                                          18 - 24 years
LearnCode                       Coding Bootcamp;Other online resources (ex: vi...
YearsCode                                                                     NaN
YearsCodePro                                                                  NaN
DevType                                                         Developer, mobile
OrgSize                                                        20 to 99 employees
Currency        

In [34]:
# we also have another dataframe
# if you wanted to know what hobbiest meant?

schema_df

Unnamed: 0,qid,qname,question,force_resp,type,selector
0,QID16,S0,"<div><span style=""font-size:19px;""><strong>Hel...",False,DB,TB
1,QID12,MetaInfo,Browser Meta Info,False,Meta,Browser
2,QID1,S1,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
3,QID2,MainBranch,Which of the following options best describes ...,True,MC,SAVR
4,QID24,Employment,Which of the following best describes your cur...,False,MC,MAVR
5,QID6,Country,"Where do you live? <span style=""font-weight: b...",True,MC,DL
6,QID7,US_State,<p>In which state or territory of the USA do y...,False,MC,DL
7,QID9,UK_Country,In which part of the United Kingdom do you liv...,False,MC,DL
8,QID190,S2,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
9,QID25,EdLevel,Which of the following best describes the high...,False,MC,SAVR


In [42]:
# wanted to locate what a specific column meant?
# set the name of the column (qname) as index
# re-run the schema_df
# then can use this as location

schema_df = pd.read_csv('/Users/jean110284/Desktop/Everything/MJUPython/survey_results_schema.csv', index_col='qname')


In [40]:
schema_df

Unnamed: 0_level_0,qid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S0,QID16,"<div><span style=""font-size:19px;""><strong>Hel...",False,DB,TB
MetaInfo,QID12,Browser Meta Info,False,Meta,Browser
S1,QID1,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
MainBranch,QID2,Which of the following options best describes ...,True,MC,SAVR
Employment,QID24,Which of the following best describes your cur...,False,MC,MAVR
Country,QID6,"Where do you live? <span style=""font-weight: b...",True,MC,DL
US_State,QID7,<p>In which state or territory of the USA do y...,False,MC,DL
UK_Country,QID9,In which part of the United Kingdom do you liv...,False,MC,DL
S2,QID190,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
EdLevel,QID25,Which of the following best describes the high...,False,MC,SAVR


In [43]:
# so if you want to see a certain column on the schema then

schema_df.loc['Employment']

qid                                                       QID24
question      Which of the following best describes your cur...
force_resp                                                False
type                                                         MC
selector                                                   MAVR
Name: Employment, dtype: object

In [46]:
# if you want to see the entire text / fulltext of the question

schema_df.loc['Employment','question']

'Which of the following best describes your current <b>employment status</b>?'

In [47]:
schema_df.sort_index(ascending = False)

Unnamed: 0_level_0,qid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
YearsCodePro,QID34,"NOT including education, how many years have y...",False,MC,DL
YearsCode,QID32,"Including any education, how many years have y...",False,MC,DL
Webframe,QID264,Which <strong>web frameworks </strong><span st...,False,Matrix,Likert
US_State,QID7,<p>In which state or territory of the USA do y...,False,MC,DL
UK_Country,QID9,In which part of the United Kingdom do you liv...,False,MC,DL
Trans,QID153,Do you identify as transgender?,False,MC,MAVR
ToolsTech,QID275,Which <strong>tools</strong> have you done ext...,False,Matrix,Likert
SurveyLength,QID132,How do you feel about the length of the survey...,False,MC,MAVR
SurveyEase,QID133,How easy or difficult was this survey to compl...,False,MC,MAVR
Sexuality,QID136,"Which of the following describe you, if any? P...",False,MC,MAVR


In [48]:
# permanently sort in ascending order

schema_df.sort_index(ascending = True)

Unnamed: 0_level_0,qid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accessibility,QID124,"Which of the following describe you, if any? P...",False,MC,MAVR
Age,QID127,What is your age?,False,MC,MAVR
Age1stCode,QID149,At what age did you write your first line of c...,False,MC,MAVR
CompFreq,QID52,"Is that compensation weekly, monthly, or yearly?",False,MC,MAVR
CompTotal,QID51,What is your current total compensation (salar...,False,TE,SL
Country,QID6,"Where do you live? <span style=""font-weight: b...",True,MC,DL
Currency,QID50,Which currency do you use day-to-day? If your ...,True,MC,SB
Database,QID262,Which <b>database environments </b>have you do...,False,Matrix,Likert
DevType,QID31,Which of the following describes your current ...,False,MC,MAVR
EdLevel,QID25,Which of the following best describes the high...,False,MC,SAVR


In [49]:
# END June 23, 2022 | 12:54 P.M