In [1]:
people = {
    'first_name' : ['Yaqub', 'Muhsina', 'Abubakar', 'Taiwo'],
    'last_name' : ['Gaji', 'Abdulkareem', 'Abdulkareem', 'Olatunji'],
    'email': ['gaji@123.com', 'oyin@6.com', 'abdul@gmail.com', 'ola@gmail.ng']
}

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


# Updating Our Columns

In [5]:
# TO get the columns
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [6]:
# Assigning different names to the columns
df.columns = ['first', 'last', 'email']

In [7]:
df

Unnamed: 0,first,last,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


In [8]:
# List comprehension
# Changing all our columns to Upper case
df.columns = [x.upper() for x in df.columns]

In [9]:
df

Unnamed: 0,FIRST,LAST,EMAIL
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


In [10]:
# Remove spaces in column name
# WOW, this is going to remove all the spaces and change
# them to '_' underscores or VICE VERSA
df.columns = df.columns.str.replace('_', '')

In [11]:
df

Unnamed: 0,FIRST,LAST,EMAIL
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


In [12]:
# Changing it back to lowercase for easier manipulation
df.columns = [x.lower() for x in df.columns]

In [13]:
df

Unnamed: 0,first,last,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


In [14]:
# This right here is a significant change in a particular column
# Changing 'email' to 'gmail'
# df.columns = df.columns.str.replace('email', 'gmail')

In [15]:
# Changing it back to it's original form
df.columns = df.columns.str.replace('gmail', 'email')
df

Unnamed: 0,first,last,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


In [16]:
# Using the rename method to change some columns
# Using the dictionary method and mapping the former name
# To the new name which is the 'value' of the 'key'
df.rename(columns= {'first': 'first_name', 'last':'last_name'}, inplace=True)

In [17]:
# If we didn't use the 'inplace=True' func we would only get 
# A preview of how it would like if it worked
# It won't change it properly to how we want it
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


# Updating Our Rows

In [18]:
# This is to get a particular row;
# We can change the row by just re-assigning the values to it
df.loc[2] = ['Abubakar', 'Smith', 'Abubakarsmith@gmail.com']

In [19]:
# Now the name has a new 'last_name: Smith' and a new 'email'
df.loc[2]

first_name                   Abubakar
last_name                       Smith
email         Abubakarsmith@gmail.com
Name: 2, dtype: object

In [20]:
# Getting specific columns in a particular row
# Now we've been able to re-assign the previous values to the columns
df.loc[2, ['last_name', 'email']] =  ['abdulkareem', 'abdul@gmail.com']

In [21]:
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@gmail.ng


In [22]:
# Changing a single column
# Always go for the column name, not the value of the column
df.loc[3, 'last_name'] = 'Kehinde'

In [23]:
# Pandas uses 'at' for specific values as well
# df.at[3, 'last_name'] = 'Olatunji'

In [24]:
# The 'at' sets up a new column for the particular row chosen
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [25]:
filt = (df['email']) == 'ola@gmail.ng'
df[filt] # This works for looking up a value
df[filt]['last_name'] # This works for a single column as well

3    Kehinde
Name: last_name, dtype: object

In [26]:
# Always use '.loc' or 'at' if you are trying to set values
# df[filt]['last_name'] = 'Smith' # This won't work

In [27]:
df.loc[filt, 'last_name'] = 'Smith'

In [28]:
# This updates the single row of the data
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,abdulkareem,abdul@gmail.com
3,Taiwo,Smith,ola@gmail.ng


Updating multiple rows of data

In [29]:
# This gives lowercase values of all the emails
df['email']= df['email'].str.lower() # Now this changed the emails properly to lowercase

In [30]:
df['email']

0       gaji@123.com
1         oyin@6.com
2    abdul@gmail.com
3       ola@gmail.ng
Name: email, dtype: object

# apply
# map
# applymap
# replace

In [31]:
# Using 'apply'
# 'apply' uses a func to the data
# Right here we can see that we get the 'len' of the email address
df['email'].apply(len)

0    12
1    10
2    15
3    12
Name: email, dtype: int64

In [32]:
# Using 'apply' to update a value
def update_email(email):
    return email.upper()


In [33]:
# now you can see how 'apply' can apply a function to the data
# don't use 'update_email' with the '()' 
# because you are trying to use the function
# not an executed version of the function
df['email'].apply(update_email) # email is not in 'uppercase'

0       GAJI@123.COM
1         OYIN@6.COM
2    ABDUL@GMAIL.COM
3       OLA@GMAIL.NG
Name: email, dtype: object

In [34]:
# The cell above doesn't change the values of the series
# So, what we can do is re-assign the values to the column
df['email'] = df['email'].apply(update_email)

In [35]:
df['email']

0       GAJI@123.COM
1         OYIN@6.COM
2    ABDUL@GMAIL.COM
3       OLA@GMAIL.NG
Name: email, dtype: object

In [36]:
# Using 'lambda' function
# converting back to lowercase
# Lambda is a 'no-name' func
# What it does is you pass the variable followed by a 
# column ':' then you pass in the operation/function you want the 
# variable to do
df['email'] = df['email'].apply(lambda x: x.lower())

In [37]:
# 'email' has been updated back to 'lowercase'
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,abdulkareem,abdul@gmail.com
3,Taiwo,Smith,ola@gmail.ng


In [38]:
# How 'apply' works with a data frame now, not only on a series
df['email'].apply(len)

0    12
1    10
2    15
3    12
Name: email, dtype: int64

In [39]:
# This doesn't apply to the values of the data frames
# What this does is it applies to the series, by counting
# How many 'first_names' do we have or 'email' 
# and giving us the lengths of those one
# Changing the axis now makes it count the len of the columns
df.apply(len, axis = 'columns')

0    3
1    3
2    3
3    3
dtype: int64

In [40]:
# This is what the cell above is doing
len(df['email'])

4

In [41]:
# This doesn't work as expected because they are not numerical values

df.apply(pd.Series.min)

first_name           Abubakar
last_name         Abdulkareem
email         abdul@gmail.com
dtype: object

In [42]:
# 'lambda' works on a series not the whole data frame(df)
# You could use 'lambda' as well
# but do not forget that it doesn't work on 'floats' or 'str'
# Try out this functions on Numerical value
df.apply(lambda x: x.min())

first_name           Abubakar
last_name         Abdulkareem
email         abdul@gmail.com
dtype: object

APPLY MAP

In [43]:
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Abdulkareem,oyin@6.com
2,Abubakar,abdulkareem,abdul@gmail.com
3,Taiwo,Smith,ola@gmail.ng


In [44]:
# 'applymap' is diff from 'apply' because instead of applying
# only to a series, it applies the function accross the entire
# data frane
df.applymap(len)

Unnamed: 0,first_name,last_name,email
0,5,4,12
1,7,11,10
2,8,11,15
3,5,5,12


In [47]:
# Now all the data are in lowercase
df.applymap(str.lower)

Unnamed: 0,first_name,last_name,email
0,yaqub,gaji,gaji@123.com
1,muhsina,abdulkareem,oyin@6.com
2,abubakar,abdulkareem,abdul@gmail.com
3,taiwo,smith,ola@gmail.ng


MAP

In [49]:
# 'map' works only on series
# The 'map' method is used for substituting values in a series
# you also need to specify the series you would like to change as well

df['first_name'].map({'Yaqub': 'Yk', 'Muhsina': 'Damola'})

0        Yk
1    Damola
2       NaN
3       NaN
Name: first_name, dtype: object

In [50]:
# Instead of using map that substitutes the expected values
# But instead deletes the other names by changing them to 'NaN'
# We can use the 'replace' method
# Now other values are not deleted or replaced by 'NaN'
df['first_name'].replace({'Yaqub': 'Yk', 'Muhsina': 'Damola'})

0          Yk
1      Damola
2    Abubakar
3       Taiwo
Name: first_name, dtype: object

In [51]:
# The 2 cells above don't actually make a permanent change
# to the data, if we want a permanent change then we have 
# to re-assign like we did before
df['first_name'] = df['first_name'].replace({'Yaqub': 'Yk', 'Muhsina': 'Damola'})

In [53]:
# Now we can see the values have been substituted
df

Unnamed: 0,first_name,last_name,email
0,Yk,Gaji,gaji@123.com
1,Damola,Abdulkareem,oyin@6.com
2,Abubakar,abdulkareem,abdul@gmail.com
3,Taiwo,Smith,ola@gmail.ng


# Testing what we've learnt in our stack overflow data

In [57]:
df1 = pd.read_csv('data/survey_results_public.csv', index_col = 'Respondent')
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col = 'Column' )

In [58]:
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

In [60]:
df1.head()

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3.0,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3.0,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16.0,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


In [64]:
# Renaming the column 'ConvertedComp' to 'Salary'
# For easier readability
# Using the rename method we can now change the name of the column
# Always confirm that what you changed has been applied
# before using 'inplace=True'
df1.rename(columns= {'ConvertedComp': 'SalaryUSD'}, inplace=True)

In [67]:
# Confirming the change
df1['SalaryUSD'] # It works

Respondent
1            NaN
2            NaN
3         8820.0
4        61000.0
5            NaN
          ...   
88377        NaN
88601        NaN
88802        NaN
88816        NaN
88863        NaN
Name: SalaryUSD, Length: 88883, dtype: float64

In [69]:
df1['Hobbyist']

Respondent
1        Yes
2         No
3        Yes
4         No
5        Yes
        ... 
88377    Yes
88601     No
88802     No
88816     No
88863    Yes
Name: Hobbyist, Length: 88883, dtype: object

In [71]:
# Using the 'map' method we can now change all the 'Yes' and 'No'
# values to 'True' or 'False'
df1['Hobbyist'].map({'Yes': 'True', 'No': 'False'})

Respondent
1         True
2        False
3         True
4        False
5         True
         ...  
88377     True
88601    False
88802    False
88816    False
88863     True
Name: Hobbyist, Length: 88883, dtype: object

In [81]:
# Now we have sucessfully re-assigned the values to 'Hobbyist' column
df1['Hobbyist'] = df1['Hobbyist'].map({'Yes': 'True', 'No': 'False'})

In [82]:
df1['Hobbyist']

Respondent
1        NaN
2        NaN
3        NaN
4        NaN
5        NaN
        ... 
88377    NaN
88601    NaN
88802    NaN
88816    NaN
88863    NaN
Name: Hobbyist, Length: 88883, dtype: object