# The Unix School: awk

### Lesson 1: read a file & split the contents

In [2]:
# Show the sample input: a whitespace-separated table whose first line is a header.
!cat file1

Name Domain
Deepak Banking
Neha Telecom
Vijay Finance
Guru Migration

In [3]:
# Print only the first column (names), then only the second column (domains).
# $1 and $2 are the 1st and 2nd whitespace-separated fields of each line;
# the header line is processed like any other line.
!awk '{print $1}' file1
!awk '{print $2}' file1

Name
Deepak
Neha
Vijay
Guru
Domain
Banking
Telecom
Finance
Migration


In [4]:
# Print the names without the header record.
# NR is the number of the current record (line), counting from 1,
# so the pattern NR!=1 runs the action on every line except the first.
!awk 'NR!=1{print $1}' file1

Deepak
Neha
Vijay
Guru


In [5]:
# Print the entire file contents: $0 holds the whole current line.
!awk '{print $0}' file1

Name Domain
Deepak Banking
Neha Telecom
Vijay Finance
Guru Migration


In [6]:
# Another way of printing everything: the pattern 1 is true for every line,
# and a pattern with no action uses the default action, which prints the line.
!awk '1' file1

Name Domain
Deepak Banking
Neha Telecom
Vijay Finance
Guru Migration


In [9]:
# Show the second sample input: comma-separated, with a header line.
# Values in the 3rd column may contain spaces.
!cat file2

Name,Domain,Expertise
Deepak,Banking,MQ Series
Neha,Telecom,Power Builder
Vijay,Finance,CRM Expert
Guru,Migration,Unix

In [11]:
# Print the 1st column of a CSV file.
# awk splits fields on whitespace by default.
# A CSV file is comma-delimited, so set the field separator with -F.

!awk -F"," '{print $1}' file2

Name
Deepak
Neha
Vijay
Guru


In [13]:
# Alternate syntax: set the FS (field separator) variable with a var=value
# operand placed before the file name; it takes effect before file2 is read.
# Prints the 1st and 3rd columns, joined by the default output separator (a space).
!awk  '{print $1,$3}' FS="," file2

Name Expertise
Deepak MQ Series
Neha Power Builder
Vijay CRM Expert
Guru Unix


In [14]:
# The 3rd column can contain several words, so space-separated output is hard to read.
# Set OFS (the output field separator) to a comma so that print $1,$3 joins
# the fields with ",", and omit the header line with NR!=1.
!awk -F"," 'NR!=1{print $1,$3}' OFS="," file2

Deepak,MQ Series
Neha,Power Builder
Vijay,CRM Expert
Guru,Unix


### Lesson 2: passing arguments or shell variables to awk

In [18]:
# Input for the quoting examples: display file2 unchanged.
!cat file2

Name,Domain,Expertise
Deepak,Banking,MQ Series
Neha,Telecom,Power Builder
Vijay,Finance,CRM Expert
Guru,Migration,Unix

In [20]:
# Wrap every line in single quotes.
# -v q="'" defines the awk variable q before the program starts; passing the quote
# character in a variable avoids putting a single quote inside the
# single-quoted awk program.
!awk -v q="'" '{print q $0 q}' file2

'Name,Domain,Expertise'
'Deepak,Banking,MQ Series'
'Neha,Telecom,Power Builder'
'Vijay,Finance,CRM Expert'
'Guru,Migration,Unix'


In [21]:
# Wrap every line in double quotes.
# Here q is set with a var=value operand placed before the file name instead of -v.
!awk '{print q $0 q}' q='"' file2

"Name,Domain,Expertise"
"Deepak,Banking,MQ Series"
"Neha,Telecom,Power Builder"
"Vijay,Finance,CRM Expert"
"Guru,Migration,Unix"


### Lesson 3: matching patterns in a file

In [22]:
# Show the third sample input: comma-separated, two fields per line
# (a label and an amount), with no header line.
!cat file3

Medicine,200
Grocery,500
Rent,900
Grocery,800
Medicine,600

In [26]:
# Print only the records containing 'Rent'.
# A bare /regex/ pattern is tested against the whole line; with no action, matching lines are printed.
!awk '/Rent/' file3

Rent,900


In [28]:
# Match the pattern only in the 1st column:
# $1 ~ /Rent/ tests the regex against the first comma-separated field.
!awk -F, '$1 ~ /Rent/' file3

Rent,900


In [30]:
# The regex match in the previous cell would also accept a first field such as "Rents".
# For an exact match, compare the field to the string with ==.
!awk -F, '$1=="Rent"' file3

Rent,900


In [32]:
# Print only the 2nd column of every record whose 1st column is exactly "Medicine".
!awk -F, '$1 == "Medicine"{print $2}' file3

200
600


In [33]:
# Print records that contain either "Rent" or "Medicine" (| is regex alternation).
!awk '/Rent|Medicine/' file3

Medicine,200
Rent,900
Medicine,600


In [35]:
# Apply the same "Rent" or "Medicine" pattern to the first column only.
!awk -F, '$1 ~ /Rent|Medicine/' file3

Medicine,200
Rent,900
Medicine,600


In [36]:
# Match only when the first column is exactly "Rent" or exactly "Medicine":
# ^ and $ anchor each alternative to the start and end of the field.
!awk -F, '$1 ~ /^Rent$|^Medicine$/' file3

Medicine,200
Rent,900
Medicine,600


In [37]:
# Print the lines that do not contain the pattern "Medicine" (! negates the match).
!awk '!/Medicine/' file3

Grocery,500
Rent,900
Grocery,800


In [39]:
# Print all records whose amount (2nd column) is strictly greater than 500;
# the record with an amount of exactly 500 is excluded.
!awk -F, '$2>500' file3

Rent,900
Grocery,800
Medicine,600


In [41]:
# Print the 1st record only if it contains "Medicine" (&& = logical AND).
!awk 'NR==1 && /Medicine/' file3

Medicine,200


In [42]:
# Print all Medicine records whose amount is greater than 500.
# Note: /Medicine/ is tested against the whole line, not just the 1st column.
!awk -F, '/Medicine/ && $2>500' file3

Medicine,600


In [43]:
# Print every record that contains "Medicine" OR whose amount is greater than 600
# (|| = logical OR).
!awk -F, '/Medicine/ || $2>600' file3

Medicine,200
Rent,900
Grocery,800
Medicine,600
