# Count the number of lines in Python for each file

In [1]:
pwd

'/home/dsc/Repos/AmadeusChallenge/KSchool'

In [4]:
# Work from the data directory; the cells below refer to the files by relative path.
cd ~/Data/challenge/

/home/dsc/Data/challenge


In [5]:
! ls -l

total 1017188
-rwxr-x--- 1 dsc dsc 554970628 mar 13  2018 bookings.csv.bz2
-rw-r--r-- 1 dsc dsc   2119069 ene 30 20:12 bookings_sample.csv
-rw-r--r-- 1 dsc dsc    270148 ene 30 20:12 bookings_sample.csv.bz2
-rwxr-x--- 1 dsc dsc 483188920 mar 13  2018 searches.csv.bz2
-rw-r--r-- 1 dsc dsc    910597 ene 30 20:12 searches_sample.csv
-rw-r--r-- 1 dsc dsc    120210 ene 30 20:12 searches_sample.csv.bz2


## 1) Command Line

Use shell commands with the `!` notation to count the number of lines in `bookings.csv.bz2` and `searches.csv.bz2`.

In [None]:
# Line count of the full bookings archive. Left commented out and not executed;
# presumably because decompressing the whole ~555 MB file is slow — TODO confirm.
#! bzcat bookings.csv.bz2 | wc -l

In [7]:
# Keep the first 1000 lines as a small sample. head exits once it has them and
# closes the pipe, so the bzcat "Broken pipe" message below is expected; the
# sample file is still written (its 1000 lines are checked further down).
!bzcat bookings.csv.bz2 | head -1000 > bookings.sample.csv


bzcat: I/O or other error, bailing out.  Possible reason follows.
bzcat: Broken pipe
	Input file = bookings.csv.bz2, output file = (stdout)


In [8]:
! ls

bookings.csv.bz2     bookings_sample.csv.bz2  searches_sample.csv.bz2
bookings_sample.csv  searches.csv.bz2
bookings.sample.csv  searches_sample.csv


In [11]:
# Compress the sample again so the Python code below can run on a small .bz2 file.
# bzip2 replaces bookings.sample.csv with bookings.sample.csv.bz2, so running this
# cell a second time fails with "No such file or directory" — presumably why the
# recorded output shows that error even though the .bz2 file exists afterwards.

!bzip2 -f bookings.sample.csv

bzip2: Can't open input file bookings.sample.csv: No such file or directory.


In [12]:
! ls

bookings.csv.bz2	 bookings.sample.csv.bz2  searches_sample.csv.bz2
bookings_sample.csv	 searches.csv.bz2
bookings_sample.csv.bz2  searches_sample.csv


In [13]:
! bzcat bookings.sample.csv.bz2 | wc -l

1000


In [15]:
pwd

'/home/dsc/Data/challenge'

Use the `%%bash` cell magic to execute several shell commands in a single cell.

In [20]:
%%bash
# Same steps for the searches file: sample the first 1000 lines, compress the
# sample (-f overwrites an existing .bz2), then count its lines to verify.
bzcat searches.csv.bz2 | head -1000 > searches.sample.csv
bzip2 -f searches.sample.csv
bzcat searches.sample.csv.bz2 | wc -l

1000


## 2) Python:

We have 2 options:

* uncompressing the whole file, then reading from the result.

* without uncompressing: better, because we don't use extra storage or litter the disk with a temporary uncompressed copy.


#### Python without uncompressing

In [38]:
import bz2

In [39]:
filename= './bookings.sample.csv.bz2'

In [40]:
# Open the compressed file for reading; iterating over the handle yields the
# decompressed lines. NOTE(review): no visible cell closes this handle.
fileBz2 = bz2.BZ2File(filename)

In [41]:
type(fileBz2)

bz2.BZ2File

In [42]:
# Rewind before counting: iterating consumes the handle, so without this a
# second run of the cell would find nothing left to read and report 0 lines.
fileBz2.seek(0)

# Stream through the decompressed data one line at a time; nothing is stored.
k = 0
for line in fileBz2:
    k += 1

print('Number of lines: %s'%(k))

Number of lines: 1000


### WITH

In [43]:
# With `with`, the file is not left open once the block has finished running.

In [47]:
# The context manager closes the file as soon as the block exits.
with bz2.BZ2File(filename) as file_input:
    # Number the lines from 1 so k ends up as the total; the 0 default
    # covers a file with no lines at all.
    k = 0
    for k, line in enumerate(file_input, start=1):
        pass

print('Number of lines: %d' % k)

Number of lines: 1000


In [45]:
file_input.closed

True

### Try Except

In [63]:
# Count the lines of the compressed file and report read failures instead of
# letting a traceback end the cell.
try:
    with bz2.BZ2File(filename) as file_input:
        # Default to 0 and number lines from 1, so an empty file reports 0
        # rather than failing on an unset k or reusing a stale value of k.
        k = 0
        for k, line in enumerate(file_input, start=1):
            pass
    print('%s has Number of lines: %d'%(filename,k))

except ValueError:
    print('ValueError')
except IOError:
    # Covers a missing or unreadable file and data that is not valid bzip2.
    print('IOError')
except Exception as error:
    # Exception rather than a bare except, so interrupting the kernel still
    # works; show what went wrong instead of hiding it.
    print('Unexpected Error: %r' % (error,))

./bookings.sample.csv.bz2 has Number of lines: 1000
