# Python Basics Refresher

## Lists

### 1. Slicing

In [2]:
# Slice syntax is seq[start:stop:step]; stop is exclusive and negative
# indices count back from the end of the list.
l1 = [*range(1, 11)]
print(
    l1,
    l1[2:-3],        # from index 2 up to (not including) the third-from-last item
    l1[-7:-4],       # negative bounds on both sides
    l1[-8:-2:2],     # every second item, walking forwards
    l1[-2:-8:-2],    # negative step walks backwards, so start lies to the right of stop
    l1[-5:],         # omitted stop: run to the end (the last five items)
    l1[-1:-11:-1],   # full reversal with explicit bounds (either bound may be omitted)
    l1[::-1],        # same reversal with both bounds omitted
    sep='\n',
)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[3, 4, 5, 6, 7]
[4, 5, 6]
[3, 5, 7]
[9, 7, 5]
[6, 7, 8, 9, 10]
[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]


### 2. enumerate

In [3]:
# enumerate() pairs each item with a running counter; the second argument
# makes the count begin at 1 instead of the default 0.
branches = [
    'Mechanical',
    'IT',
    'INST',
    'CSE',
    'IT',
    'EXTC',
]
for index, branch in enumerate(branches, 1):
    print(index, branch)
    

1 Mechanical
2 IT
3 INST
4 CSE
5 IT
6 EXTC


## Sets

### 1. Set creation

In [4]:
# A set keeps a single copy of each element, so the repeated 'IT' entries
# in the literal collapse into one member.
science_branches = {
    'Mechanical', 'IT', 'Management', 'CS',
    'IT', 'EXTC', 'Math', 'IT',
}
print(science_branches)
# Membership tests on a set are hash lookups rather than linear scans.
print('CS' in science_branches)

{'EXTC', 'Mechanical', 'Math', 'IT', 'Management', 'CS'}
True


In [5]:
commerce_branches = {'Finance', 'Management','CS','Math', 'Sales'}

In [6]:
empty = set() #not {} - that's an empty dictionary
print(empty)

set()


### 2. Set Operations

In [7]:
# `&` is the operator spelling of set.intersection(): members present in both sets.
print(science_branches & commerce_branches)

{'Management', 'CS', 'Math'}


In [8]:
# `-` is the operator spelling of set.difference(): members of the left set only.
print(science_branches - commerce_branches)

{'IT', 'Mechanical', 'EXTC'}


In [9]:
# `|` is the operator spelling of set.union(): members found in either set.
print(science_branches | commerce_branches)

{'EXTC', 'Mechanical', 'Math', 'IT', 'Management', 'CS', 'Sales', 'Finance'}


## Dictionaries

### 1. Creation

In [2]:
# Keys do not all have to be strings: any hashable object (here the int 5) works.
student = {
    'name': 'Ashwin',
    'age': 25,
    'course': ['comp sci', 'economics'],
    5: (1, 2),
}
print(student)
# Remove the demo integer key again; only the string keys remain afterwards.
del student[5]

{'name': 'Ashwin', 'age': 25, 'course': ['comp sci', 'economics'], 5: (1, 2)}


### 2. Access elements

In [11]:
# Intuitive way: subscript access. It is simple, but it raises KeyError
# when the key is absent.
print(student['name'])
try:
    print(student['phone'])
except KeyError as err:
    # Show the failure instead of letting it abort a "Restart & Run All".
    print('KeyError:', repr(err.args[0]))

Ashwin


KeyError: 'phone'

In [3]:
# Better way
print(student.get('phone', 'Not found'))

Not found


### 3. Add,update and delete

In [16]:
student['phone'] = '2222-2222'

In [17]:
print(student)
del student['phone']
print(student)

{'name': 'Ashwin', 'age': 25, 'course': ['comp sci', 'economics'], 'phone': '2222-2222'}
{'name': 'Ashwin', 'age': 25, 'course': ['comp sci', 'economics']}


In [18]:
# update with another dictionary
student_new = {'name': 'Anurag', 'age': 21, 'phone':'5555-5555'}
student.update(student_new)

In [20]:
student

{'name': 'Anurag',
 'age': 21,
 'course': ['comp sci', 'economics'],
 'phone': '5555-5555'}

In [21]:
age = student.pop('age')
print(age)

21


### 4. Unpacking Generalization - merge 2 dictionaries

In [4]:
# Inputs for the two merge demonstrations.
d1 = dict(a=1, b=2)
d2 = dict(c=3, d=4)
d3 = dict(a=9, b=10)

# Disjoint keys: ** unpacking builds a new dict (an alternative to update()).
z1 = {**d1, **d2}
print(z1)

# Overlapping keys: unpacking runs left to right, so the right-hand dict
# overwrites values coming from the left-hand one.
z2 = {**d1, **d3}
print(z2)


{'a': 1, 'b': 2, 'c': 3, 'd': 4}
{'a': 9, 'b': 10}


### 5. Sort By Value

In [38]:
d1 = dict(a=100, b=90, c=10, d=50)
# items() yields (key, value) pairs; sorting on element 1 orders them by value.
print(sorted(d1.items(), key=lambda pair: pair[1]))

[('c', 10), ('d', 50), ('b', 90), ('a', 100)]


## Extra

### 1. is operator

In [22]:
# `is` compares object identity, not equality. Lists are used because every
# list display creates a brand-new object, so the result below is guaranteed.
# With equal tuple literals the interpreter may reuse one cached constant,
# which makes `a is b` implementation-dependent.
a = [1, 2, 3]
b = [1, 2, 3]
print(a is b)

False


In [23]:
print(id(a), id(b))

140063111918504 140063112383656


In [26]:
b = a
print(a is b) ## id(a)==id(b)

True


In [27]:
print(id(a), id(b))

140063111918504 140063111918504


### 2. False Values

The following values evaluate to False in Python
- False
- None
- Zero (any numeric type)
- Empty sequence - (), [], ''
- Empty mapping - {}  
**Everything else evaluates to True**

In [28]:
# None is falsy, so the conditional expression picks the second message.
condition = None
print('Inside True' if condition else 'Inside False')

Inside False


In [29]:
# Zero of any numeric type (here a float) is falsy.
condition = 0.0
print('Inside True' if condition else 'Inside False')

Inside False


In [31]:
# The empty string is falsy.
condition = ''
print('Inside True' if condition else 'Inside False')
# The empty list is falsy too, so `not condition` is True here.
condition = []
print('Inside True' if not condition else 'Inside False')

Inside False
Inside True


Everything else evaluates to True

In [32]:
# A non-empty container is truthy.
condition = [1, 2, 3]
print('Inside True' if condition else 'Inside False')

Inside True


## Files

 - Use context managers to work with files as it automatically closes the file.
 - If we open files the traditional way (calling open() without a context manager) and forget to call close(), file descriptors leak, and we may get an error once the number of open file descriptors reaches the maximum limit.
 - The file object is still accessible after the file is closed, but we can no longer read from it.

In [53]:
with open('av.yml') as f:
    print(f.name)
    print(f.mode)
print(f.closed)  

av.yml
r
True


In [55]:
with open('av.yml') as f:
    content = f.read()
    print(content)

name: av
channels:
    - https://conda.anaconda.org/menpo
    - conda-forge
dependencies:
    - python==3.7.0
    - matplotlib==2.2.2
    - numpy==1.15.0
    - pandas==0.23.3
    - jupyter



- The read() method is good if we want to read small files
- But if the file size is too big, we may run out of memory
- we can use methods to read one line at a time:
    - use readline() method
    - use a for loop on the file object to iterate over the lines, one at a time

In [60]:
with open('av.yml') as f:
    line = f.readline()
    print(line)
    line = f.readline()
    print(line)

name: av

channels:



In [59]:
with open('av.yml') as f:
    for line in f:
        print(line, end='')

name: av
channels:
    - https://conda.anaconda.org/menpo
    - conda-forge
dependencies:
    - python==3.7.0
    - matplotlib==2.2.2
    - numpy==1.15.0
    - pandas==0.23.3
    - jupyter


- Get more control on read by specifying chunksize to read each time:

In [66]:
with open('av.yml') as f:
    chunksize = 30
    block = f.read(chunksize)
    print(block)
    block = f.read(chunksize)
    print(block)

name: av
channels:
    - https
://conda.anaconda.org/menpo
  


Once it reaches end of file, it will return empty string.
 - We might not know the exact file size, so here is a more general implementation that keeps reading until the end of the file:

In [68]:
with open('av.yml') as f:
    chunksize = 30
    # read() hands back '' at end of file, and '' is falsy, so the loop
    # stops by itself once the file is exhausted.
    block = f.read(chunksize)
    while block:
        print(block, end='*')   # '*' marks where each chunk ends
        block = f.read(chunksize)

name: av
channels:
    - https*://conda.anaconda.org/menpo
  *  - conda-forge
dependencies:
*    - python==3.7.0
    - matp*lotlib==2.2.2
    - numpy==1.1*5.0
    - pandas==0.23.3
    -* jupyter
*

We can also manipulate positions of pointer in the file:

In [70]:
with open('av.yml') as f:
    chunksize = 10
    block = f.read(chunksize)
    print(block)
    print(f.tell())

name: av
c
10


In [78]:
# Binary mode is used on purpose: tell() and seek() then work in plain byte
# offsets, so "current position + 2" is well defined. In text mode tell()
# returns an opaque cookie, and seeking to cookie + n is undefined behaviour.
with open('av.yml', 'rb') as f:
    chunksize = 10
    # decode() turns the bytes back into text for display; it assumes UTF-8
    # content and no multi-byte character cut by a chunk boundary.
    block = f.read(chunksize).decode()
    print(block, end='*')
    f.seek(0)                    # rewind to the start of the file
    block = f.read(chunksize).decode()
    print(block, end='*')
    f.seek(f.tell() + 2)         # skip the next two bytes
    block = f.read(chunksize).decode()
    print(block, end='*')

name: av
c*name: av
c*nnels:
   *

- FILE  MODES:
    - r: read
    - w: write
    - r+: read/write
    - a: append
    - rb, wb, ab: read/write/append in binary mode

In [80]:
# A file object iterates over its lines, and writelines() writes each item
# it is given, so this copies av.yml line by line.
with open('av.yml') as rf, open('av_test.yml', 'w') as wf:
    wf.writelines(rf)

- For large files or binary files, it is feasible neither to read one line at a time (more I/O operations, which are costly) nor to read everything at once.
- So we read in chunks.

In [None]:
# Binary mode copies the bytes verbatim: no decoding and no newline
# translation, so the same loop works for text files and for binary files.
with open('python_basic.ipynb', 'rb') as rf, open('python_basic_test.ipynb', 'wb') as wf:
    chunksize = 2000              # bytes per read; bounds the memory in use
    chunk = rf.read(chunksize)
    while chunk:                  # read() returns b'' (falsy) at end of file
        wf.write(chunk)
        chunk = rf.read(chunksize)

In [None]:
# Line-by-line copy of the same file, for comparison with the chunked copy.
with open('python_basic.ipynb') as rf, open('python_basic_test.ipynb', 'w') as wf:
    for line in rf:
        wf.write(line)

**Compare the execution time of the above 2 approaches**

## OS Module - interacting with the OS

In [8]:
import os

### Working with directory

In [19]:
print(os.getcwd()) #present working directory
os.chdir('/home/ubuntu/Downloads/') #change directory
print(os.getcwd())

/home/ubuntu/Documents
/home/ubuntu/Downloads


In [20]:
os.listdir('./scripts/') #contents of the scripts/ directory inside the working directory (os.listdir() with no argument lists the working directory itself)

['setup.sh', 'README.md', 'scripts']

In [83]:
os.chdir('/home/ubuntu/Documents/av_2018/')
os.mkdir('temp')

In [84]:
os.listdir()

['av_test.yml',
 '.ipynb_checkpoints',
 '.git',
 'python_basic.ipynb',
 'temp',
 '.gitignore',
 'av.yml']

Creating directory with sublevels

In [85]:
os.makedirs('temp2/subdir1/subdir3') #More preferable

In [86]:
print(os.listdir())
os.listdir('temp2')

['av_test.yml', '.ipynb_checkpoints', '.git', 'python_basic.ipynb', 'temp2', 'temp', '.gitignore', 'av.yml']


['subdir1']

In [87]:
#Removing directories
os.rmdir('temp') #removes a single directory and only if it is empty - the safer choice
os.removedirs('temp2/subdir1/subdir3/') #removes the leaf, then each parent directory in the path for as long as it is empty (non-empty directories are never deleted)

In [88]:
os.listdir()

['av_test.yml',
 '.ipynb_checkpoints',
 '.git',
 'python_basic.ipynb',
 '.gitignore',
 'av.yml']

In [89]:
os.makedirs('temp')
os.rename('temp', 'temp10')

In [91]:
# A bare expression is only echoed when it is the last statement of a cell,
# so the listing is printed explicitly before the directory is removed.
print(os.listdir())
os.rmdir('temp10')

### File Stats: (helps in webapps to track timestamps of modified files)

In [92]:
os.stat('./av.yml')

os.stat_result(st_mode=33204, st_ino=31330495, st_dev=2051, st_nlink=1, st_uid=1000, st_gid=1000, st_size=189, st_atime=1533443685, st_mtime=1533443681, st_ctime=1533443681)

In [93]:
from datetime import datetime
modified_time = os.stat('av.yml').st_mtime
print('Last Modified time:',datetime.fromtimestamp(modified_time))

Last Modified time: 2018-08-05 10:04:41.045577


### Traversing Directory tree


In [94]:
directory_path = '/home/ubuntu/Documents/'
os.walk(directory_path) # generator object - dir_path, subdirs, files

<generator object walk at 0x7f14bc63b8e0>

In [95]:
for dir_path, subdirs, files in os.walk(directory_path):
    print('Current Path:', dir_path)
    print('Directories:', subdirs)
    print('Files:', files)
    print()

Current Path: /home/ubuntu/Documents/
Directories: ['av_2018', '.ipynb_checkpoints', 'python4ds', 'temp10']
Files: ['import_data.pdf', 'TensorFlow Tutorial For Beginners.ipynb', 'pandas_practice.ipynb', 'sklearn_cs.png', '2702d58a-2dca-4135-a10c-2328377d2ff4-original.jpeg', '46c3de34-0c01-41ac-b934-24e0dc012ee2-original.jpeg']

Current Path: /home/ubuntu/Documents/av_2018
Directories: ['.ipynb_checkpoints', '.git']
Files: ['av_test.yml', 'python_basic.ipynb', '.gitignore', 'av.yml']

Current Path: /home/ubuntu/Documents/av_2018/.ipynb_checkpoints
Directories: []
Files: ['PythonForDataScience-checkpoint.ipynb', 'python_basic-checkpoint.ipynb']

Current Path: /home/ubuntu/Documents/av_2018/.git
Directories: ['hooks', 'info', 'branches', 'refs', 'objects', 'logs']
Files: ['index', 'config', 'description', 'ORIG_HEAD', 'FETCH_HEAD', 'HEAD']

Current Path: /home/ubuntu/Documents/av_2018/.git/hooks
Directories: []
Files: ['prepare-commit-msg.sample', 'pre-commit.sample', 'update.sample', 'pr

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/9b
Directories: []
Files: ['f4c5576a845f0dd73baf7f50e78fcb1c05cb75']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/98
Directories: []
Files: ['37c49ccb6eaea5a56682b609094bbc3d338fd1']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/22
Directories: []
Files: ['212be989571e08a2880a32d296497e9f0b7126']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/e2
Directories: []
Files: ['9039283a3b36929b6d539aa1a542db22e7a4c0']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/94
Directories: []
Files: ['d4e1996ea659427e4c9b8d500684ffb0894f3f']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/3a
Directories: []
Files: ['3dde8212c8eddfae67bddec62ebf1dc00cb8a3']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/90
Directories: []
Files: ['8dfbb23edfb4192d9b1b2c45175c07e64c9cb4']

Current Path: /home/ubuntu/Documents/python4ds/.git/objects/3b
Directories: []
Files: ['81

This also helps in locating some file deep within a directory

### Accessing Environment variables

In [96]:
os.environ.get('HOME')

'/home/ubuntu'

### Working with Path

In [97]:
filename = 'test.txt'
filepath = os.path.join(os.environ.get('HOME'), filename) #handles slashes
print(filepath)

/home/ubuntu/test.txt


In [98]:
print('Base name:', os.path.basename('/home/ubuntu/test.txt')) #final component
print('Directory name:', os.path.dirname('/home/ubuntu/test.txt'))
if not os.path.exists(filepath):
    print('Path does not exist')

Base name: test.txt
Directory name: /home/ubuntu
Path does not exist


In [101]:
print(os.path.split(filepath)) #Splits into head, tail(after final slash)    
print(os.path.isdir('./.git'))
print(os.path.isfile('./.git'))
print(os.path.isfile('./.ipynb_checkpoints/python_basic-checkpoint.ipynb')) #False if doesnt exist
print(os.path.splitext('./av.yml')) #split into root and extension

('/home/ubuntu', 'test.txt')
True
False
True
('./av', '.yml')
