# Handling Files in Python

### Backslashes and File Paths

File paths use different separators depending on the OS:
- Windows: `\`
- macOS & Linux: `/`

To overcome this discrepancy, use `pathlib.Path()`.


In [54]:
from pathlib import Path

# Build a path from its components; pathlib inserts the OS-specific separator
path = Path('this').joinpath('is', 'a', 'path')
# Show the rendered path first, then the concrete class pathlib picked
print(path, type(path), sep='\n')

this/is/a/path
<class 'pathlib.PosixPath'>


In [55]:
# The "/" operator joins path segments, whatever separator the OS uses
myFiles = ['example.txt', 'important.csv']
userFolder = Path('Users') / 'Gitstetter'

for filename in myFiles:
    print(userFolder / filename)

Users/Gitstetter/example.txt
Users/Gitstetter/important.csv


### Set & Retrieve Current Working Directory

In [103]:
import os

# Retrieve the current working directory as an absolute Path object
cwd = Path.cwd()
print(cwd)

# Set the working directory with os.chdir(). Passing cwd itself leaves the
# notebook where it is, so relative paths used in later cells still resolve.
os.chdir(cwd)

/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python


### Absolute vs. Relative Paths

There are two types of paths:
- Absolute paths: always begin with the root folder
- Relative paths: relative to a program's cwd

In [57]:
# Check whether a path is absolute: first the cwd, then a relative path
for candidate in (Path.cwd(), Path('not', 'absolute')):
    print(candidate)
    print(candidate.is_absolute())

/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python
True
not/absolute
False


In [58]:
# Make a relative path absolute by anchoring it at the current working directory
print(Path('not', 'absolute').absolute())
# Or, with os.path (which also normalises away the leading "./")
print(os.path.abspath('./Test'))

/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/not/absolute
/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/Test


### Extracting Parts of a File Path

In [59]:
# Sample absolute path whose parts are picked apart in the cells below
p = Path('/Users/Gitstetter/file.txt')

In [60]:
# Get the anchor: the root of the path (drive and root combined; '/' here)
p.anchor

'/'

In [61]:
# Get the parent folder that contains the file (returned as a Path, not a string)
p.parent

PosixPath('/Users/Gitstetter')

In [62]:
# Get the file name: the final path component, including its suffix
p.name

'file.txt'

In [63]:
# Get the stem: the file name without its suffix
p.stem

'file'

In [64]:
# Get the suffix: the file extension, including the leading dot
p.suffix

'.txt'

### List Folder Contents and get File Sizes

In [65]:
# Names (not full paths) of all entries in the folder, hidden ones included
os.listdir(cwd)

['Files_Python.ipynb', 'mydata.db', '.ipynb_checkpoints']

In [66]:
# Size of the notebook file in bytes, read from the file's stat record
(cwd / 'Files_Python.ipynb').stat().st_size

8780

### Glob Patterns

Glob Patterns are like a simplified form of regexes used in command line tools.

The `glob()` method returns a generator object - pass it to `list()` to view the matches.

In [67]:
# glob() is lazy: it hands back a generator instead of a list of matches
cwd.glob('*')

<generator object Path.glob at 0x7f9e82b47ed0>

In [68]:
# List every entry in the folder (files and sub-folders): * matches any run of characters
list(cwd.glob('*'))

[PosixPath('/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/Files_Python.ipynb'),
 PosixPath('/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/mydata.db'),
 PosixPath('/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/.ipynb_checkpoints')]

In [69]:
# Only list Jupyter notebooks, i.e. entries whose name ends in .ipynb
list(cwd.glob('*.ipynb'))

[PosixPath('/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/Files_Python.ipynb')]

In [70]:
# ? stands for exactly one arbitrary character (here it matches the final "n" of "Python")
list(cwd.glob('Files_Pytho?.ipynb'))

[PosixPath('/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/Files_Python.ipynb')]

### Saving Variables with shelve Module

Save Variables to File 'mydata.db'

In [76]:
import shelve
variables = ['var1', 'var2', 'var3']
name = ['Gitstetter']

# Open the shelf in a with-block so it is flushed and closed even if a write fails
with shelve.open('mydata') as shelfFile:
    # Treat shelfFile just like any dictionary
    shelfFile['variables'] = variables
    shelfFile['name'] = name

# Drop the in-memory copies; the values are now stored in the shelf file
del variables, name

Read Variable from File 'mydata.db'

In [77]:
# Re-open the shelf; the with-block closes it again once the values are read
# (the original cell left the shelf open)
with shelve.open('mydata') as shelfFile:
    # List keys
    print(list(shelfFile.keys()))
    # List values
    print(list(shelfFile.values()))
    # Values come back as ordinary Python objects that outlive the shelf
    variablesNew = shelfFile['variables']
print(variablesNew)

['variables', 'name']
[['var1', 'var2', 'var3'], ['Gitstetter']]
['var1', 'var2', 'var3']


### The shutil Module

In [108]:
import shutil # shell utilities

# Make new txt file
with open('example.txt', 'w') as File:
    File.write('Aloha!')

# Make sure the target folder exists: without it, shutil.copy() would create a
# plain file named 'Folder' and the copy to Folder/NEW.txt below would fail
(cwd/'Folder').mkdir(exist_ok=True)

# Copy file to new location (destination is a folder, so the file name is kept)
shutil.copy('example.txt', cwd/'Folder')

# Copy & rename the file
shutil.copy('example.txt', cwd/'Folder'/'NEW.txt')

# Copy entire folder & all contents with shutil.copytree()
# NOTE(review): the destination lies inside the tree being copied and the call
# raises if 'backup' already exists - confirm this is the intended demo
shutil.copytree(cwd , cwd/'backup')

# Move & Rename File
shutil.move(cwd/'example.txt', cwd/'backup'/'NewFile.txt')

# Delete .txt Files in a folder
for filename in (cwd/'Folder').glob('*.txt'):
    filename.unlink()

# Delete Folder & Contents (permanent - nothing is moved to the trash)
shutil.rmtree(cwd/'backup')

### Safe Delete with send2trash

In [110]:
import send2trash

# Create a throw-away text file to delete
Path('example.txt').write_text('Aloha!')

# Send the file to the OS trash instead of deleting it permanently
send2trash.send2trash('example.txt')

### zipfile module

Create a new zip file and write a file to it:

In [122]:
import zipfile

# The with-block finalises and closes the archive even if write() raises
with zipfile.ZipFile('newzip.zip', 'w') as newZip:
    # Add the shelf file, compressed with DEFLATE instead of being stored as-is
    newZip.write('mydata.db', compress_type=zipfile.ZIP_DEFLATED)

Read in Zipfile:

In [133]:
# Open the archive for reading; the with-block closes it even if a lookup fails
with zipfile.ZipFile('newzip.zip') as exampleZip:
    # Get List of Files in Zip
    print(exampleZip.namelist())
    # Get Info of zipped Files (raises KeyError if the member is missing)
    Info = exampleZip.getinfo('mydata.db')

# The ZipInfo record stays usable after the archive has been closed
print(f'File Size: {Info.file_size}')
print(f'Compressed Size: {Info.compress_size}')
print(f'Compressed File is {round(Info.compress_size/Info.file_size * 100,2)} % of actual File Size')

['mydata.db']
File Size: 16384
Compressed Size: 180
Compressed File is 1.1 % of actual File Size


Extract files from the zip file:

In [135]:
# The with-block guarantees the archive is closed; the original ended with
# `exampleZip.close` (no parentheses), which never called close()
with zipfile.ZipFile('newzip.zip') as exampleZip:
    # Extract whole content into the current working directory
    # (this overwrites existing files of the same name, e.g. mydata.db)
    exampleZip.extractall()

    # Extract specified File; extract() returns the path of the extracted file
    extractedPath = exampleZip.extract('mydata.db')

# Display where the single file was extracted to
extractedPath

'/Users/andreasfussstetter/Jupyter Notebooks/Files_in_Python/mydata.db'