* Getting started with `os`
* Manage folders
* Validate paths
* Parsing paths
* Walk through folder recursively
* Overview of `glob`
* Get file names from a given folder
* Get file names recursively
* Exercises and Solutions

In [None]:
# Getting started with os
import os

In [None]:
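# Commonly used members of the os module:
# os.path     - submodule for inspecting and manipulating path names
# os.environ  - mapping of the process environment variables
# os.mkdir    - create a single directory
# os.makedirs - create a directory along with any missing parent directories
# os.walk     - generate the directory tree under a top-level folder
# os.system   - run a command in a subshell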

In [None]:
# Manage folders
# os.mkdir creates one directory and expects its parent to already exist
help(os.mkdir)
# os.makedirs also creates any missing intermediate directories
help(os.makedirs)

In [None]:
# Validate paths
# os.path.isdir reports whether a path is an existing directory
help(os.path.isdir)

In [None]:
# True only when data/retail_db (relative to the current working directory)
# exists and is a directory
os.path.isdir('data/retail_db')

In [None]:
# isdir is False for a regular file, and also for a path that does not exist
os.path.isdir('data/README.md')

In [None]:
# os.path.isfile reports whether a path is an existing regular file
help(os.path.isfile)

In [None]:
# isfile is False for a directory, and also for a path that does not exist
os.path.isfile('data/retail_db')

In [None]:
# True only when data/README.md exists and is a regular file
os.path.isfile('data/README.md')

In [None]:
# Parsing paths
# os.path.split separates a path into (head, tail), tail being the last component
help(os.path.split)

In [None]:
# -> ('data/retail_db/orders', 'part-00000'): folder part and file name
os.path.split('data/retail_db/orders/part-00000')


In [None]:
# os.path.splitext separates a path into (root, ext)
help(os.path.splitext)

In [None]:
# Only the last extension is split off:
# -> ('data/nyse_all/nyse_data/NYSE_1997.txt', '.gz')
os.path.splitext('data/nyse_all/nyse_data/NYSE_1997.txt.gz')

In [None]:
# Walk through the folder recursively
# os.walk yields a (dirpath, dirnames, filenames) tuple for each directory in the tree
help(os.walk)

In [None]:
os.walk('data') # only creates a generator; the data folder is traversed recursively once it is iterated

In [None]:
# Each step of os.walk describes one directory: its path, the names of its
# sub-folders and the names of its files
for dirpath, dirnames, filenames in os.walk('data/retail_db'):
    print((dirpath, dirnames, filenames))

In [None]:
# Overview of glob
import glob

In [None]:
# glob.glob returns a list of paths matching a shell-style pattern
help(glob.glob)

In [None]:
# Get file names from a given folder
# '*' matches the entries directly under data/retail_db, sub-folders included
glob.glob('data/retail_db/*')

In [None]:
import os

In [None]:
# Print only the regular files among the direct children of data/retail_db
direct_children = glob.glob('data/retail_db/*')
for file_path in filter(os.path.isfile, direct_children):
    print(file_path)

In [None]:
# Get file names recursively
# With recursive=True, '**' matches files and any number of nested folders,
# so the result lists directories as well as files
glob.glob('data/retail_db/**', recursive=True)

In [None]:
# Any entry under data, at any depth, whose name starts with 'part'
glob.glob('data/**/part*', recursive=True)

In [None]:
# Print every regular file found anywhere under data/retail_db, skipping
# the directories that '**' also matches
nested_entries = glob.glob('data/retail_db/**', recursive=True)
for file_path in (entry for entry in nested_entries if os.path.isfile(entry)):
    print(file_path)

* Exercise 1: Get the number of files in `data/retail_db`.

* Exercise 2: Get the files from the `data` folder that are compressed using `zip` or `gz`.

* Exercise 3: Get the count of **files** whose names start with `part` from the `data` folder. Consider only the name of the file, not its full path.

* Exercise 4: Create folders for all the months from January 2022 through March 2023 (both inclusive). The folders should be created under `data/sales/`. For example, the folder for January 2022 is `data/sales/sales_202201`.

```python
import calendar
from datetime import datetime as dt

from dateutil.relativedelta import relativedelta

def get_folder_suffixes(lb, ub):
    """Return the 'YYYYMM' suffix of each month visited from lb up to ub.

    Starting at ``lb``, the function steps forward one calendar month at a
    time and records the suffix of every date visited while that date is
    strictly earlier than ``ub``.

    Args:
        lb: Start date as a ``'%Y-%m-%d'`` string, e.g. ``'2022-01-01'``.
        ub: End date in the same format. The bound is exclusive at the date
            level: with ``lb`` on the first of a month, ``'2023-03-31'``
            keeps ``'202303'`` in the result while ``'2023-03-01'`` does not.

    Returns:
        List of ``'YYYYMM'`` strings in chronological order; empty when
        ``lb`` is not earlier than ``ub``.

    Raises:
        ValueError: If ``lb`` or ``ub`` does not match ``'%Y-%m-%d'``.
    """
    # Parse both bounds once and compare real dates: comparing the raw
    # strings goes wrong for input that is not zero-padded ('2022-3-31').
    current = dt.strptime(lb, '%Y-%m-%d')
    upper = dt.strptime(ub, '%Y-%m-%d')

    folder_suffixes = []
    while current < upper:
        folder_suffixes.append(current.strftime('%Y%m'))
        # Move to the same day of the next month. year * 12 + month is the
        # zero-based month index of the *next* month.
        year, month_index = divmod(current.year * 12 + current.month, 12)
        month = month_index + 1
        # Clamp the day to the length of the target month (Jan 31 -> Feb 28).
        day = min(current.day, calendar.monthrange(year, month)[1])
        current = current.replace(year=year, month=month, day=day)
    return folder_suffixes

# Bounds for the January 2022 - March 2023 range asked for in Exercise 4
lb = '2022-01-01'
# The loop stops once the stepping date reaches ub, so the last day of
# March keeps '202303' in the result
ub = '2023-03-31'

# Yields the suffixes '202201' through '202303'
get_folder_suffixes(lb, ub)
```