SPECIFICHE PROGETTO

Creare, in un notebook, uno script Python che iteri in ordine alfabetico sui file della cartella files e, a seconda del tipo (audio, documento, immagine), li sposti nella relativa sottocartella di riferimento. 

Se la sottocartella non esiste, lo script dovrà crearla automaticamente.

Durante il ciclo, lo script stamperà le informazioni dei file: nome, tipo e dimensione in byte.

Via via che sposta i file, lo script terrà anche traccia dei file creando un documento recap.csv con le stesse informazioni.

Attenzione: lo script, ogni volta che viene lanciato per spostare nuovi file, aggiornerà (e non sovrascriverà) le sottocartelle e il file di recap.

In [57]:
import os
import sys
from shutil import move, Error 
import csv
import itertools

# Guard: the paths used below are built from the working directory, so the
# script stops unless the working directory path ends with "FileOrganizer".
cur_dir = os.getcwd()
if not cur_dir.endswith('FileOrganizer'):
    # The backslash is written as "\\" so the message still shows a literal
    # "\FileOrganizer" without relying on the invalid escape sequence "\F".
    sys.exit(" Please leave this notebook under \\FileOrganizer directory after you have unzipped it.")

       
# Record of the file being handled: its name, its type and its size.
file_info = dict(name='', type='', size=0)

# Destination subfolder -> file extensions that are routed to that subfolder.
extentions = {
    'images': ['.png', '.jpg', '.jpeg'],
    'docs': ['.txt', '.odt'],
    'audio': ['.mp3'],
}

# Collects the errors caught while the files are processed.
errors = list()

# Points at the \files directory under the working directory and collects the
# names of its entries in sorted order.
dir_files = os.path.join(cur_dir, 'files')

if not os.path.isdir(dir_files):
    # Nothing to organize yet: create the empty directory and stop, asking
    # the user to put some files into it.
    os.makedirs(dir_files)
    sys.exit("Please fill up the directory /files with new files")

src_names = os.listdir(dir_files)
src_names.sort()

# Makes sure every destination directory exists inside \files.
# The folder names are the keys of "extentions", so each category a file can
# be routed to always has a directory ready to receive it.
for subfolder in extentions:
    new_dir = os.path.join(dir_files, subfolder)
    # exist_ok=True creates the directory only when it is missing, with no
    # separate check-then-create step.
    os.makedirs(new_dir, exist_ok=True)

# Creates recap.csv with its header row when the file does not exist yet;
# an existing recap.csv is left untouched so new rows can be appended to it.
csv_file = os.path.join(dir_files, 'recap.csv')
if not os.path.isfile(csv_file):
    # Opens the very same path that was just tested, instead of a second,
    # separately spelled relative path.
    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['name', 'type', 'size(B)'])

# Moves every supported file of \files into its destination subfolder, prints
# its info (name, type, size) and appends the same info to recap.csv.
# Errors are collected file by file, so that a failing file does not stop the
# processing of the others; they are raised all together after the loop.

# every extension listed in "extentions", shown when a file is not supported
supported_ext = list(itertools.chain.from_iterable(extentions.values()))

try:
    # "with" closes recap.csv even when an error interrupts the loop
    with open(csv_file, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')

        for name in src_names:                      # loops through the entries listed from directory \files
            src_file = os.path.join(dir_files, name)    # path of the entry inside directory \files

            # skips anything that is not a regular file (e.g. the destination
            # directories) and the recap file itself, which is not an input
            if not os.path.isfile(src_file) or src_file == csv_file:
                continue

            basename, ext = os.path.splitext(name)  # splits the name into root and extension
            extention = ext.lower()                 # lower-cased so that ".JPG" is handled like ".jpg"

            # names of the destination folders whose extension list contains this extension
            folder = [key for key, value in extentions.items() if extention in value]
            if folder:
                dst_dir_name = folder[0]            # defines in which folder the file will be moved
            elif extention == '':
                continue                            # files without extension are skipped silently
            else:
                print("In order to be processed, the file must have one of the following extentions: \n"
                      + str(supported_ext))
                continue

            # "images" and "docs" lose their final "s"; "audio" is used as it is
            file_type = dst_dir_name if dst_dir_name == 'audio' else dst_dir_name[:-1]

            dst_dir = os.path.join(dir_files, dst_dir_name)  # destination path (stopping at dir level)
            dst_file = os.path.join(dst_dir, name)           # destination path including the file name

            if os.path.isfile(dst_file):            # never overwrites a file already present in the destination
                print("The file %s already exists in the destination directory" %name)
                continue

            try:
                size = os.path.getsize(src_file)    # size in bytes, read before the file is moved
                move(src_file, dst_dir)
            # str(err) is used because err.args[0] is not always a string
            # (for most OS errors it is the integer error number)
            except Error as err:
                errors.append("Shutil Error: " + str(err))
                continue
            except OSError as err:
                errors.append("OSError: " + str(err))
                continue

            file_info['name'] = basename
            file_info['type'] = file_type
            file_info['size'] = size

            # prints file's info: name, type and size
            print("%s type:%s size:%dB" % (file_info['name'], file_info['type'], file_info['size']))

            # writes file's info into the recap.csv file
            writer.writerow([file_info['name'], file_info['type'], str(file_info['size'])])

except OSError as err:
    # recap.csv itself could not be opened or written
    errors.append("OSError: " + str(err))

if errors:
    raise Error(errors)

bw type:image size:94926B
ciao type:doc size:12B
daffodil type:image size:24657B
eclipse type:image size:64243B
pippo type:doc size:8299B
song1 type:audio size:1087849B
song2 type:audio size:764176B
trump type:image size:10195B


Quest'altro script itererà sui file presenti nella sottocartella images e costruirà una tabella riassuntiva (prodotta con la libreria tabulate) che riporta:
- altezza dell'immagine, in pixel
- larghezza dell'immagine, in pixel
- se l'immagine è in scala di grigio, la colonna grayscale indica la media dei valori dell'unico livello di colore
- se l'immagine è a colori, le altre colonne indicano la media dei valori di ogni livello di colore.

NOTA: 
Il modulo Image della libreria PIL permette di caricare un'immagine, che può essere trasformata in un array NumPy attraverso la funzione np.array. 
A partire da tale array, è possibile capire se l'immagine caricata è in scala di grigio, RGB o RGBA.


In [54]:
import os
import numpy as np
from tabulate import tabulate
from PIL import Image

# Resolves the directory that holds the images: the working directory itself
# when its path already ends with "images", otherwise its files/images subfolder.
cur_dir = os.getcwd()
if not cur_dir.endswith('images'):
    # the components are passed separately so that os.path.join picks the separator
    cur_dir = os.path.join(cur_dir, 'files', 'images')

# os.listdir returns the entries in arbitrary order: sorting them keeps the
# rows of the table in the same order at every run
image_names = sorted(os.listdir(cur_dir))

table = []    # this list holds other lists containing image info to be added into the table
errors = []   # this list will collect the caught errors

# Builds one table row per image: name, height, width, grayscale mean and the
# R, G, B, ALPHA means (0.0 is used for the columns that do not apply).
# A file that cannot be read, or whose array layout is not supported, is
# recorded in "errors" and skipped, so the table of the other images is still
# printed; the collected errors are raised after the table.
for infile in image_names:                     # loops through the files inside folder \images
    image_name = os.path.splitext(infile)[0]   # filename without extension
    image = os.path.join(cur_dir, infile)      # complete path to the image file

    try:
        with Image.open(image) as im:
            np_image = np.array(im)            # converts the image opened with the PIL library into a NumPy array
    except OSError as err:
        # str(err) is used because err.args[0] is not always a string
        # (for most OS errors it is the integer error number)
        errors.append("OSError: " + str(err))
        continue

    image_shape = np_image.shape               # (H, W) or (H, W, channels)
    channels = image_shape[2] if np_image.ndim == 3 else None

    # only 2-D arrays (grayscale) and 3-D arrays with 3 (RGB) or 4 (RGBA)
    # channels fit the columns of the table
    if np_image.ndim != 2 and channels not in (3, 4):
        errors.append("Unsupported image layout %s for file %s" % (image_shape, infile))
        continue

    # mean over rows and columns: a single value for a 2-D array,
    # one value per channel for a 3-D array
    image_mean = np.mean(np_image, axis=(0, 1))

    image_info = [image_name]
    image_info += image_shape[0:2]             # adds the height and width
    if channels is None:                       # grayscale image
        image_info += [image_mean, 0.0, 0.0, 0.0, 0.0]
    elif channels == 3:                        # RGB image: no alpha channel
        image_info += [0.0] + list(image_mean) + [0.0]
    else:                                      # RGBA image
        image_info += [0.0] + list(image_mean)

    table += [image_info]                      # adds the image info list into the table list

# prints image info in tabular form by calling tabulate method of tabulate library
print(tabulate(table, headers=["name","height", "width", "grayscale", "R", "G", "B", "ALPHA"], tablefmt="fancy_grid", floatfmt=".2f"))

if errors:
    # a built-in exception is raised because no "Error" name is imported by this script
    raise RuntimeError(errors)


╒══════════╤══════════╤═════════╤═════════════╤════════╤════════╤═══════╤═════════╕
│ name     │   height │   width │   grayscale │      R │      G │     B │   ALPHA │
╞══════════╪══════════╪═════════╪═════════════╪════════╪════════╪═══════╪═════════╡
│ bw       │      512 │     512 │       21.48 │   0.00 │   0.00 │  0.00 │    0.00 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ daffodil │      500 │     335 │        0.00 │ 109.23 │  85.52 │  4.77 │    0.00 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ eclipse  │      256 │     256 │        0.00 │ 109.05 │ 109.52 │ 39.85 │  133.59 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ trump    │      183 │     275 │        0.00 │  97.01 │  98.99 │ 90.92 │    0.00 │
╘══════════╧══════════╧═════════╧═════════════╧════════╧════════╧═══════╧═════════╛
