In [2]:
import os
import shutil
import csv
import numpy as np
from PIL import Image
from tabulate import tabulate

In [None]:
# Every path in this notebook is relative to the "files" folder, so the notebook works wherever
# that folder lives, as long as the files to organise sit directly inside it.
# os.listdir() returns entries in arbitrary order, so the list is sorted to make the
# processing order (and therefore the csv row order) deterministic.
files_list = sorted(os.listdir("files"))

# Supported extensions grouped by category; the category name is also the destination subfolder.
extensions = {
    "Images": [".jpg", ".jpeg", ".png"],
    "Docs": [".doc", ".txt", ".odt"],
    "Audio": [".mp3"]
}

# Reverse lookup (extension -> category). Classifying each file once through this map avoids
# looping over the categories per file, which used to report a file as "not supported"
# whenever it simply belonged to a different category than the one being examined.
category_by_extension = {
    ext: category
    for category, ext_list in extensions.items()
    for ext in ext_list
}

# Make sure a destination folder exists for every supported category before moving anything.
for key in extensions:
    os.makedirs(f"files/{key}", exist_ok=True)

# The recap is opened in append mode so that earlier runs are preserved; newline="" prevents
# the csv module from emitting blank lines between rows.
with open("files/recap.csv", "a", encoding="UTF8", newline="") as f:

    # DictWriter maps dictionary keys onto the csv columns listed in "fieldnames".
    writer = csv.DictWriter(f, fieldnames=["name", "type", "size(B)"])
    if f.tell() == 0:  # Position 0 in append mode means the file is empty: write the header once
        writer.writeheader()

    for file in files_list:
        # Skip sub-folders and the recap file itself (it lives in the same folder being scanned).
        if file == "recap.csv" or not os.path.isfile(f"files/{file}"):
            continue

        # Split the file name from its extension; the lookup is case-insensitive (".JPG" == ".jpg").
        file_name, extension = os.path.splitext(file)
        key = category_by_extension.get(extension.lower())

        # Unsupported extensions are reported exactly once and the file is left where it is.
        if key is None:
            print(f"the extension {extension} is not yet supported")
            continue

        src = f"files/{file}"        # Original location
        dst = f"files/{key}/{file}"  # Location inside the category folder
        shutil.move(src=src, dst=dst)

        # One csv row per moved file; the size is read from the file's new location.
        info = {
            "name": file_name,
            "type": key,
            "size(B)": os.path.getsize(dst)
        }
        writer.writerow(info)
        print(f"{info['name']} type: {info['type']} size: {info['size(B)']}B")

bw.png type: Images size: 94926B
ciao.txt type: Docs size: 12B
daffodil.jpg type: Images size: 24657B
eclipse.png type: Images size: 64243B
pippo.odt type: Docs size: 8299B
song1.mp3 type: Audio size: 1087849B
song2.mp3 type: Audio size: 764176B
trump.jpeg type: Images size: 10195B


In [4]:


# List the files contained in the Images folder. os.listdir() returns entries in arbitrary
# order, so the list is sorted to keep the table rows in a stable order between runs.
images_list = sorted(os.listdir("files/Images"))

# Data for the table: each key is a column header and each list holds one value per image.
table = {
        "name": [],
        "height": [],
        "width": [],
        "grayscale" : [],
        "R" : [],
        "G" : [],
        "B" : [],
        "ALPHA" : []
    }


def channel_means(pixels):
    """Return the average value of every colour channel of an image array.

    Parameters:
        pixels: array of shape (height, width) for a single-channel image, or
            (height, width, channels) for a multi-channel image.

    Returns:
        dict with the keys "grayscale", "R", "G", "B" and "ALPHA". Channels the
        image does not have are reported as 0.
    """
    means = {"grayscale": 0, "R": 0, "G": 0, "B": 0, "ALPHA": 0}

    # A 2-D array has a single channel: average over all rows and columns.
    if pixels.ndim < 3:
        means["grayscale"] = np.mean(pixels)
        return means

    # Collapse rows (axis 0) and columns (axis 1) to get one average per channel.
    per_channel = np.mean(pixels, axis=(0, 1))
    n_channels = pixels.shape[2]

    # One or two channels: luminance, optionally followed by alpha. Indexing
    # R/G/B on such an array would raise IndexError.
    if n_channels < 3:
        means["grayscale"] = per_channel[0]
        if n_channels == 2:
            means["ALPHA"] = per_channel[1]
        return means

    # Three or four channels: R, G, B, optionally followed by alpha.
    for n, key in enumerate(["R", "G", "B"]):
        means[key] = per_channel[n]
    if n_channels == 4:
        means["ALPHA"] = per_channel[3]
    return means


for file in images_list:
    path = f"files/Images/{file}"

    # The context manager closes the underlying file handle once the pixels are in memory.
    with Image.open(path) as img:
        if img.mode == "P":
            # Palette images store colour-table indices, not colours: expand them to real
            # channels first, keeping alpha only when the palette declares transparency.
            pixel_source = img.convert("RGBA" if "transparency" in img.info else "RGB")
        else:
            pixel_source = img
        np_img = np.array(pixel_source)

    # The file name without its extension identifies the row.
    table["name"].append(os.path.splitext(file)[0])

    # shape[0] is the number of rows (height), shape[1] the number of columns (width).
    table["height"].append(np_img.shape[0])
    table["width"].append(np_img.shape[1])

    # One average per colour column; absent channels are 0.
    for key, value in channel_means(np_img).items():
        table[key].append(value)

# Print the collected information as a grid. The dictionary keys become the headers and
# "floatfmt" rounds the averages to two decimal places.
print(tabulate(table, headers=table.keys(), tablefmt="fancy_grid",floatfmt=".2f"))


    

╒══════════╤══════════╤═════════╤═════════════╤════════╤════════╤═══════╤═════════╕
│ name     │   height │   width │   grayscale │      R │      G │     B │   ALPHA │
╞══════════╪══════════╪═════════╪═════════════╪════════╪════════╪═══════╪═════════╡
│ bw       │      512 │     512 │       21.48 │   0.00 │   0.00 │  0.00 │    0.00 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ daffodil │      500 │     335 │        0.00 │ 109.25 │  85.56 │  4.97 │    0.00 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ eclipse  │      256 │     256 │        0.00 │ 109.05 │ 109.52 │ 39.85 │  133.59 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ trump    │      183 │     275 │        0.00 │  97.01 │  98.99 │ 90.92 │    0.00 │
╘══════════╧══════════╧═════════╧═════════════╧════════╧════════╧═══════╧═════════╛
