<p>The first ever project in Python, made by Gino Ferretti</p>

<h1>File organizer</h1>
<h2>Step 1</h2>

<p>Organize the files inside the folder named "files" into subfolders such as docs, audios, images. Create a recap file inside the "files" folder.</p>

In [1]:
#in case tabulate is not installed, uncomment and run
#conda install tabulate

In [2]:
#importing the necessary libraries

import os
#from os import walk
#from os.path import join, getsize, abspath
import shutil
import csv

import numpy as np
import PIL
from PIL import Image
from tabulate import tabulate

In [3]:
#os.walk yields one (path, folder names, file names) triple per directory, starting from the top one;
#only the first triple is taken, so what we get describes the folder 'files' itself

files_root = os.path.abspath('files')
first_level = next(os.walk(files_root))
dirpath, dirnames, filenames = first_level

#check what we have got, showing each value together with its type:
# - dirpath   -> the path of 'files', saved as string
# - dirnames  -> the names of the folders inside 'files', as list
# - filenames -> the names of the files inside 'files', as list

print('dirpath: {}\ntype: {}\n'.format(dirpath, type(dirpath)))
print('dirnames: {}\ntype: {}\n'.format(dirnames, type(dirnames)))
print('filenames: {}\ntype: {}\n'.format(filenames, type(filenames)))

dirpath: C:\Users\Xmoca\files
type: <class 'str'>

dirnames: ['.ipynb_checkpoints']
type: <class 'list'>

filenames: ['bw.png', 'ciao.txt', 'daffodil.jpg', 'eclipse.png', 'pippo.odt', 'song1.mp3', 'song2.mp3', 'trump.jpeg']
type: <class 'list'>



In [4]:
#order the names with sorted() and write the result back into the same list, then display it
#(plain string comparison, so uppercase letters come before lowercase ones)
filenames[:] = sorted(filenames)
filenames

['bw.png',
 'ciao.txt',
 'daffodil.jpg',
 'eclipse.png',
 'pippo.odt',
 'song1.mp3',
 'song2.mp3',
 'trump.jpeg']

In [5]:
#lookup table that assigns each extension (leading dot included) to its 'file type'
#Finding this association automatically in the following loop would have been more efficient,
#also because it could deal with extensions outside this table;
#since I didn't figure out how to do that, the table is written by hand.

dictionary = {
    '.jpeg': 'image',
    '.jpg': 'image',
    '.mp3': 'audio',
    '.png': 'image',
    '.txt': 'doc',
    '.odt': 'doc',
}

#quick check
dictionary

{'.jpeg': 'image',
 '.jpg': 'image',
 '.mp3': 'audio',
 '.png': 'image',
 '.txt': 'doc',
 '.odt': 'doc'}

In [6]:
#this function assigns to 'newpath' the path of the folder into which the file should be moved
#if this path does not exist (meaning that the folder does not exist), then it creates a new folder named 'folder_name'

def new_folder(dirpath, file_type):
    """Return the folder that files of ``file_type`` belong in, creating it if needed.

    The folder is named after the file type plus a trailing 's'
    (e.g. 'image' -> 'images') and is located directly inside ``dirpath``.

    Args:
        dirpath: path of the parent folder.
        file_type: name of the file type, e.g. 'image', 'audio' or 'doc'.

    Returns:
        The path of the destination folder, which exists when the function returns.
    """
    folder_name = file_type + 's'
    newpath = os.path.join(dirpath, folder_name)

    #exist_ok=True replaces the separate "is it already there?" test: there is no gap between
    #checking and creating in which the folder could appear and make makedirs fail
    os.makedirs(newpath, exist_ok=True)

    return newpath

#this function creates a new csv file if one is not already present in the folder 'files'.
#if the csv is already present, it just appends the new row at the end of the file

def createcsv(filecsv_name, file, file_type, size, folder='files'):
    """Record one file in the recap csv, creating the csv (with headers) when needed.

    Args:
        filecsv_name: name of the recap file, e.g. 'recap.csv'.
        file: name of the file being recorded.
        file_type: type assigned to the file, e.g. 'image'.
        size: size of the file in bytes.
        folder: folder that holds the recap file; a relative path is resolved
            against the current working directory. Defaults to 'files'.
    """
    #build the path once and use it both for the check and for the open
    csv_path = os.path.join(os.path.abspath(folder), filecsv_name)

    #a missing file and a zero-byte file both still need the header row
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    app_or_write = 'w' if needs_header else 'a'  #write / append

    print('app_or_write: ', app_or_write) #print to be sure that we are not creating a new file everytime

    #newline='' is what the csv module asks for; the explicit encoding keeps the file
    #independent from the platform default, so any file name can be written
    with open(csv_path, app_or_write, newline='', encoding='utf-8') as csvfile:
        recap = csv.writer(csvfile)

        #writing headers only if we are creating (or filling for the first time) the file
        if needs_header:
            recap.writerow(['name', 'type', 'size(B)'])
        #writing the data about the file moved
        recap.writerow([file, file_type, size])

In [7]:
#loop over every file in filenames, printing: -the name  -the size  -the type
#then move the file into the folder of its type and record it in the recap file

#the name of the recap file never changes, so it is set once, before the loop
filecsv_name = 'recap.csv'

for file in filenames:

    #when we find the 'recap file', that is a csv, just pass to the next iteration
    if file.endswith('.csv'):
        continue

    print('\nName: ', file)

    size = os.path.getsize(os.path.join(dirpath, file))
    print('Size:  {} B '.format(size))

    #checking in the dictionary the file type according to the extension.
    #The name is lower-cased first, so 'PHOTO.JPG' is recognised like 'photo.jpg'.
    #file_type is computed again for every file and is None when no extension matches:
    #this way a file with an unlisted extension can never inherit the type of the previous file
    lowered_name = file.lower()
    file_type = next(
        (value for key, value in dictionary.items() if lowered_name.endswith(key)),
        None,
    )

    if file_type is None:
        #unknown extension: do not guess a folder, leave the file where it is
        print('Type:  unknown (file not moved)')
        continue

    print('Type: ', file_type)

    #calling the function new_folder
    newpath = new_folder(dirpath, file_type)
    #moving the file in the corresponding folder
    shutil.move(os.path.join(dirpath, file), os.path.join(newpath, file))

    #writing in the recap file the information about the file, using the function createcsv
    createcsv(filecsv_name, file, file_type, size)


Name:  bw.png
Size:  94926 B 
Type:  image
app_or_write:  w

Name:  ciao.txt
Size:  12 B 
Type:  doc
app_or_write:  a

Name:  daffodil.jpg
Size:  24657 B 
Type:  image
app_or_write:  a

Name:  eclipse.png
Size:  64243 B 
Type:  image
app_or_write:  a

Name:  pippo.odt
Size:  8299 B 
Type:  doc
app_or_write:  a

Name:  song1.mp3
Size:  1087849 B 
Type:  audio
app_or_write:  a

Name:  song2.mp3
Size:  764176 B 
Type:  audio
app_or_write:  a

Name:  trump.jpeg
Size:  10195 B 
Type:  image
app_or_write:  a


<h2>Step 3</h2>

<p>Iterate over all the images inside the folder "images" and return a table indicating: height, width, and the mean values of grayscale, red, green, blue and alpha, depending on whether the image is black and white, RGB or RGB 4 (RGB plus an alpha level).</p>

In [8]:
#build the path of the folder "images" inside dirpath (dirpath is the value obtained in step 1)
#and display it

images_folder_name = 'images'
imgpath = os.path.join(dirpath, images_folder_name)
imgpath

'C:\\Users\\Xmoca\\files\\images'

In [9]:
#list the images only when the folder "images" exists; when it does not, just say so

if not os.path.isdir(imgpath):
    print('There is not a folder called "images"')
else:
    #the first triple yielded by os.walk describes imgpath itself: only its list of files is needed,
    #so the path and the folder names are bound to the throwaway name __
    __, __, images = next(os.walk(imgpath))
    print('imgpath: {}\ntype: {}\n'.format(imgpath, type(imgpath)))
    print('images: {}\ntype: {}\n'.format(images, type(images)))

imgpath: C:\Users\Xmoca\files\images
type: <class 'str'>

images: ['bw.png', 'daffodil.jpg', 'eclipse.png', 'trump.jpeg']
type: <class 'list'>



In [10]:
#build and print a table with the features of every image in the folder "images";
#execute the code only if the folder exists, if not just say that it doesn't exist
if os.path.isdir(imgpath):

    #table initialization: a list of rows whose first row holds the headers
    #(tabulate is called with headers='firstrow' below)
    table = [['name', 'height', 'width', 'grayscale', 'R', 'G', 'B', 'ALPHA']]

    #this check is done before the loop: the body of a for over an empty list never runs,
    #so a check placed inside the loop could never print the message
    if not images:
        print('There are no images in the folder "images"')

    for image in images:

        print(image)
        #open the image with Image from PIL and copy its pixels into an array called np_img;
        #the "with" closes the image file as soon as the array has been built
        with Image.open(os.path.join(imgpath, image)) as img:
            np_img = np.array(img)

        #print the features
        print('shape: ', np_img.shape)
        print('ndim: ', np_img.ndim)
        print('size: ', np_img.size)

        #all the images have height and width --> they are the first two entries of .shape
        height, width = np_img.shape[:2]

        #a level that the image does not have is reported as 0
        grayscale = R = G = B = ALPHA = 0

        #ndim == 2: one single level per pixel, so we get the mean of the only level (grayscale)
        #NOTE(review): palette images presumably also load as 2-D arrays (of palette indexes) - confirm
        if np_img.ndim == 2:
            grayscale = '{:.2f}'.format(np_img.mean())
            print('grayscale: ', grayscale)

        #ndim == 3: the third axis holds the levels, and its length says which ones are present
        else:
            levels = np_img.shape[2]

            if levels >= 3:
                R = '{:.2f}'.format(np_img[:, :, 0].mean())
                G = '{:.2f}'.format(np_img[:, :, 1].mean())
                B = '{:.2f}'.format(np_img[:, :, 2].mean())
                print('red: ', R)
                print('green: ', G)
                print('blue: ', B)
            else:
                #fewer than three levels: asking for R, G, B would raise IndexError,
                #so the first level is reported as grayscale
                #NOTE(review): assumes a 2-level array is grayscale + alpha (PIL mode "LA") - confirm
                grayscale = '{:.2f}'.format(np_img[:, :, 0].mean())
                print('grayscale: ', grayscale)

            #in case the image is RGB 4 there is another level: ALPHA (index 3);
            #in a 2-level image it is taken from index 1.
            #The shape is checked explicitly instead of catching any error raised by the indexing,
            #so unrelated problems are no longer silently hidden
            if levels == 2 or levels >= 4:
                alpha_index = 1 if levels == 2 else 3
                ALPHA = '{:.2f}'.format(np_img[:, :, alpha_index].mean())
                print('ALPHA: ', ALPHA)

        print('\n')

        #append to the table the features of the analyzed image
        table.append([image, height, width, grayscale, R, G, B, ALPHA])

    #print(table) #just to check
    print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))   #print a "fancy" table with tabulate

else:
    print('There is not a folder called "images"')


bw.png
shape:  (512, 512)
ndim:  2
size:  262144
grayscale:  21.48


daffodil.jpg
shape:  (500, 335, 3)
ndim:  3
size:  502500
red:  109.25
green:  85.56
blue:  4.97


eclipse.png
shape:  (256, 256, 4)
ndim:  3
size:  262144
red:  109.05
green:  109.52
blue:  39.85
ALPHA:  133.59


trump.jpeg
shape:  (183, 275, 3)
ndim:  3
size:  150975
red:  97.01
green:  98.99
blue:  90.92


╒══════════════╤══════════╤═════════╤═════════════╤════════╤════════╤═══════╤═════════╕
│ name         │   height │   width │   grayscale │      R │      G │     B │   ALPHA │
╞══════════════╪══════════╪═════════╪═════════════╪════════╪════════╪═══════╪═════════╡
│ bw.png       │      512 │     512 │       21.48 │   0    │   0    │  0    │    0    │
├──────────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ daffodil.jpg │      500 │     335 │        0    │ 109.25 │  85.56 │  4.97 │    0    │
├──────────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ ec