### Extracting and organising the data after downloading the .zip files from Google Drive

After downloading the .zip files of a continent from Google Drive, we want to extract all individual scenes, along with their corresponding masks, mask_derivates and metadata, into individual folders.

This program uses *Oceania.zip* and *Oceania_metadata.zip*, which should be downloaded into a subfolder named *Oceania*. The paths can be changed as needed.

A new *Images* folder will be created inside the *Oceania* subfolder, containing one folder per scene. Each folder in this directory will have *Scenes*, *Masks*, *Masks_derivates* (not present for every scene) and *Metadata* subfolders.

In [1]:
import os
from zipfile import ZipFile


In [3]:
parent = "Oceania"  # change this according to the folder you want to extract

# Archive holding the per-scene image/mask zips; the extension is .zip, not .rar.
data_path = "Oceania.zip"

# Unpack the image archive into <parent>/images_rar. The destination is built
# from `parent` (not hard-coded) so that changing `parent` above also moves
# the output, keeping it where the later cells look for it.
with ZipFile(os.path.join(parent, data_path), 'r') as zObject:
    zObject.extractall(path=os.path.join(parent, "images_rar"))

print("DONE exctracting all image files. Please check the images_rar folder")

# Archive holding the per-scene metadata zips.
data_path = "Oceania_metadata.zip"

# Unpack the metadata archive into <parent>/metadata_rar.
with ZipFile(os.path.join(parent, data_path), 'r') as zObject:
    zObject.extractall(path=os.path.join(parent, "metadata_rar"))

print("DONE exctracting all metadata files. Please check the metadata_rar folder")

DONE exctracting all image files. Please check the images_rar folder
DONE exctracting all metadata files. Please check the metadata_rar folder


In [4]:
# Names of all entries found in <parent>/images_rar.
file_list = os.listdir(os.path.join(parent, "images_rar"))

In [5]:
# Show the entry names collected from the images_rar folder.
file_list

['z049066.zip',
 'z049066_masks.zip',
 'z049066_masks_derivates.zip',
 'z053072.zip',
 'z053072_masks.zip',
 'z063046.zip',
 'z063046_masks.zip',
 'z063046_masks_derivates.zip',
 'z065045.zip',
 'z065045_masks.zip',
 'z065045_masks_derivates.zip',
 'z067073.zip',
 'z067073_masks.zip',
 'z067073_masks_derivates.zip',
 'z067090.zip',
 'z067090_masks.zip',
 'z067091.zip',
 'z067091_masks.zip',
 'z069069.zip',
 'z069069_masks.zip',
 'z069069_masks_derivates.zip',
 'z069070.zip',
 'z069070_masks.zip',
 'z072086.zip',
 'z072086_masks.zip',
 'z072086_masks_derivates.zip',
 'z072087.zip',
 'z072087_masks.zip',
 'z072087_masks_derivates.zip',
 'z072088.zip',
 'z072088_masks.zip',
 'z072088_masks_derivates.zip',
 'z072089.zip',
 'z072089_masks.zip',
 'z072089_masks_derivates.zip',
 'z074071.zip',
 'z074071_masks.zip',
 'z074071_masks_derivates.zip',
 'z074072.zip',
 'z074072_masks.zip',
 'z074072_masks_derivates.zip',
 'z074073.zip',
 'z074073_masks.zip',
 'z074073_masks_derivates.zip',
 'z07408

In [6]:
def _extract_zip(archive_path, target_dir):
    """Extract every member of the zip at `archive_path` into `target_dir`."""
    # The context manager closes the archive even if extraction raises.
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(path=target_dir)


# For every scene archive in <parent>/images_rar, build
# <parent>/Images/<scene>/ with Scenes, Masks, Masks_derivates and Metadata
# subfolders (the last three only when the matching archive exists).
images_rar_dir = os.path.join(parent, "images_rar")
metadata_rar_dir = os.path.join(parent, "metadata_rar")

i = 0  # number of scene folders processed
for file_name in file_list:

    # Ignore stray non-zip entries (e.g. OS metadata files); passing them to
    # ZipFile would abort the whole run.
    if not file_name.endswith(".zip"):
        continue

    # Mask and mask-derivate archives are handled together with their scene
    # archive below, so they are skipped here.
    if "masks" in file_name:
        continue

    # Current file is an archive of multiband images.
    folder_name = file_name[:-4]  # name of the file without the ".zip"

    # Folder named after the .zip file; it will hold the Scenes, Metadata,
    # Masks etc. subfolders. exist_ok lets the cell be re-run safely.
    scene_dir = os.path.join(parent, "Images", folder_name)
    os.makedirs(scene_dir, exist_ok=True)

    # Extract the multiband scenes into the Scenes folder.
    _extract_zip(
        os.path.join(images_rar_dir, file_name),
        os.path.join(scene_dir, "Scenes"),
    )

    # Optional companion archives and the subfolder each one is extracted to.
    # The metadata archive has the same file name as the scene archive but
    # lives in metadata_rar.
    optional_archives = (
        (os.path.join(images_rar_dir, f"{folder_name}_masks.zip"), "Masks"),
        (os.path.join(images_rar_dir, f"{folder_name}_masks_derivates.zip"), "Masks_derivates"),
        (os.path.join(metadata_rar_dir, file_name), "Metadata"),
    )
    for archive_path, subfolder in optional_archives:
        if os.path.exists(archive_path):
            _extract_zip(archive_path, os.path.join(scene_dir, subfolder))

    i += 1

print("DONE !", i, "Folders in Images")
        
    
    

DONE ! 190 Folders in Images
