# CSV to ZIP file


# Este módulo tem o objetivo de carregar dados de arquivo zip sem descompactar arquivos em disco.

## Imports

In [1]:
import zipfile
from pathlib import Path
from tempfile import tempdir
from os import chdir, getcwd

## Variáveis

In [2]:
file = Path('PostgreSQLPython.zip')
file.exists()

True

## Exemplos de acesso

In [3]:
with zipfile.ZipFile(file) as zf:
    zf.printdir()

File Name                                             Modified             Size
PostgreSQLPython/~$PostgreSQLPython.pptx       2021-01-02 11:42:32          165
PostgreSQLPython/~$PythonSQLDW.pptx            2020-10-18 13:50:12          165
PostgreSQLPython/Notebook/                     2021-01-03 12:34:36            0
PostgreSQLPython/Notebook/Data/                2021-01-03 12:30:16            0
PostgreSQLPython/Notebook/Data/DimCurrency.csv 2020-01-05 11:24:40         2259
PostgreSQLPython/Notebook/Data/DimCustomer.csv 2020-01-03 17:25:12      4698932
PostgreSQLPython/Notebook/Data/DimCustomerTransactions.csv 2020-12-20 10:02:34          329
PostgreSQLPython/Notebook/Data/DimDate.csv     2020-01-03 16:45:10       357464
PostgreSQLPython/Notebook/Data/DimGeography.csv 2019-03-02 14:57:18        57275
PostgreSQLPython/Notebook/Data/DimProduct.csv  2018-10-20 08:54:48       152149
PostgreSQLPython/Notebook/Data/DimProductCategory.csv 2020-01-03 16:54:14          265
PostgreSQLPython/Not

In [4]:
with zipfile.ZipFile(file) as zf:
    for f in zf.filelist:
        print(f.filename)

PostgreSQLPython/~$PostgreSQLPython.pptx
PostgreSQLPython/~$PythonSQLDW.pptx
PostgreSQLPython/Notebook/
PostgreSQLPython/Notebook/Data/
PostgreSQLPython/Notebook/Data/DimCurrency.csv
PostgreSQLPython/Notebook/Data/DimCustomer.csv
PostgreSQLPython/Notebook/Data/DimCustomerTransactions.csv
PostgreSQLPython/Notebook/Data/DimDate.csv
PostgreSQLPython/Notebook/Data/DimGeography.csv
PostgreSQLPython/Notebook/Data/DimProduct.csv
PostgreSQLPython/Notebook/Data/DimProductCategory.csv
PostgreSQLPython/Notebook/Data/DimProductSubcategory.csv
PostgreSQLPython/Notebook/Data/DimSalesTerritory.csv
PostgreSQLPython/Notebook/Data/FactInternetSales.csv
PostgreSQLPython/Notebook/PythonPostgreSQL.ipynb
PostgreSQLPython/PostgreSQLPython.pptx


## Extração do conteúdo original no disco local

In [5]:
with zipfile.ZipFile(file) as zf:
    zf.extractall()

## Filtro para arquivos CSV

In [6]:
csvfiles = list(Path('PostgreSQLPython').rglob('**/*.csv'))
csvfiles

[PosixPath('PostgreSQLPython/Notebook/Data/DimProductCategory.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimCurrency.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimDate.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimProductSubcategory.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimGeography.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimSalesTerritory.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimCustomerTransactions.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimCustomer.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/DimProduct.csv'),
 PosixPath('PostgreSQLPython/Notebook/Data/FactInternetSales.csv')]

## Criação de  container para todos os CSV encontrados mantendo a estrutura original

In [7]:
with zipfile.ZipFile('datafile0.zip', 'w') as zf:
    for filename in csvfiles:
        zf.write(filename)

In [8]:
cwd = Path.cwd()
cwd

PosixPath('/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip')

## ZIP contendo CSV em nova estrutura

### Tratativas de navegação em Python

In [9]:
print(getcwd())
print(csvfiles[0].parent)
chdir(Path(csvfiles[0].parent))
print(getcwd())
chdir(cwd)

/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip
PostgreSQLPython/Notebook/Data
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data


## Criação de container para todos os CSV encontrados com nova estrutura

In [10]:
print(Path.cwd(), cwd)
chdir(Path(csvfiles[0].parent))

with zipfile.ZipFile(
    cwd / 'datafile.zip', 'w', compression=zipfile.ZIP_DEFLATED
) as zf:
    for filename in Path().rglob('**/*.csv'):
        print(filename)
        zf.write(filename)
    print(f'>> {zf.filename} finalizado.')
chdir(cwd)

/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip /home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip
DimProductCategory.csv
DimCurrency.csv
DimDate.csv
DimProductSubcategory.csv
DimGeography.csv
DimSalesTerritory.csv
DimCustomerTransactions.csv
DimCustomer.csv
DimProduct.csv
FactInternetSales.csv
>> /home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/datafile.zip finalizado.


## Criação de container para todos os CSV individualmente

### Zip/CSV com paridade 1/1

In [12]:
pwd = Path.cwd()
print(pwd)
for filename in Path().absolute().rglob('**/*.csv'):
    print(filename.parent, filename.name, filename.stem)
    with zipfile.ZipFile(
        pwd.joinpath(filename.name).with_suffix('.zip'),
        'w',
        compression=zipfile.ZIP_DEFLATED,
    ) as zf:
        chdir(filename.parent)
        zf.write(filename.name)
chdir(pwd)
print(Path.cwd())

/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimProductCategory.csv DimProductCategory
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimCurrency.csv DimCurrency
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimDate.csv DimDate
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimProductSubcategory.csv DimProductSubcategory
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimGeography.csv DimGeography
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimSalesTerritory.csv DimSalesTerritory
/home/jovyan/work/incolume/academia_jedi/ajedi20221113_csv2zip/PostgreSQLPython/Notebook/Data DimCustomerTransactions.csv DimCustomerTransactions
/home/jovyan/work/i

## Acesso a conteúdo Zipado

### via pandas

In [16]:
import pandas as pd

In [17]:
df = pd.read_csv('DimCurrency.zip')
df

Unnamed: 0,CurrencyKey,CurrencyAlternateKey,CurrencyName
0,1,AFA,Afghani
1,2,DZD,Algerian Dinar
2,3,ARS,Argentine Peso
3,4,AMD,Armenian Dram
4,5,AWG,Aruban Guilder
...,...,...,...
100,101,KRW,Won
101,102,JPY,Yen
102,103,CNY,Yuan Renminbi
103,104,ZWD,Zimbabwe Dollar


### Pure python

In [24]:
with zipfile.ZipFile('DimCurrency.zip') as zf:
    print(zf.filelist)

[<ZipInfo filename='DimCurrency.csv' compress_type=deflate filemode='-rw-r--r--' file_size=2259 compress_size=1325>]


In [25]:
with zipfile.ZipFile('DimCurrency.zip') as zf:
    print(zf.namelist())

['DimCurrency.csv']


In [27]:
with zipfile.ZipFile('DimCurrency.zip') as zf:
    with zf.open(zf.namelist()[0]) as f:
        print(f.read())

b'\xef\xbb\xbfCurrencyKey,CurrencyAlternateKey,CurrencyName\r\n1,AFA,Afghani\r\n2,DZD,Algerian Dinar\r\n3,ARS,Argentine Peso\r\n4,AMD,Armenian Dram\r\n5,AWG,Aruban Guilder\r\n6,AUD,Australian Dollar\r\n7,AZM,Azerbaijanian Manat\r\n8,BSD,Bahamian Dollar\r\n9,BHD,Bahraini Dinar\r\n10,THB,Baht\r\n11,PAB,Balboa\r\n12,BBD,Barbados Dollar\r\n13,BEF,Belgian Franc\r\n14,VEB,Bolivar\r\n15,BOB,Boliviano\r\n16,BRL,Brazilian Real\r\n17,BND,Brunei Dollar\r\n18,BGN,Bulgarian Lev\r\n19,CAD,Canadian Dollar\r\n20,GHC,Cedi\r\n21,XOF,CFA Franc BCEAO\r\n22,CLP,Chilean Peso\r\n23,COP,Colombian Peso\r\n24,CRC,Costa Rican Colon\r\n25,HRK,Croatian Kuna\r\n26,CYP,Cyprus Pound\r\n27,CZK,Czech Koruna\r\n28,DKK,Danish Krone\r\n29,DEM,Deutsche Mark\r\n30,DOP,Dominican Peso\r\n31,VND,Dong\r\n32,GRD,Drachma\r\n33,EGP,Egyptian Pound\r\n34,SVC,El Salvador Colon\r\n35,AED,Emirati Dirham\r\n36,EUR,EURO\r\n37,FJD,Fiji Dollar\r\n38,HUF,Forint\r\n39,FRF,French Franc\r\n40,PYG,Guarani\r\n41,HKD,Hong Kong Dollar\r\n42,ISK,Ic

TypeError: ZipFile.open() missing 1 required positional argument: 'name'