## Importar Librerías

In [None]:
from psycopg2 import connect, Error
import pandas as pd
import csv
from io import StringIO
import glob
import functools

## Conexión

In [None]:
def getConnection():
    try:
        connection = connect(host='localhost',database='amazon',user='postgres', password='1234', port='5432')   
    except(Exception, Error) as error:
        connection.rollback()
        print("Error: %s" % error)
    return connection

## Función para cargar tabla a la base de datos

In [None]:
def to_database(dataframe,table):
    try:
        output = StringIO()
        dataframe.to_csv(output, sep='\t', index = False, header = False, quoting=csv.QUOTE_NONE,escapechar='\\')
        output.seek(0)
        connection = getConnection()
        cursor = connection.cursor()
        cursor.copy_from(output,table,null='')
        connection.commit()
        cursor.close()
    except(Exception, Error) as error:
        print("Error: %s" % error)


## Crear DataFrame de los archivos

Se crea el DataFrame con los datos de todos los archivos terminados en ".tsv" del directorio

In [14]:
#pd.concat: Concatena todos los dataFrame creados, uno por cada archivo
#map: Iterador que aplica la funcion a todos los parametros entregados (Los nombres de los archivos)
#pd.read_csv: Lee el archivo .tsv        
#glob.glob: Encuentra el nombre de todos los archivos terminados en .tsv

df = pd.concat(map(functools.partial(pd.read_csv, sep='\t',on_bad_lines='skip', quoting = 3), glob.glob('*.tsv')))

## Cargar tabla Category

In [15]:
df_category = df[["product_category"]]

cantTotal = df_category.count()
cantNull = df_category.isnull().sum()

df_category = df_category.drop_duplicates(subset="product_category")

df_category = df_category.replace('_',' ',regex = True)

cantUnicos = df_category.count()

to_database(df_category,"category")

print("Cantidad total: ",int(cantTotal),"\nCantidad nulos: ",int(cantNull),"\nCantidad únicos: ",int(cantUnicos))

df_category

Cantidad total:  11259245 
Cantidad nulos:  0 
Cantidad únicos:  4


Unnamed: 0,product_category
0,Apparel
0,Automotive
0,Digital Music Purchase
0,Gift Card


## Cargar tabla Customer

In [16]:
df_customers_id = df[["customer_id"]]

cantTotal = df_customers_id.count()
cantNull = df_customers_id.isnull().sum()

df_customers_id = df_customers_id.drop_duplicates(subset="customer_id")

cantUnicos = df_customers_id.count()

to_database(df_customers_id,"customer")

print("Cantidad total: ",int(cantTotal),"\nCantidad nulos: ",int(cantNull),"\nCantidad únicos: ",int(cantUnicos))

df_customers_id

Cantidad total:  11259245 
Cantidad nulos:  0 
Cantidad únicos:  5453262


Unnamed: 0,customer_id
0,32158956
1,2714559
2,12608825
3,25482800
4,9310286
...,...
149078,42448250
149081,40383801
149082,15124244
149084,30603398


## Cargar tabla Time

In [17]:
df_time = df[["review_date"]].drop_duplicates(subset="review_date").dropna()

print("Rango de fechas: ", df_time["review_date"].iloc[0], " - ", df_time["review_date"].iloc[df_time.size-1])

to_database(df_time,"time")

df_time

Rango de fechas:  2013-01-14  -  2000-06-28


Unnamed: 0,review_date
0,2013-01-14
1,2014-03-04
2,2015-07-12
3,2015-06-03
4,2015-06-12
...,...
1688879,2001-11-17
1688880,2001-07-25
1688881,2000-12-07
1688882,2000-08-16


## Cargar tabla Product

In [18]:
df_products = df[["product_id", "product_parent", "product_title", "product_category"]].drop_duplicates(subset="product_id")

print("Cantidad total: ", df_products["product_id"].count())

df_products["product_category"] = df_products["product_category"].replace('_',' ',regex = True)

to_database(df_products,"product")

df_products

Cantidad total:  3771916


Unnamed: 0,product_id,product_parent,product_title,product_category
0,B01KL6O72Y,24485154,Easy Tool Stainless Steel Fruit Pineapple Core...,Apparel
1,B01ID3ZS5W,363128556,V28 Women Cowl Neck Knit Stretchable Elasticit...,Apparel
2,B01I497BGY,811958549,James Fiallo Men's 12-Pairs Low Cut Athletic S...,Apparel
3,B01HDXFZK6,692205728,Belfry Gangster 100% Wool Stain-Resistant Crus...,Apparel
4,B01G6MBEBY,431150422,JAEDEN Women's Beaded Spaghetti Straps Sexy Lo...,Apparel
...,...,...,...,...
147520,BT00CTP2B2,775486538,Amazon.com Gift Card in a Greeting Card (Vario...,Gift Card
147581,BT00CTP1EK,518174629,Amazon.com Gift Card - $500 (Christmas Tree de...,Gift Card
147630,B001H53QE4,825899505,"Amazon.com Gift Cards, Pack of 50 (Old Version...",Gift Card
148905,B000LGKQHU,941684275,Apple iTunes $25 Music Card,Gift Card


## Cargar tabla Review

In [19]:
df_review = df[["marketplace", "customer_id", "review_id", "product_id", "star_rating", "helpful_votes", "total_votes", "vine", "verified_purchase", "review_headline", "review_body","review_date"]]

to_database(df_review,"review")

df_review

Unnamed: 0,marketplace,customer_id,review_id,product_id,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date
0,US,32158956,R1KKOXHNI8MSXU,B01KL6O72Y,4,0,0,N,Y,★ THESE REALLY DO WORK GREAT WITH SOME TWEAKING ★,"These Really Do Work Great, But You Do Need To...",2013-01-14
1,US,2714559,R26SP2OPDK4HT7,B01ID3ZS5W,5,1,2,N,Y,Favorite for winter. Very warm!,I love this dress. Absolute favorite for winte...,2014-03-04
2,US,12608825,RWQEDYAX373I1,B01I497BGY,5,0,0,N,Y,Great Socks for the money.,"Nice socks, great colors, just enough support ...",2015-07-12
3,US,25482800,R231YI7R4GPF6J,B01HDXFZK6,5,0,0,N,Y,Slick hat!,"I bought this for my husband and WOW, this is ...",2015-06-03
4,US,9310286,R3KO3W45DD0L1K,B01G6MBEBY,5,0,0,N,Y,I would do it again!,Perfect dress and the customer service was awe...,2015-06-12
...,...,...,...,...,...,...,...,...,...,...,...,...
149081,US,40383801,R57O26VBSLMP1,B0002CZPPG,5,10,10,N,N,Way easier than explaining your musical taste ...,Finally there is a way for your family to buy ...,2005-01-21
149082,US,15124244,R375D634NGSSPI,B0002CZPPG,4,8,44,N,N,itunes paid for,its very convenient to have an idea of how mut...,2004-12-17
149083,US,40383801,R2NCVKVC9B7I9C,B0002CZPPG,5,20,30,N,N,Way easier than explaining your musical taste ...,Finally there is a way for your family to buy ...,2004-11-30
149084,US,30603398,RC2BIM4XKDCY4,B0002CZPPG,4,63,72,N,N,A great way to turn cash into songs,I picked up a few of these at Target a while b...,2004-11-10
