## In this notebook
* we explore the 5-core dataset 
* we identify columns
* we pick out subsets of data that are useful for our needs
* we add the subsets of data to a postgres database

In [1]:
import os

In [2]:
from sqlalchemy import create_engine
import psycopg2 
import io


In [3]:
os.listdir('../data/amazon-review-data-2018/5-core')

['Video_Games_5.json.gz',
 'Home_and_Kitchen_5.json.gz',
 'Cell_Phones_and_Accessories_5.json.gz',
 'Sports_and_Outdoors_5.json.gz',
 'Pet_Supplies_5.json.gz',
 'Office_Products_5.json.gz',
 'Electronics_5.json.gz',
 'Tools_and_Home_Improvement_5.json.gz']

In [4]:
os.listdir('../data/amazon-review-data-2018/metadata')

['meta_Sports_and_Outdoors.json.gz',
 'meta_Home_and_Kitchen.json.gz',
 'meta_Video_Games.json.gz',
 'meta_Electronics.json.gz',
 'meta_Pet_Supplies.json.gz',
 'meta_Tools_and_Home_Improvement.json.gz',
 'meta_Office_Products.json.gz',
 'meta_Cell_Phones_and_Accessories.json.gz']

In [5]:
import gzip

In [6]:
import pandas as pd
import json

In [7]:
pd.options.display.max_columns = 50
pd.options.display.max_colwidth = 200
pd.options.display.max_rows = 1000

In [8]:
import re

In [9]:
qprods = [
    'TV', 'Television', 
    'Monitor', 'Screen',
    'Headphone', 'Headphones', 'Head phone', 'Head phones', 'head-phones', 'headset',
    'Mouse',
    'Laptop']

qprod_re = re.compile(r'\b' + r'\b|\b'.join(qprods) + r'\b', re.IGNORECASE)

In [10]:
qprod_re.pattern

'\\bTV\\b|\\bTelevision\\b|\\bMonitor\\b|\\bScreen\\b|\\bHeadphone\\b|\\bHeadphones\\b|\\bHead phone\\b|\\bHead phones\\b|\\bhead-phones\\b|\\bheadset\\b|\\bMouse\\b|\\bLaptop\\b'

In [11]:
products = []
with gzip.open('../data/amazon-review-data-2018/metadata/meta_Electronics.json.gz') as gzfile:
    for line in gzfile:
        prod_meta = json.loads(line.strip())
        if 'title' in prod_meta and qprod_re.findall(prod_meta['title']):
            products.append(prod_meta)                
        
products_df = pd.DataFrame.from_dict(products)    

In [12]:
products_df.shape

(159431, 19)

In [13]:
#products_df.sample(100)['asin'].value_counts()

In [14]:
products_df.head(1)

Unnamed: 0,category,tech1,description,fit,title,also_buy,tech2,brand,feature,rank,also_view,main_cat,similar_item,date,price,asin,imageURL,imageURLHighRes,details
0,"[Electronics, Headphones, Earbud Headphones]",,"[, <b>True High Definition Sound:</b><br>With CVC 6.0 noise cancellation technology, Safari bluetooth headphone intelligently filters out background noise for sound transmission in busy and noisy ...",,Wireless Bluetooth Headphones Earbuds with Microphone in Ear Headphones Noise Cancelling Earbuds with Mic Sweatproof Bluetooth Earphones Stereo Headset for Running Workout Gym Sports Secure Fit Case,[],,Enter The Arena,"[Superb Sound Quality: Plays crystal clear audio without a hitch. Built-in Microphone with CVC 6.0 noise cancellation allows for high quality and clear voice, even in noisy environments like insid...","[>#950 in Cell Phones & Accessories (See Top 100 in Cell Phones & Accessories), >#124 in Cell Phones & Accessories > Cell Phone Accessories > Headphones > Earbud Headphones, >#387 in Electronics >...",[],Home Audio & Theater,,"October 23, 2017",$7.99,132492776,"[https://images-na.ssl-images-amazon.com/images/I/410sQ%2BqT9uL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/417SFTTz%2ByL._SS40_.jpg, https://images-na.ssl-images-amazon.com/image...","[https://images-na.ssl-images-amazon.com/images/I/410sQ%2BqT9uL.jpg, https://images-na.ssl-images-amazon.com/images/I/417SFTTz%2ByL.jpg, https://images-na.ssl-images-amazon.com/images/I/51b3oob1PV...",


In [15]:
baseline_products = pd.DataFrame({
    'category' : products_df['category'].apply(lambda x: ', '.join(x)),
    'main_cat' : products_df['main_cat'],
    'description' : products_df['description'].apply(lambda x: ' '.join(x)),
    'title' : products_df['title'],
    'brand' : products_df['brand'],
    'feature' : products_df['feature'].apply(lambda x: ' '.join(x)),
    'also_view' : products_df['also_view'].apply(lambda x: ', '.join(x)),
    'also_buy' : products_df['also_buy'].apply(lambda x: ', '.join(x)),
    'date' : products_df['date'],
    'price' : products_df['price'],
    'asin' : products_df['asin'],
    'imageURL' : products_df['imageURL'].apply(lambda x: ', '.join(x)),
    'imageURLHighRes' : products_df['imageURLHighRes'].apply(lambda x: ', '.join(x)),
})

In [16]:
baseline_products.head()

Unnamed: 0,category,main_cat,description,title,brand,feature,also_view,also_buy,date,price,asin,imageURL,imageURLHighRes
0,"Electronics, Headphones, Earbud Headphones",Home Audio & Theater,"<b>True High Definition Sound:</b><br>With CVC 6.0 noise cancellation technology, Safari bluetooth headphone intelligently filters out background noise for sound transmission in busy and noisy en...",Wireless Bluetooth Headphones Earbuds with Microphone in Ear Headphones Noise Cancelling Earbuds with Mic Sweatproof Bluetooth Earphones Stereo Headset for Running Workout Gym Sports Secure Fit Case,Enter The Arena,"Superb Sound Quality: Plays crystal clear audio without a hitch. Built-in Microphone with CVC 6.0 noise cancellation allows for high quality and clear voice, even in noisy environments like inside...",,,"October 23, 2017",$7.99,132492776,"https://images-na.ssl-images-amazon.com/images/I/410sQ%2BqT9uL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/417SFTTz%2ByL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images...","https://images-na.ssl-images-amazon.com/images/I/410sQ%2BqT9uL.jpg, https://images-na.ssl-images-amazon.com/images/I/417SFTTz%2ByL.jpg, https://images-na.ssl-images-amazon.com/images/I/51b3oob1PVL..."
1,"Electronics, eBook Readers &amp; Accessories, eBook Readers",All Electronics,,"BOOX Max Carta Ereader,13.3"" Flexible Screen 16 GB with Built-in Wi-Fi,Bluetooth",BOOX,"Excellent Pinch Zooming Experience -- Highlight, annotate or erase on any document like it's pen on paper. Even make hand-written notes with smooth, crisp writing. Thoughtful Design for Reading P...","B077GVLMJN, B07BNGJXGS, B072DXXXN1, B07BYKZ9P3, B07FC76MK7, B07GZM5M98, B07KM2BDPP, B01KWX3M1I, 1400501717, B072FP9VWD, B06X1F7RPP, B07D9CMZX2, B077NSWLH2, B0002IOYPM, B07GBPR8QP, B01M8O73W9, B06X...",,"June 30, 2016",,285175270,"https://images-na.ssl-images-amazon.com/images/I/5127U2fTMDL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/41d2M9mjkbL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/5127U2fTMDL.jpg, https://images-na.ssl-images-amazon.com/images/I/41d2M9mjkbL.jpg, https://images-na.ssl-images-amazon.com/images/I/41XefqQapRL.jpg..."
2,"Electronics, Headphones",All Electronics,Use these high quality headphones for internet chatting and enjoy the comfort and ease of the headphones with the microphone and in-line volume control.Works with: Skype msn AIM YAHOO! Windows Live,Polaroid Pbm2200 PC / Gaming Stereo Headphones With Microphone &amp; In-line Volume,Polaroid,"Ideal for PC Internet chatting, PC / Console gaming and music In-line volume control Optimal performance for VoIP usage Enhanced soft-cushioned ear pads",,,"December 13, 2012",,558835155,https://images-na.ssl-images-amazon.com/images/I/21rEirndRLL._SS40_.jpg,https://images-na.ssl-images-amazon.com/images/I/21rEirndRLL.jpg
3,"Electronics, eBook Readers &amp; Accessories, Bundles",Portable Audio &amp; Accessories,"Protect the screen on your Nook Color with this film kit. Featuring a clear layer of film, the kit prevents dust and scratches from forming on your screen without blocking your view. Simply peel i...",Nook Color Clear Screen Protector Kit 2-Pack,Barnes &amp; Noble,Includes two clear protective films and microfiber cleaning cloth.,,,"January 9, 2011",$1.96,594033896,"https://images-na.ssl-images-amazon.com/images/I/41LSp0sol6L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/31GO9OqHPAL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/3...","https://images-na.ssl-images-amazon.com/images/I/41LSp0sol6L.jpg, https://images-na.ssl-images-amazon.com/images/I/31GO9OqHPAL.jpg, https://images-na.ssl-images-amazon.com/images/I/31lCFMoVjHL.jpg..."
4,"Electronics, eBook Readers & Accessories, Screen Protectors",Portable Audio & Accessories,"The Anti-Glare Screen Protector Kit has a specially formulated matte finish that resists both glare and fingerprints; it also protects your screen against scratches, dust, and dirt. Easy to apply ...",Nook Anti-Glare Screen Protector Kit Genuine Barnes and Noble,Barnes & Noble,Kit contains 2 pieces of protective film and a microfiber cloth Suitable for NOOK HD+ only Instructions included,,,"January 6, 2015",,594467942,,


In [17]:
baseline_products.shape

(159431, 13)

## Shortlisting baseline products that have at least 5 reviews

In [18]:
from collections import defaultdict

In [24]:
asin_review_counts = defaultdict(int)
for asin in baseline_products['asin'].tolist():
    asin_review_counts[asin] = 0
    
baseline_product_reviews = []
with gzip.open('../data/amazon-review-data-2018/5-core/Electronics_5.json.gz') as gzfile:
    c = 0
    for line in gzfile:
        review = json.loads(line.strip())
        if review['asin'] in asin_review_counts:
            baseline_product_reviews.append(review)
            asin_review_counts[review['asin']] += 1
        c += 1

        if c % 1000000 == 0:
            print(c)
        

1000000
2000000
3000000
4000000
5000000
6000000


In [25]:
len(baseline_product_reviews)

1389881

In [26]:
baseline_product_reviews[:2]

[{'overall': 4.0,
  'vote': '71',
  'verified': True,
  'reviewTime': '12 8, 2008',
  'reviewerID': 'AE8R1JSMJYIU',
  'asin': '0972683275',
  'reviewerName': 'L. D. Ellison',
  'reviewText': 'I am using this mount for a 27" LCD in my bedroom. Using a swivel mount with tilt works great for almost any room layout. The black powder-coat paint matches the TV, and looks great. Adjustment and locking is easy with the included hex wrench. I would rate this "5 stars" if the mounting screws were better. Made from relatively soft steel, the screw head strips very easily. After ruining one screw, I replaced it with a large black deck screw. It might be advisable to drill small pilot holes before attempting to drive the screws.\n\nWord of caution: If you do not drive the proper mounting screws into the center of a wall stud, if you exceed the specified TV weight limit, or if you try to get by with a really cheap swing arm mount, you may be signing a death sentence for your $500 TV (and perhaps you

In [27]:
baseline_product_reviews_df = pd.DataFrame.from_records(baseline_product_reviews)

In [28]:
baseline_product_reviews_df.head()

Unnamed: 0,overall,vote,verified,reviewTime,reviewerID,asin,reviewerName,reviewText,summary,unixReviewTime,image,style
0,4.0,71.0,True,"12 8, 2008",AE8R1JSMJYIU,972683275,L. D. Ellison,"I am using this mount for a 27"" LCD in my bedroom. Using a swivel mount with tilt works great for almost any room layout. The black powder-coat paint matches the TV, and looks great. Adjustment an...","Looks good, does the job",1228694400,,
1,4.0,6.0,True,"02 19, 2008",A3AKVALGT4Y02G,972683275,MICHAEL,"For the money you can't go wrong. Yes, it has a litlle play in the mechanism but, you can adjust your mounting to accomodate that. It works great and frees up counter space. I would do it all agai...",can't beat it for the price,1203379200,,
2,5.0,,True,"11 18, 2017",A39KGSKV4C7LDV,972683275,Ken Mile-Hi,I needed a television hanger with good extension. This did the job. I installed this in a closable TV cabinet that requires me to push the TV in and pull it out routinely. By loosing the nuts on t...,Glad I bought this.,1510963200,,
3,5.0,,True,"10 31, 2017",A2Y3EXMNVOV2IO,972683275,Chocobo Sandwich,"Good quality for the price. Actually does go 24 inches from the base to the vesa mount. Not quite enough to turn a thin bezel 55"" tv 90 degrees but 70 degrees is good enough for me.",Good quality for the price,1509408000,"[https://images-na.ssl-images-amazon.com/images/I/71XJimCm3QL._SY88.jpg, https://images-na.ssl-images-amazon.com/images/I/71RnBporJBL._SY88.jpg, https://images-na.ssl-images-amazon.com/images/I/71...",
4,5.0,,True,"10 1, 2017",A2FEE324VENYT7,972683275,Walterw,Very nice and sturdy.,Five Stars,1506816000,,


In [29]:
asin_5_counts = dict(filter(lambda x: x[1] >= 5, asin_review_counts.items()))

In [30]:
len(asin_5_counts)

32474

In [31]:
baseline_product_reviews_df.shape

(1389881, 12)

In [32]:
# shortlist reviews for those asins that contain at least 5 reviews
baseline_product_reviews_asin5count = baseline_product_reviews_df[baseline_product_reviews_df['asin'].isin(list(asin_5_counts.keys()))]

In [33]:
baseline_product_reviews_asin5count['asin'].value_counts()

B00DIF2BO2    6226
B0043T7FXE    5457
B00BP5KOPA    4625
B006JH8T3S    4008
B00CD8AFFW    3917
              ... 
B00ZI6H0JU       5
B0016ZU9EU       5
B00ZGE9CT0       5
B00ZFZ2TKY       5
B01HI9ZK8M       5
Name: asin, Length: 32474, dtype: int64

## Save and load baseline products and reviews to and from database

In [34]:
conn_string = 'postgresql+psycopg2://gabbydbuser:gabbyDBpass@localhost:5432/gabbyDB'

In [35]:
db = create_engine(conn_string)
conn = db.connect()


In [36]:
baseline_products_asin5count = baseline_products[baseline_products['asin'].isin(asin_5_counts.keys())].drop_duplicates('asin')

In [37]:
baseline_products_asin5count['asin'].value_counts()

0972683275    1
B00SPWPF1O    1
B00SR7HCCM    1
B00SR7HE7U    1
B00SR614O0    1
             ..
B00AJEF82C    1
B00AJE4VK2    1
B00AJD6PK2    1
B00AJD0GRU    1
B01HJDNL60    1
Name: asin, Length: 32474, dtype: int64

In [38]:
baseline_products_asin5count.shape

(32474, 13)

In [58]:
baseline_products_asin5count.to_sql('baseline_products', con=conn, if_exists='replace',index=False, method='multi')

32474

In [46]:
baseline_product_reviews_asin5count.dtypes

overall           float64
vote               object
verified             bool
reviewTime         object
reviewerID         object
asin               object
reviewerName       object
reviewText         object
summary            object
unixReviewTime      int64
image              object
style              object
dtype: object

In [60]:
# converting reviews to adequate datatypes and adding necessary columns
baseline_reviews_asin5count_for_sql = pd.DataFrame({
    'review_id': list(range(baseline_product_reviews_asin5count.shape[0])),
    'rating': baseline_product_reviews_asin5count['overall'].astype(float),
    'sentiment': baseline_product_reviews_asin5count['overall'].astype(float).apply(lambda x: 'positive' if x > 3 else 'negative'),
    'vote': baseline_product_reviews_asin5count['vote'].str.replace(',', '').astype(float).fillna(0),
    'verified': baseline_product_reviews_asin5count['verified'].astype(bool),
    'reviewerID': baseline_product_reviews_asin5count['reviewerID'].astype(str),
    'asin': baseline_product_reviews_asin5count['asin'].astype(str),
    'reviewText': baseline_product_reviews_asin5count['reviewText'].astype(str),
    'reviewTitle': baseline_product_reviews_asin5count['summary'].astype(str),
    'reviewTime': pd.to_datetime(baseline_product_reviews_asin5count['unixReviewTime'], unit='s'),
})

In [61]:
baseline_reviews_asin5count_for_sql.dtypes

review_id               int64
rating                float64
sentiment              object
vote                  float64
verified                 bool
reviewerID             object
asin                   object
reviewText             object
reviewTitle            object
reviewTime     datetime64[ns]
dtype: object

In [62]:
baseline_reviews_asin5count_for_sql.sample(10)

Unnamed: 0,review_id,rating,sentiment,vote,verified,reviewerID,asin,reviewText,reviewTitle,reviewTime
574847,574807,5.0,positive,0.0,True,A8UKLYGZO9K4S,B00C42P2LY,I love this case. It is soft and cushioned. I get many compliments on it. So glad I ordered it.,Love it!,2014-06-13
417630,417590,5.0,positive,2.0,True,AB8G006H669UC,B007AAUTG8,"Excellent item, works as advertised. After reading other reviews concerning water problems, I made a mount and have it just inside back window of motorhome. Straight line with no obstructions to...","Excellent item, mounted inside back window of motorhome to keep it dry",2014-09-10
230196,230157,3.0,negative,0.0,True,A1RPVO99UU7884,B003CJTR8M,"Nice headphones, but they didn't last long. Construction is not that great.",Not that great.,2015-02-18
933791,933740,1.0,negative,0.0,True,A1GG99CY5Y0RRS,B00R45XCMM,"This television is a piece of junk. My first set was replaced, because videos would constantly fade from bright-dark-bright. 3D transmissions were periodically disabled during viewing. The rampant...",Active 3D glasses required,2016-03-02
903687,903638,4.0,positive,0.0,True,A38X94E8CP1B3M,B00P20TFWM,"Haven't used it but one time it worked ok . What impressed me was their email, checking to make sure all was ok. I don't get that to often.",Haven't used it but one time it worked ok. ...,2015-09-17
477677,477637,5.0,positive,0.0,True,A1L6LAE5UGW0SJ,B009A5204K,"Very nice set, could use a bit more bass but quality is top notch.",Five Stars,2016-03-08
1105934,1105880,3.0,negative,0.0,True,A21ETP31MA04G1,B014FASL1A,ok,Three Stars,2017-08-26
993443,993389,5.0,positive,0.0,True,AC8EJ25I7DLI8,B00V7N3ZUG,Fantastic headphones. Ended up buying pairs for both my gf and my sister.,Five Stars,2016-07-18
855338,855289,4.0,positive,0.0,True,A2Q82KC2XUS0TH,B00MNABXYM,Good buy. Works great.,Four Stars,2016-04-24
227672,227633,4.0,positive,0.0,True,AFWINLPXCC34Y,B003AIL2HE,"Nice headphones for the gym! Purchased for my husband, he said they were a little stiff at first, but the sound quality is nice.",Perfect head phone for,2018-04-20


In [63]:
baseline_reviews_asin5count_for_sql.to_sql('baseline_reviews', con=conn, if_exists='replace',index=False, method='multi')

1389714

### Loading data trials

In [56]:
rconn = db.raw_connection()


In [None]:
baseline_product_asins = pd.read_sql('''select distinct(asin) from baseline_products;''', conn)

In [None]:
baseline_product_asins.head()

Unnamed: 0,asin
0,B0010TVLP8
1,B005WKGAFC
2,B0073JQUAK
3,B000BITRP0
4,B003JULCBE


In [None]:
# save baseline products to file
baseline_products.to_json("../data/amazon-review-data-2018/sample/baseline-products.json")


# Scratch

In [37]:
with rconn.cursor() as cursor:
    sql1 = '''select * from data;'''
    cursor.execute(sql1)
    for i in cursor.fetchall():
        print(i)
    
    # conn.commit()
    rconn.close()

('Electronics, Headphones, Earbud Headphones', 'Home Audio & Theater', ' <b>True High Definition Sound:</b><br>With CVC 6.0 noise cancellation technology, Safari bluetooth headphone intelligently filters out background noise for sound transmission in busy and noisy environments. The latest Bluetooth 4.1 technology features faster & stable signal, clearer sound quality and lower power consumption.<br><br><b>Sweat Proof Protection:</b><br>IPX7 water resistant material prevents headphone damage when you are getting drenched in sweat after a grueling workout.<br><br><b>Long Battery Life:</b><br>Up to 6 hours of continuous music/talk time and up to 100 hours of standby with a single charge so you can spend more time listening to music and less time charging.<br><br><b>Stylish and Ergonomic Design:</b><br>With a choice between small, medium or large size ear tips, you are bound to have a secure fit that stays in place whether you are running or working out.<br><br><b>Universally Compatible:<

100


In [110]:
df = pd.DataFrame.from_dict(data_100_sample)

In [112]:
df.tail()

Unnamed: 0,overall,vote,verified,reviewTime,reviewerID,asin,style,reviewerName,reviewText,summary,unixReviewTime,image
95,5.0,,True,"09 11, 2014",A1ZZFX6SHV84ZU,594033926,,grasshopper,very nice item,Five Stars,1410393600,
96,5.0,,True,"09 11, 2014",A34JP15SDQIII6,594033926,,Joe,"Since B&N no longer carry the Nook Color, or the cases... this one is actually better and cheaper than what I had originally bought from B&N.","If you need one, get this one",1410393600,
97,5.0,,True,"02 18, 2016",A3HN9PMEGAM6JT,594459451,,Karen,works great with nook color,Five Stars,1455753600,
98,5.0,,True,"02 11, 2016",A1CHTT6E3NV5YL,594459451,,Amazon Shopper,"The 90 degree connector is a bonus that allows her to read while it is charging without the older style straight connector sticking down. This cable does everything including charging, and data t...",Works great on my Wife's older model Nook Color.,1455148800,
99,5.0,,True,"02 7, 2016",A1N8CPCN6PT8FP,594459451,,DKChris,Great solution to original power cord. New design gives the cord a better position so that it won't twist and wear out. Seems to be well made and has stood up to daily use without an issue.,Good Product,1454803200,


## Saving data subset into a database

In [119]:
engine = create_engine('postgresql+psycopg2://gabbydbuser:gabbyDBpass@localhost:5432/gabbyDB')


In [126]:
df.to_sql('products_reviews', engine, if_exists='replace',index=False)

ProgrammingError: (psycopg2.ProgrammingError) can't adapt type 'dict'
[SQL: INSERT INTO products_reviews (overall, vote, verified, "reviewTime", "reviewerID", asin, style, "reviewerName", "reviewText", summary, "unixReviewTime", image) VALUES (%(overall)s, %(vote)s, %(verified)s, %(reviewTime)s, %(reviewerID)s, %(asin)s, %(style)s, %(reviewerName)s, %(reviewText)s, %(summary)s, %(unixReviewTime)s, %(image)s)]
[parameters: ({'overall': 5.0, 'vote': '67', 'verified': True, 'reviewTime': '09 18, 1999', 'reviewerID': 'AAP7PPBU72QFM', 'asin': '0151004714', 'style': {'Format:': ' Hardcover'}, 'reviewerName': 'D. C. Carrad', 'reviewText': 'This is the best novel I have read in 2 or 3 years.  It is everything that fiction should be -- beautifully written, engaging, well-plotted and struc ... (806 characters truncated) ...  and that her publishers will bring her first novel back into print -- I  want to read it.  Thank you, Ms. Messud, for writing such a wonderful work.', 'summary': 'A star is born', 'unixReviewTime': 937612800, 'image': None}, {'overall': 3.0, 'vote': '5', 'verified': True, 'reviewTime': '10 23, 2013', 'reviewerID': 'A2E168DTVGE6SV', 'asin': '0151004714', 'style': {'Format:': ' Kindle Edition'}, 'reviewerName': 'Evy', 'reviewText': 'Pages and pages of introspection, in the style of writers like Henry James.  I like this kind of  novels and the writer occasionally delights me with ... (163 characters truncated) ... e tightly crystallizing, the themes and "truths" within the story.\n\nIt\'s a story I could relate to but I wish it hadn\'t been too tedious to read.', 'summary': 'A stream of consciousness novel', 'unixReviewTime': 1382486400, 'image': None}, {'overall': 5.0, 'vote': '4', 'verified': False, 'reviewTime': '09 2, 2008', 'reviewerID': 'A1ER5AYS3FQ9O3', 'asin': '0151004714', 'style': {'Format:': ' Paperback'}, 'reviewerName': 'Kcorn', 'reviewText': "This is the kind of novel to read when you have time to lose yourself in a book for days, possibly weeks. I had to go back and reread it as soon as I ... (1814 characters truncated) ... re out where she belongs in the world and how to make peace with her difficult family as well as coming to grips with history's impact on her family.", 'summary': "I'm a huge fan of the author and this one did not disappoint", 'unixReviewTime': 1220313600, 'image': None}, {'overall': 5.0, 'vote': '13', 'verified': False, 'reviewTime': '09 4, 2000', 'reviewerID': 'A1T17LMQABMBN5', 'asin': '0151004714', 'style': {'Format:': ' Hardcover'}, 'reviewerName': 'Caf Girl Writes', 'reviewText': "What gorgeous language! What an incredible writer! The Last Life is one of  the best written novels I have ever read. This incredible novel contains  ... (912 characters truncated) ...  is -- without a  stretch of doubt -- the best historical fiction novel I have ever read.  This novel should not be overlooked. Run along and get it!", 'summary': 'The most beautiful book I have ever read!', 'unixReviewTime': 968025600, 'image': None}, {'overall': 3.0, 'vote': '8', 'verified': True, 'reviewTime': '02 4, 2000', 'reviewerID': 'A3QHJ0FXK33OBE', 'asin': '0151004714', 'style': {'Format:': ' Hardcover'}, 'reviewerName': 'W. Shane Schmidt', 'reviewText': 'I was taken in by reviews that compared this book with The Leopard or promised a bildungsroman set in a family that is isolated and on the move.  Wel ... (1145 characters truncated) ... like mine which was one of detached interest.  For my part, I was relieved that I could close the book whenever I wanted  to quiet my jabbery friend.', 'summary': 'A dissenting view--In part.', 'unixReviewTime': 949622400, 'image': None}, {'overall': 4.0, 'vote': None, 'verified': True, 'reviewTime': '06 5, 2013', 'reviewerID': 'A3IYSOTP3HA77N', 'asin': '0380709473', 'style': {'Format:': ' Kindle Edition'}, 'reviewerName': 'B. Marks', 'reviewText': 'I read this probably 50 years ago in my youth and I just re-read it for the first time.  It was a fun read but not as good as my memory of it.\n\nBarry', 'summary': 'Above average mystery', 'unixReviewTime': 1370390400, 'image': None}, {'overall': 5.0, 'vote': None, 'verified': True, 'reviewTime': '06 27, 2016', 'reviewerID': 'A11SXV34PZUQ5E', 'asin': '0380709473', 'style': {'Format:': ' Kindle Edition'}, 'reviewerName': 'Tom C.', 'reviewText': "I read every Perry mason book voraciously. Finding the Lam/Cool mysteries, and getting to enjoy more of Gardner's characters, is like Christmas morning as a kid. Each book is a gift!", 'summary': 'Lam is cool!', 'unixReviewTime': 1466985600, 'image': None}, {'overall': 5.0, 'vote': None, 'verified': True, 'reviewTime': '07 30, 2015', 'reviewerID': 'A2AUQM1HT2D5T8', 'asin': '0380709473', 'style': {'Format:': ' Kindle Edition'}, 'reviewerName': 'ema', 'reviewText': 'I love this series of Bertha and Lamb..  Great novels.', 'summary': 'Five Stars', 'unixReviewTime': 1438214400, 'image': None}  ... displaying 10 of 100 total bound parameter sets ...  {'overall': 5.0, 'vote': None, 'verified': True, 'reviewTime': '02 11, 2016', 'reviewerID': 'A1CHTT6E3NV5YL', 'asin': '0594459451', 'style': None, 'reviewerName': 'Amazon Shopper', 'reviewText': 'The 90 degree connector is a bonus that allows her to read while it is charging without the older style straight connector sticking down.  This cable does everything including charging, and data transfer, works just as well as the original that came with her Nook Color, but better.', 'summary': "Works great on my Wife's older model Nook Color.", 'unixReviewTime': 1455148800, 'image': None}, {'overall': 5.0, 'vote': None, 'verified': True, 'reviewTime': '02 7, 2016', 'reviewerID': 'A1N8CPCN6PT8FP', 'asin': '0594459451', 'style': None, 'reviewerName': 'DKChris', 'reviewText': "Great solution to original power cord.  New design gives the cord a better position so that it won't twist and wear out.  Seems to be well made and has stood up to daily use without an issue.", 'summary': 'Good Product', 'unixReviewTime': 1454803200, 'image': None})]
(Background on this error at: https://sqlalche.me/e/14/f405)

In [120]:
products_df.columns

Index(['category', 'tech1', 'description', 'fit', 'title', 'also_buy', 'tech2',
       'brand', 'feature', 'rank', 'also_view', 'main_cat', 'similar_item',
       'date', 'price', 'asin', 'imageURL', 'imageURLHighRes', 'details'],
      dtype='object')

In [121]:
products_df.head(0).to_sql('products_metadata', engine, if_exists='replace',index=False)

0

In [125]:
products_df.head().to_sql('products_metadata', engine, if_exists='replace',index=False)

ValueError: A string literal cannot contain NUL (0x00) characters.

In [124]:
conn = engine.raw_connection()
with conn.cursor() as cur:
    output = io.StringIO()
    products_df.head().to_csv(output, sep='\t', header=False, index=False)
    output.seek(0)
    contents = output.getvalue()
    cur.copy_from(output, 'products_metadata', null="") # null values become ''
    conn.commit()

Error: need to escape, but no escapechar set