This script builds a model for classifying two products from different retailers as being the same or not, based on data from the retailers. In currently uses the following features:
- Cosine distance between:
    - Product names
    - Product descriptions

The last two cells find the inter-retailer price differences for products known to be the same and determine the value of correctly idenfitied arbitrage opportunities
    

In [58]:
# This tells matplotlib not to try opening a new window for each plot.
%matplotlib inline

# Import a bunch of libraries.
import time
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.spatial.distance import cosine

# Set the randomizer seed so results are the same each time.
np.random.seed(0)

In [59]:
# Load the data
amazon = pd.read_csv("amazon.csv", na_filter=False, encoding="ISO-8859-1")
google = pd.read_csv("googleproducts.csv", na_filter=False, encoding="ISO-8859-1")
amazon_google = pd.read_csv("Amzon_GoogleProducts_perfectMapping.csv", encoding="ISO-8859-1")
google

Unnamed: 0,id,name,description,manufacturer,price
0,http://www.google.com/base/feeds/snippets/1112...,learning quickbooks 2007,learning quickbooks 2007,intuit,38.99
1,http://www.google.com/base/feeds/snippets/1153...,superstart! fun with reading & writing!,fun with reading & writing! is designed to hel...,,8.49
2,http://www.google.com/base/feeds/snippets/1134...,qb pos 6.0 basic software,qb pos 6.0 basic retail mngmt software. for re...,intuit,637.99
3,http://www.google.com/base/feeds/snippets/1204...,math missions: the amazing arcade adventure (g...,save spectacle city by disrupting randall unde...,,12.95
4,http://www.google.com/base/feeds/snippets/1224...,production prem cs3 mac upgrad,adobe cs3 production premium mac upgrade from ...,adobe software,805.99
5,http://www.google.com/base/feeds/snippets/1231...,video studio 11 plus,corel video studio 11 plus is consumer video e...,corel corporation,103.99
6,http://www.google.com/base/feeds/snippets/1336...,edius pro 4,whether you are working with standard definiti...,canopus/grass valley,585.99
7,http://www.google.com/base/feeds/snippets/1283...,qb pos 6.0 pro multi store sw,qb pos 6.0 pro multistore retail mngmt softwar...,intuit,1054.99
8,http://www.google.com/base/feeds/snippets/1294...,qb credit card kit 3.0,quickbooks credit card kit v3.0 (2007),intuit,25.99
9,http://www.google.com/base/feeds/snippets/1299...,vegas movie studiodvd+8 plat,sony media software vegas movie studio +dvd 8 ...,sony creative software,92.99


In [3]:
# create a table with all the matched products
data = pd.merge(amazon, amazon_google, left_on="id", right_on="idAmazon")
data = pd.merge(data, google, left_on="idGoogleBase", right_on="id")
data = data[['title', 'description_x', 'name', 'description_y']]
data.columns = ['amazon_name', 'amazon_description', 'google_name', 'google_description']
data['match'] = np.ones(data.shape[0])
data    

Unnamed: 0,amazon_name,amazon_description,google_name,google_description,match
0,clickart 950 000 - premier image pack (dvd-rom),,clickart 950000 - premier image pack (dvd-rom),massive collection of images & fonts for all y...,1
1,noah's ark activity center (jewel case ages 3-8),,the beginners bible: noah's ark activity cente...,,1
2,peachtree by sage premium accounting for nonpr...,peachtree premium accounting for nonprofits 20...,sage (ptree) - vernfp2007rt - premium accounti...,if you're like most nonprofit organizations yo...,1
3,singing coach unlimited,singing coach unlimited - electronic learning ...,singing coach unlimited - electronic learning ...,learn to sing with the help of a patented real...,1
4,adobe after effects professional 6.5 upgrade f...,upgrade only; installation of after effects st...,adobe software 22070152 after effects 6.5 pbupgrd,adobe after effects pb 6.5 win upgrade.standar...,1
5,mia's math adventure: just in time,in mia's math adventure: just in time children...,kutoka interactive 61208 mia's math adventure:...,mia's math adventure tells a captivating story...,1
6,mia's math adventure: just in time,in mia's math adventure: just in time children...,kutoka interactive 61208 - mias math adventure...,kutoka interactive 61208 : mia s math adventur...,1
7,disney's 1st & 2nd grade bundle (pixar 1st gra...,disney's 1st & 2nd grade bundle will help your...,disney learning 1st-2nd grade win,disney learning 1st & 2nd features an all-star...,1
8,paper art: gift wrapping,how many times have you heard that it's the th...,paper art volume 2: gift wrapping,how many times have you heard that it's the th...,1
9,spy sweeper spanish,,webroot software inc - 31250 - spy sweeper spa...,a sypware infection is no longer just a matter...,1


In [64]:
# create table of mismatches
def findDifferent(amazonId):
    googleMatches = amazon_google.loc[amazon_google.idAmazon==amazonId].idGoogleBase
    googleNotMatches = google.loc[google.id.isin(googleMatches)==False]
    googleNotMatches = googleNotMatches.iloc[np.random.permutation(len(googleNotMatches) - 1)]
    return googleNotMatches[:1].iloc[0].id

amazon_not_google = pd.DataFrame(amazon_google.idAmazon)
amazon_not_google['idGoogleBase'] = amazon_not_google.idAmazon.map(findDifferent)
amazon_not_google

Unnamed: 0,idAmazon,idGoogleBase
0,b000jz4hqo,http://www.google.com/base/feeds/snippets/1134...
1,b00004tkvy,http://www.google.com/base/feeds/snippets/5707...
2,b000g80lqo,http://www.google.com/base/feeds/snippets/8737...
3,b0006se5bq,http://www.google.com/base/feeds/snippets/1843...
4,b00021xhzw,http://www.google.com/base/feeds/snippets/1790...
5,b0000dbykm,http://www.google.com/base/feeds/snippets/9426...
6,b0000dbykm,http://www.google.com/base/feeds/snippets/1348...
7,b00029bqa2,http://www.google.com/base/feeds/snippets/9453...
8,b0007prnjo,http://www.google.com/base/feeds/snippets/1667...
9,b000i82j80,http://www.google.com/base/feeds/snippets/1088...


In [68]:
def cosDist(s1, s2):
    returnDist = 1
    try:
        vecs = TfidfVectorizer().fit_transform([s1,s2]).todense()
        cosDist = cosine(vecs[0], vecs[1])
        if not np.isnan(cosDist):
            returnDist = cosDist 
    except:
        pass
    return returnDist

def findSim(amazonId):
    amazonNm = amazon.loc[amazon.id==amazonId].title.values[0]
    googleMatches = amazon_google.loc[amazon_google.idAmazon==amazonId].idGoogleBase
    googleNotMatches = google.loc[google.id.isin(googleMatches)==False]
    notMatchNames = googleNotMatches.name.values
    notMatchIds = googleNotMatches.id.values
    simId = ''
    for i, notMatchName in enumerate(notMatchNames):
        if cosDist(notMatchName, amazonNm) < .9:
            simId = notMatchIds[i]
            break
    return simId

amazon_sim_google = pd.DataFrame(amazon_google.idAmazon)
amazon_sim_google['idGoogleBase'] = amazon_sim_google.idAmazon.map(findSim)
amazon_sim_google

Unnamed: 0,idAmazon,idGoogleBase
0,b000jz4hqo,http://www.google.com/base/feeds/snippets/1345...
1,b00004tkvy,http://www.google.com/base/feeds/snippets/6645...
2,b000g80lqo,http://www.google.com/base/feeds/snippets/1112...
3,b0006se5bq,http://www.google.com/base/feeds/snippets/1355...
4,b00021xhzw,http://www.google.com/base/feeds/snippets/1349...
5,b0000dbykm,http://www.google.com/base/feeds/snippets/1204...
6,b0000dbykm,http://www.google.com/base/feeds/snippets/1204...
7,b00029bqa2,http://www.google.com/base/feeds/snippets/1819...
8,b0007prnjo,http://www.google.com/base/feeds/snippets/1824...
9,b000i82j80,http://www.google.com/base/feeds/snippets/1806...


In [65]:
# def findRealSim(amazonId):
#     amazonNm = amazon.loc[amazon.id==amazonId].title.values[0]
#     googleMatches = amazon_google.loc[amazon_google.idAmazon==amazonId].idGoogleBase
#     googleNotMatches = google.loc[google.id.isin(googleMatches)==False]
#     notMatchNames = googleNotMatches.name.values
#     notMatchIds = googleNotMatches.id.values
#     simId = ''
#     for i, notMatchName in enumerate(notMatchNames):
#         if amazonNm in notMatchName:
#             simId = notMatchIds[i]
#             break
#     return simId

# amazon_real_sim_google = pd.DataFrame(amazon_google.idAmazon)
# amazon_real_sim_google['idGoogleBase'] = amazon_real_sim_google.idAmazon.map(findRealSim)
amazon_real_sim_google = amazon_real_sim_google.loc[amazon_real_sim_google.idGoogleBase!='']
amazon_real_sim_google

Unnamed: 0,idAmazon,idGoogleBase
9,b000i82j80,http://www.google.com/base/feeds/snippets/1208...
10,b000i82j80,http://www.google.com/base/feeds/snippets/1208...
28,b0000ycfdq,http://www.google.com/base/feeds/snippets/1284...
37,b000h22rbg,http://www.google.com/base/feeds/snippets/5655...
42,b000buqo9a,http://www.google.com/base/feeds/snippets/1276...
47,b0009i9tqy,http://www.google.com/base/feeds/snippets/1230...
96,b000p9cr34,http://www.google.com/base/feeds/snippets/2084...
109,b0007d8es0,http://www.google.com/base/feeds/snippets/1361...
122,b000jx3qxq,http://www.google.com/base/feeds/snippets/7150...
123,b000jx3qxq,http://www.google.com/base/feeds/snippets/7150...


In [69]:
# add mismatched examples to matched ones
amazon_not_google = pd.concat([amazon_not_google, amazon_sim_google, amazon_real_sim_google])
moreData = pd.merge(amazon, amazon_not_google, left_on="id", right_on="idAmazon")
moreData = pd.merge(moreData, google, left_on="idGoogleBase", right_on="id")
moreData = moreData[['title', 'description_x', 'name', 'description_y']]
moreData.columns = ['amazon_name', 'amazon_description', 'google_name', 'google_description']
moreData['match'] = np.zeros(moreData.shape[0])
data = pd.concat([data, moreData])
data

Unnamed: 0.1,Unnamed: 0,amazon_description,amazon_name,descriptionDistance,firstWordSame,google_description,google_name,lastWordSame,match,nameDistance
4120,1520,zoo tycoon 2: marine mania is a great new expa...,zoo tycoon 2: marine mania expansion,1.000000,False,pantone solid chips 2-book set,solid chips 2book set,False,0,1.000000
3481,881,for song writing studio production film scorin...,motu digital performer 5 digital audio softwar...,0.917933,False,apple computer ma888z/a : usually ships in 24 ...,ma888z/a final cut studio - ( v. 2 ) - version...,False,0,0.898369
1870,570,with windows xp professional edition with serv...,microsoft windows xp professional full version...,0.983041,False,mac os 10.3.x or higher,symantec norton anti-virus 10 mac 5 user pack,False,0,1.000000
1058,1058,faxstf pro serious faxing demands! you can sen...,faxstf pro mac os 10.3.9 or above,0.938989,False,system requirements powerpc g3 or intel equipp...,allume smith micro faxstf pro,False,1,0.793863
2368,1068,halo takes you deep into the far future with t...,halo (mac),0.943361,False,freeverse software 006 : join the lancer squad...,freeverse software 006 - active lancer (mac 9....,False,0,0.858569
1822,522,iwork '06 features two powerful applications f...,apple iwork '06 family pack (mac dvd) [older v...,0.856589,False,boost growing minds with the brain-building ac...,brain play preschool - 1st grade,False,0,1.000000
1099,1099,in any given image you may need more than one ...,mask pro 4.0 1u,0.807805,False,impossible! not anymore with mask pro 4 a comp...,onone software mpr-40211 mask pro 4 1-user - f...,False,1,0.802147
3782,1182,designed for small organizations and departmen...,microsoft windows server 2003 client additiona...,0.825301,False,axis communications 0202-034 : usually ships i...,0202-034 axis camera station - license - 1 add...,False,0,0.872640
3687,1087,adobe after effects 7.0 win pb software sets n...,adobe after effects professional 7.0,0.854642,False,adobe systems 38039440dm : usually ships in 24...,38039440dm adobe flash cs3 professional - ( v....,False,0,0.831690
4612,2012,,discrete drums: vol.1,1.000000,False,two-tape set including the first volume settin...,tommy igoe hudson music getting started on drums,False,0,0.883282


In [70]:
# create distance features

data["descriptionDistance"] = list(map(lambda x: cosDist(x[0],x[1]), zip(data.google_description, data.amazon_description)))
data["nameDistance"] = list(map(lambda x: cosDist(x[0],x[1]), zip(data.google_name, data.amazon_name)))

In [71]:
data.to_csv("data_with_some_features_and_sims.csv", na_filter=False, encoding="ISO-8859-1")
data = pd.read_csv("data_with_some_features.csv", na_filter=False, encoding="ISO-8859-1")
data

Unnamed: 0.1,Unnamed: 0,amazon_description,amazon_name,descriptionDistance,google_description,google_name,match,nameDistance
0,0,,clickart 950 000 - premier image pack (dvd-rom),1.000000,massive collection of images & fonts for all y...,clickart 950000 - premier image pack (dvd-rom),1,3.264795e-01
1,1,,noah's ark activity center (jewel case ages 3-8),1.000000,,the beginners bible: noah's ark activity cente...,1,5.227885e-01
2,2,peachtree premium accounting for nonprofits 20...,peachtree by sage premium accounting for nonpr...,0.454952,if you're like most nonprofit organizations yo...,sage (ptree) - vernfp2007rt - premium accounti...,1,3.970252e-01
3,3,singing coach unlimited - electronic learning ...,singing coach unlimited,1.000000,learn to sing with the help of a patented real...,singing coach unlimited - electronic learning ...,1,4.202613e-01
4,4,upgrade only; installation of after effects st...,adobe after effects professional 6.5 upgrade f...,0.823889,adobe after effects pb 6.5 win upgrade.standar...,adobe software 22070152 after effects 6.5 pbupgrd,1,7.684295e-01
5,5,in mia's math adventure: just in time children...,mia's math adventure: just in time,0.771561,mia's math adventure tells a captivating story...,kutoka interactive 61208 mia's math adventure:...,1,2.907027e-01
6,6,in mia's math adventure: just in time children...,mia's math adventure: just in time,0.836395,kutoka interactive 61208 : mia s math adventur...,kutoka interactive 61208 - mias math adventure...,1,6.782923e-01
7,7,disney's 1st & 2nd grade bundle will help your...,disney's 1st & 2nd grade bundle (pixar 1st gra...,0.701580,disney learning 1st & 2nd features an all-star...,disney learning 1st-2nd grade win,1,5.810933e-01
8,8,how many times have you heard that it's the th...,paper art: gift wrapping,0.515701,how many times have you heard that it's the th...,paper art volume 2: gift wrapping,1,1.818198e-01
9,9,,spy sweeper spanish,1.000000,a sypware infection is no longer just a matter...,webroot software inc - 31250 - spy sweeper spa...,1,4.754089e-01


In [81]:
def xWordSame(s1, s2, ind):
#     for tkn in [",", "for", "with", "by"]:
#         s1_arr = s1.split(tkn)
#         s1_arr.reverse()
#         s2_arr = s2.split(tkn)
#         s2_arr.reverse()
#         s1 = ' '.join(s1_arr)
#         s2 = ' '.join(s2_arr)
    return s1.split()[ind]==s2.split()[ind]

data["lastWordSame"] = list(map(lambda x: xWordSame(x[0],x[1], -1), zip(data.google_name, data.amazon_name)))
data["firstWordSame"] = list(map(lambda x: xWordSame(x[0],x[1], 0), zip(data.google_name, data.amazon_name)))

In [82]:
# split data for cross validation
data = data.iloc[np.random.permutation(data.shape[0])]
msk = np.random.rand(data.shape[0]) < .8
train = data[msk]
test = data[~msk]
# trainData, trainLabels = train[['nameDistance', 'descriptionDistance']], train['match']
# testData, testLabels = test[['nameDistance', 'descriptionDistance']], test['match']
trainFeatures = ['nameDistance'
                 , 'lastWordSame'
                 , "firstWordSame"
#                  ,'descriptionDistance'
                ]
trainData, trainLabels = train[trainFeatures], train['match']
testData, testLabels = test[trainFeatures], test['match']

In [83]:
# fit and test a model
clf = RandomForestClassifier(100)
clf.fit(trainData, trainLabels)
preds = clf.predict(testData)
print("accuracy:\n" + str(np.mean(preds==testLabels)))
right = test.loc[preds==testLabels]
truePositives = right[right.match==1]
wrong = test.loc[preds!=testLabels]

accuracy:
0.952332657201

false positive count:
16

false positives


Unnamed: 0.1,Unnamed: 0,amazon_description,amazon_name,descriptionDistance,google_description,google_name,match,nameDistance,firstWordSame,lastWordSame
2645,45,quickbooks pro 2007 provides standard accounti...,quickbooks pro 2007 small business financial s...,0.892326,learning quickbooks 2007,learning quickbooks 2007,0,0.708931,False,False
3310,710,high school advantage 2008 was specially devel...,high school advantage 2008,0.71128,elementary school advantage? 2008 delivers the...,encore inc elementary advantage 2008,0,0.708781,False,True
4755,2155,,synet pc sharing kit(w/17ft usb ext.cable),1.0,nics electronics sup-040c : usually ships in 2...,sup-040c synet pc sharing kit - w/ usb cable 5...,0,0.572837,False,False
2774,174,boostxp for windows collects the tools that ke...,allume boost xp for windows,0.83675,hp eu063av aba : usually ships in 24 hours : :...,eu063av aba microsoft windows xp professional ...,0,0.847686,False,False
5032,2432,the movies answers a question that many of us ...,the movies (mac),0.836494,1. get ready 2. my girl 3. i can't help myself...,the richard friedman trio motown hits - *(for ...,0,0.798007,True,False
3509,909,with 3d home architect landscape designer 8.0 ...,3d home architect landscape designer v8,0.668364,the ultimate easy-to-use home design and remod...,encore inc 3d home architect home design delux...,0,0.66399,False,False
3727,1127,premiere elements brings your home videos to l...,adobe premiere elements 3.0 [lb],0.953805,system requirements: powerpc® g3 g4 or g5 proc...,adobe photoshop elements 4.0 mac,0,0.663903,True,False
3224,624,5 in 1 home design is a fully integrated 3-d h...,punch 5 in 1 home design,0.640651,punch! software 24100 : usually ships in 24 ho...,24100 punch! 5 in 1 home design - complete pac...,0,0.497671,False,False
3226,626,5 in 1 home design is a fully integrated 3-d h...,punch 5 in 1 home design,0.640651,punch! software 24100 : usually ships in 24 ho...,24100 punch! 5 in 1 home design - complete pac...,0,0.497671,False,False
3309,709,middle school advantage 2008 delivers award-wi...,middle school advantage 2008,0.668823,elementary school advantage? 2008 delivers the...,encore inc elementary advantage 2008,0,0.708781,False,True


In [15]:
with open("nameModel.pickle", 'wb') as f:
    pickle.dump(clf, f, -1)

In [158]:
# find price differences for matched products

priceData = pd.merge(amazon, amazon_google, left_on="id", right_on="idAmazon")
priceData = pd.merge(priceData, google, left_on="idGoogleBase", right_on="id")
priceData = priceData[['title', 'idAmazon', 'price_x', 'name',  'idGoogleBase', 'price_y']]
priceData.columns = ['amazon_name', 'amazon_id'
                     , 'amazon_price', 'google_name', 'google_id', 'google_price']

def floatMe(s):
    try:
        return float(s)
    except:
        return 0
    
priceData['amazon_price'] = priceData.amazon_price.map(floatMe)
priceData['google_price'] = priceData.google_price.map(floatMe)

priceData = priceData.loc[priceData.amazon_price!=0]
priceData = priceData.loc[priceData.google_price!=0]

priceData['priceDiff'] = priceData.amazon_price - priceData.google_price
priceData = priceData.sort('priceDiff')
priceData

Unnamed: 0,amazon_name,amazon_id,amazon_price,google_name,google_id,google_price,priceDiff
114,watchguard serverlock manager (100 servers),b00005atxo,14995.00,serverlock manager - 100 servers,http://www.google.com/base/feeds/snippets/1143...,56543.88,-41548.88
456,steinberg halion symphonic orchestra virtual i...,b000k6b2l0,374.99,steinberg software halion symphonic orchestra ...,http://www.google.com/base/feeds/snippets/1783...,545.51,-170.52
575,adobe photoshop cs2 (mac) [old version],b00081i6ji,649.00,adobe systems inc 13102124 adobe photoshop cs ...,http://www.google.com/base/feeds/snippets/1841...,788.63,-139.63
494,microsoft windows terminal server 2003 client ...,b00008mnxz,669.00,win 2003 ter svr cal 5pk microsoft r19-00846,http://www.google.com/base/feeds/snippets/1838...,762.95,-93.95
425,adobe creative suite cs3 design premium [mac],b000ndibri,1799.00,adobe cs3 design premium,http://www.google.com/base/feeds/snippets/2224...,1865.99,-66.99
1009,aquazone bass edition,b000gaqlxe,19.99,allume checkit system performance suite,http://www.google.com/base/feeds/snippets/1382...,84.99,-65.00
321,serious magic ultra2 master sets library 2 ( w...,b0009yx9by,276.50,serious magic ultra master sets library 1 effe...,http://www.google.com/base/feeds/snippets/5238...,339.95,-63.45
388,adobe creative suite cs3 web premium [mac],b000ndibvo,1599.00,adobe cs3 web premium,http://www.google.com/base/feeds/snippets/5767...,1659.99,-60.99
1011,adobe creative suite cs3 master collection upsell,b000ndibl4,1999.00,adobe cs3 master collection upsell from cs1,http://www.google.com/base/feeds/snippets/1499...,2054.99,-55.99
742,adobe creative suite cs3 production premium,b000ogsyjy,1699.00,adobe cs3 production premium,http://www.google.com/base/feeds/snippets/1657...,1749.99,-50.99


In [204]:
# calculate the potential value of arbitrage opportunities discovered in the test data
truePositivePrices = pd.merge(truePositives, priceData)
truePositivePrices['absPriceDiff'] = truePositivePrices.priceDiff.map(np.absolute)
print("Number of test examples:\n" + str(test.shape[0]))
print("Number of correctly matched examples:\n" 
      + str(truePositivePrices.shape[0]))
print("Number of correctly matched examples with identical name:\n" 
      + str(truePositivePrices.loc[truePositivePrices.amazon_name 
                                   == truePositivePrices.google_name].shape[0]))
print("Number of correctly matched examples with price differences:\n" 
      + str(truePositivePrices.loc[truePositivePrices.absPriceDiff != 0].shape[0]))
print("Number of Correctly Matched Examples")
print("Net Price Differences in Correctly Matched Products:\n" + str(truePositivePrices.absPriceDiff.sum()))
print("Net Price Differences under $2500 in Matched Products:\n" 
      + str(truePositivePrices.loc[truePositivePrices.absPriceDiff < 2500].absPriceDiff.sum()))
print("Net Price Differences under $500 in Matched Products:\n" 
      + str(truePositivePrices.loc[truePositivePrices.absPriceDiff < 500].absPriceDiff.sum()))
truePositivePrices.sort('absPriceDiff', 0, False)
truePositivePrices.loc[truePositivePrices.absPriceDiff < 10]

Number of test examples:
529
Number of correctly matched examples:
257
Number of correctly matched examples with identical name:
7
Number of correctly matched examples with price differences:
225
Number of Correctly Matched Examples
Net Price Differences in Correctly Matched Products:
55029.47
Net Price Differences under $2500 in Matched Products:
8934.48
Net Price Differences under $500 in Matched Products:
5204.11


Unnamed: 0,amazon_name,amazon_description,google_name,google_description,match,descriptionDistance,nameDistance,amazon_id,amazon_price,google_id,google_price,priceDiff,absPriceDiff
0,adobe illustrator cs3 [mac],adobe illustrator cs3 software allows you to e...,adobe illustrator cs3 (mac),key features: live color live trace live paint...,1,0.937273,0.000000e+00,b000ndib8m,599.00,http://www.google.com/base/feeds/snippets/1083...,599.00,0.00,0.00
2,national geographic photo gallery (jewel case),national geographic photo gallery gives you th...,national geographic photo gallery topics,key features: 3000 color photos meet the photo...,1,0.927559,4.196670e-01,b0000afu4e,9.99,http://www.google.com/base/feeds/snippets/1037...,9.95,0.04,0.04
3,hoyle: classic collection 2006,like a virtual casino on your pc / software in...,encore software 11052 - hoyle: classic collect...,encore software 11052 : hoyle the 1 brand in f...,1,0.946339,5.505636e-01,b000hlp5yi,19.99,http://www.google.com/base/feeds/snippets/9785...,18.97,1.02,1.02
4,caillou ready for school (pc & mac),caillou helps young children to make sense of ...,caillou ready for school by brighter minds,macintosh minimums requirements 266mhz g3 proc...,1,0.958309,5.497319e-01,b000a0s3x6,19.99,http://www.google.com/base/feeds/snippets/1601...,19.99,0.00,0.00
5,photoedit 2 (mac),photoedit 2 goes beyond basic photo enhancemen...,summitsoft corp photoedit 2,becoming a photography pro is child's play usi...,1,0.813048,7.394443e-01,b0009jearq,29.99,http://www.google.com/base/feeds/snippets/1719...,26.14,3.85,3.85
6,adobe photoshop elements 5.0/premier elements ...,model- adbcd00166wi vendor- adobe software fea...,adobe premiere elements 3 and photoshop elemen...,premiere elements 3 and photoshop elements 5 s...,1,0.668956,6.387891e-01,b000hlp5do,149.99,http://www.google.com/base/feeds/snippets/4835...,149.95,0.04,0.04
7,vegas movie studiodvd+8 plat,becoming a movie mogul or star is easy with so...,sony vegas movie studio+dvd v8 platinum editio...,vegas movie studio+dvd v8 platinum edition vid...,1,0.620076,8.585694e-01,b000ro5o7s,129.95,http://www.google.com/base/feeds/snippets/7129...,129.95,0.00,0.00
8,apple remote desktop 2.2 10 client (old version),remote desktop 2.2 makes it easier than it's e...,apple software m9953z/a remote desktop 2.2 10 ...,the 10-client version allows one administrator...,1,0.803277,4.413822e-01,b0009dwkwo,299.00,http://www.google.com/base/feeds/snippets/9753...,294.99,4.01,4.01
9,apple remote desktop 2.2 10 client (old version),remote desktop 2.2 makes it easier than it's e...,apple software m9953z/a remote desktop 2.2 10 ...,the 10-client version allows one administrator...,1,0.803277,4.413822e-01,b0009dwkwo,299.00,http://www.google.com/base/feeds/snippets/8882...,294.99,4.01,4.01
10,apple remote desktop 2.2 10 client (old version),remote desktop 2.2 makes it easier than it's e...,apple software m9953z/a remote desktop 2.2 10 ...,the 10-client version allows one administrator...,1,0.803277,4.413822e-01,b0009dwkwo,299.00,http://www.google.com/base/feeds/snippets/1574...,294.99,4.01,4.01


In [12]:
a = 'hat'.split()
a.reverse()
a

['hat']