In [1]:
'''
Code copied from this source as recommended by Ebay official since current version of scikit-learn does not have pair confusion matrix
https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/cluster/_supervised.py
'''
import numpy as np
from sklearn.metrics.cluster._supervised import contingency_matrix, check_clusterings
def pair_confusion_matrix(labels_true, labels_pred):
    """Pair confusion matrix arising from two clusterings.
    The pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix
    between two clusterings by considering all pairs of samples and counting
    pairs that are assigned into the same or into different clusters under
    the true and predicted clusterings.
    Considering a pair of samples that is clustered together a positive pair,
    then as in binary classification the count of true negatives is
    :math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is
    :math:`C_{11}` and false positives is :math:`C_{01}`.
    Read more in the :ref:`User Guide <pair_confusion_matrix>`.
    Parameters
    ----------
    labels_true : array-like of shape (n_samples,), dtype=integral
        Ground truth class labels to be used as a reference.
    labels_pred : array-like of shape (n_samples,), dtype=integral
        Cluster labels to evaluate.
    Returns
    -------
    C : ndarray of shape (2, 2), dtype=np.int64
        The contingency matrix.
    See Also
    --------
    rand_score: Rand Score
    adjusted_rand_score: Adjusted Rand Score
    adjusted_mutual_info_score: Adjusted Mutual Information
    Examples
    --------
    Perfectly matching labelings have all non-zero entries on the
    diagonal regardless of actual label values:
      >>> from sklearn.metrics.cluster import pair_confusion_matrix
      >>> pair_confusion_matrix([0, 0, 1, 1], [1, 1, 0, 0])
      array([[8, 0],
             [0, 4]]...
    Labelings that assign all classes members to the same clusters
    are complete but may be not always pure, hence penalized, and
    have some off-diagonal non-zero entries:
      >>> pair_confusion_matrix([0, 0, 1, 2], [0, 0, 1, 1])
      array([[8, 2],
             [0, 2]]...
    Note that the matrix is not symmetric.
    References
    ----------
    .. L. Hubert and P. Arabie, Comparing Partitions, Journal of
      Classification 1985
      https://link.springer.com/article/10.1007%2FBF01908075
    """
    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
    n_samples = np.int64(labels_true.shape[0])

    # Computation using the contingency data
    contingency = contingency_matrix(
        labels_true, labels_pred, sparse=True
        )#, dtype=np.int64)
    n_c = np.ravel(contingency.sum(axis=1))
    n_k = np.ravel(contingency.sum(axis=0))
    sum_squares = (contingency.data ** 2).sum()
    C = np.empty((2, 2), dtype=np.int64)
    C[1, 1] = sum_squares - n_samples
    C[0, 1] = contingency.dot(n_k).sum() - sum_squares
    C[1, 0] = contingency.transpose().dot(n_c).sum() - sum_squares
    C[0, 0] = n_samples ** 2 - C[0, 1] - C[1, 0] - sum_squares
    return C

# Pre-Processing Data

In [9]:
import pandas as pd
akshit_df = './mlchallenge_set_2021.tsv'
akshit_valid = './mlchallenge_set_validation.tsv'
sam_df = 'C:/Users/sjmal/OneDrive/Desktop/ML/2021/mlchallenge_set_2021_edited.txt'
sam_valid = 'C:/Users/sjmal/OneDrive/Desktop/ML/2021/mlchallenge_set_validation.tsv'
SA_valid=pd.read_table(akshit_valid,header=None)
SA_df=pd.read_table(akshit_df,header=None)
df=SA_df
valid=SA_valid 
#df = pd.read_table(akshit_df)
#valid = pd.read_table(akshit_valid,sep='\t')
SA_valid = valid
SA_df = df
df.columns=['category','primary_image_url','All Links','Tags','index']
valid.columns=['ID', 'Group']
valid.set_index('ID', inplace=True)

In [10]:
"""
Import test output file as dataframe and set the index.
"""
output = './akshit.tsv'
predictions = pd.read_table(output,header=None)
predictions.columns = ['ID', 'Group']
predictions.set_index('ID', inplace=True)

In [11]:
print(predictions)

          Group
ID             
0             1
1             2
2             3
3             4
4             5
...         ...
1006085  900113
1006086  900114
1006087  900115
1006088  900116
1006089  900117

[1002275 rows x 1 columns]


In [12]:
"""
Compute confusion matrix for validation data. Use valid dataframe to extract values from a tsv output file.
"""
preds = []
actuals = []
for index, row in valid.iterrows():
    preds.append(predictions.loc[index]['Group'])
    actuals.append(row['Group'])
# print(preds[100:200])
# print(actuals[100:200])
def evaluate(actuals, preds):
    C = pair_confusion_matrix(actuals, preds)
    precision = C[1][1] / (C[1][1] + C[0][1]) # (true positives) / (true positives + false positives)
    recall = C[1][1] / (C[1][1] + C[1][0]) # (true positives) / (true positives + false negatives)
    print(f'Confusion Matrix:\n{C}')
    print(f'precision: {precision}')
    print(f'recall: {recall}')
    print(f'f1 score: {2 * precision * recall / (precision + recall)}')

evaluate(actuals, preds)

Confusion Matrix:
[[154807560      2892]
 [     3974      1380]]
precision: 0.32303370786516855
recall: 0.2577512140455734
f1 score: 0.2867234573031373


In [13]:
print(df.head())
print(valid.head())

   category                                  primary_image_url  \
0         2  https://i.ebayimg.com/00/s/MTYwMFgxMjAw/z/iYYA...   
1         2  https://i.ebayimg.com/00/s/MTA1OFgxMTM0/z/KPIA...   
2         2  https://i.ebayimg.com/00/s/MTIwMFgxNjAw/z/flIA...   
3         2  http://i.ebayimg.com/00/s/ODAwWDEwNjc=/z/XHcAA...   
4         2  https://i.ebayimg.com/00/s/MTA2N1gxNjAw/z/scsA...   

                                           All Links  \
0  https://i.ebayimg.com/00/s/MTYwMFgxMjAw/z/iYYA...   
1  https://i.ebayimg.com/00/s/MTA1OFgxMTM0/z/KPIA...   
2  https://i.ebayimg.com/00/s/MTIwMFgxNjAw/z/flIA...   
3  http://i.ebayimg.com/00/s/ODAwWDEwNjc=/z/XHcAA...   
4  https://i.ebayimg.com/00/s/MTA2N1gxNjAw/z/scsA...   

                                                Tags  index  
0  (Brand:Shimano,US Shoe Size (Men's):4.5,Modifi...      0  
1  (Color:Gray/White,Country/Region of Manufactur...      1  
2  (Style:Cleats,Color:White Orange,US Shoe Size ...      2  
3  (Width:Medium (

In [14]:
#split links into list (for now only does first 100)
i = 0
for link in df['All Links'][0:100]:
    df['All Links'][i] = link.split(';')
    i+=1
print(df['All Links'][0:100])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


0     [https://i.ebayimg.com/00/s/MTYwMFgxMjAw/z/iYY...
1     [https://i.ebayimg.com/00/s/MTA1OFgxMTM0/z/KPI...
2     [https://i.ebayimg.com/00/s/MTIwMFgxNjAw/z/flI...
3     [http://i.ebayimg.com/00/s/ODAwWDEwNjc=/z/XHcA...
4     [https://i.ebayimg.com/00/s/MTA2N1gxNjAw/z/scs...
                            ...                        
95    [https://i.ebayimg.com/00/s/MTU5OVgxNTgx/z/vMA...
96    [https://i.ebayimg.com/00/s/MTYwMFgxMjAw/z/GoA...
97    [https://i.ebayimg.com/00/s/MTYwMFgxMjAw/z/Hm0...
98    [https://i.ebayimg.com/00/s/MTYwMFgxMjAw/z/VhA...
99    [https://i.ebayimg.com/00/s/MTYwMFgxNjAw/z/n4o...
Name: All Links, Length: 100, dtype: object


In [15]:
"""
Frequency counting of the tags in a particular category.
"""
import re
from collections import Counter
# for category in range(1, 6):
df = SA_df
df = df.loc[df['category'] == 5]
freq=Counter()
attribute=[['']]*len(df)
print('allocated array')
trialrange=len(df)
for x in range(0, trialrange, 5):#range(int(len(df)/10)):#len(df)
    attribute[x]=df.iloc[x,3].lower()
    attribute[x] = attribute[x][1:-1] # remove starting and ending parentheses
    attribute[x] = re.split(r',(?![^(]*\))', attribute[x]) # ignore commas that are inside parentheses
    attribute[x] = [a.split(':') for a in attribute[x]]
    freq+=Counter([i[0] for i in attribute[x]])
    tempdict={}
#     print(df.iloc[x,3])
    #print(attribute[x])
#     print(attribute)
    for i in attribute[x]:
        try:
            tempdict[i[0]]=float(i[1])
        except:
            try:
                tempdict[i[0]]=i[1]
            except:
                pass
    attribute[x]=tempdict
#     print(attribute[x])
df = SA_df
print("am out of loop")

allocated array
am out of loop


In [16]:
d = dict((k, v) for k, v in freq.items() if v > 20)
array = list(sorted(d, key=lambda k: d[k],reverse=True))[:10]
d= {k: v for k, v in sorted(d.items(), key=lambda item: item[1],reverse=True)}
print(d)
print(array)
import matplotlib.pyplot as plt
#import matplotlib.pyplot as plt
plt.bar(d.keys(),d.values())
plt.show()

['brand', 'mpn', 'model', 'type', 'upc', 'ean', 'cord type', 'bagged/bagless', 'color', 'filters']


<Figure size 640x480 with 1 Axes>

In [17]:
df = SA_df

In [4]:
import re
validation_rows = df.loc[df['index'].isin(valid.index)]
validation_rows = validation_rows.loc[validation_rows['category'] == 1]
c1_rows = df.loc[df['category'] == 1]
c3_rows = df.loc[df['category'] == 3]
c3_indices = c3_rows.copy()
c3_indices.set_index('index', inplace=True)
c4_rows = df.loc[df['category'] == 4]
c4_indices = c4_rows.copy()
c4_indices.set_index('index', inplace=True)
keep_tags = ['brand', 'size type', "bottoms size (women's)", 'material', 'inseam', 'color', 'rise', 'style', 'silhouette', 'country/region of manufacture']
keep_tags = ['brand', 'material', 'type', 'pattern', 'model', 'country/region of origin', 'mpn', 'country/region of manufacture', 'handle material', 'number of settings','color']
# keep_tags =  ['brand', 'color', 'material', 'type', 'mpn', 'pattern', 'bed size', 'size', 'country/region of manufacture', 'style']
# df = category5
def register_attributes(attribute, all_attributes):
    attribute = attribute[1:-1].lower() # attribute = re.sub(r'[()]','', attribute)
    attribute = re.split(r',(?![^(]*\))', attribute)
    attribute = [a.split(':') for a in attribute]
    for i, a in enumerate(attribute):
        attribute[i] = [s.strip() for s in a]
        if attribute[i][0] in keep_tags:
            all_attributes.add(attribute[i][0])
    #print(f'atttribute is: {attribute}')
    mapping = {}
    #for i in range(len(attribute) - 1):
    #    if i == len(attribute) - 2:
    #        mapping[attribute[i][-1]] = attribute[i + 1][:]
    #    else:
    #        mapping[attribute[i][-1]] = attribute[i + 1][:-1]
    return(attribute)

def map_attributes(attribute, num_attributes, index_to_attr):
    attribute = attribute[1:-1].lower() # attribute = re.sub(r'[()]','', attribute)
    attribute = re.split(r',(?![^(]*\))', attribute)
    attribute = [a.split(':') for a in attribute]
    all_attributes_for_row = [None] * num_attributes
    for i, a in enumerate(attribute):
        attribute[i] = [s.strip() for s in a]
        #print(f'index: {attr_to_index[attribute[i][0]]}')
        if len(attribute[i]) > 1 and attribute[i][0] in keep_tags:
            all_attributes_for_row[attr_to_index[attribute[i][0]]] = attribute[i][1]
    mapping = {}
    #for i in range(len(attribute) - 1):
    #    if i == len(attribute) - 2:
    #        mapping[attribute[i][-1]] = attribute[i + 1][:]
    #    else:
    #        mapping[attribute[i][-1]] = attribute[i + 1][:-1]
    return all_attributes_for_row
m = 2000
all_attributes = set()
all_maps = []
for index,row in c3_rows[0:].iterrows():
    register_attributes(row['Tags'], all_attributes)

all_attributes = list(all_attributes)
attr_to_index = {all_attributes[i]: i for i in range(len(all_attributes))}
#print(attr_to_index)
#print(f'numAttributes: {len(all_attributes)}')

for index,row in c3_rows[0:].iterrows():
    all_maps.append(map_attributes(row['Tags'], len(all_attributes), attr_to_index))
possible_vals = set()
from collections import Counter
c = Counter()
for v in all_maps:
    possible_vals.update(v)
    c.update(v)
print(len(possible_vals))
print(c)

# CNN to One Hot Encoder

In [25]:
#dataframe with attribute values
#print(type(all_maps))
categories = pd.DataFrame(all_maps)
categories.columns=all_attributes
print(categories)

       size                 color  bed size country/region of manufacture  \
0      None                  None      None                          None   
1      None                  gray      crib                          None   
2      None                  None      None                          None   
3      None                  blue     queen                          None   
4      None        birch grey fan  any size                         india   
...     ...                   ...       ...                           ...   
42861  full              burgundy      None                          None   
42862  None            royal blue      None                          None   
42863  None            gray/white      None                          None   
42864  None                 green      None                          None   
42865  None  all colors available      None                         india   

                        brand              mpn         material  \
0       

In [32]:
from sklearn.preprocessing import OneHotEncoder
oh = OneHotEncoder()
x = oh.fit_transform(categories.astype(str))
x=x.toarray()
print(x)#[0][440:450])
print(type(x))

[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
<class 'numpy.ndarray'>


In [33]:
colorind=all_attributes.index('color')
temp=list(oh.categories_[colorind])#[1339]
simple_colors = {'black':0,'white':0,'red':0,'blue':0,'green':0,'yellow':0,'brown':0,'purple':0,'pink':0,'gray':0,'grey':0}

for key in simple_colors.keys():
    try:
        simple_colors[key]=temp.index(key)
    except:
        print(key)
print(simple_colors)

{'black': 74, 'white': 438, 'red': 332, 'blue': 102, 'green': 194, 'yellow': 453, 'brown': 108, 'purple': 315, 'pink': 304, 'gray': 192, 'grey': 201}


In [35]:
from ast import literal_eval
maxind=x.shape[0]
for filenum in range(1,69): #later change 3 to 69
    cnn=pd.read_csv('CNN predictions/predictions'+str(filenum)+'.csv',index_col=0)
    si = len(cnn)
    print(filenum)
    for i in range(si): #change 5 to si
        ind=cnn.iloc[i].loc['ID']
        #print(ind)
        if(ind>=maxind):
            break
        colortup=cnn.iloc[i].loc['Predictions']
        colortup=colortup[1:-1]
        colortup=literal_eval(colortup)
        for y in colortup:
            #print(y)
            x[ind][simple_colors[y[0]]]=y[1]
            #insert y[1] at onehotencoder matrix (ind,simple_color[y[0]])

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68


In [24]:
'''#does not work
from numpy import savetxt
savetxt('onehot.csv', x, delimiter=',')
'''

KeyboardInterrupt: 

In [36]:
from scipy import sparse
x=sparse.csr_matrix(x)
print(x)

  (0, 260)	0.0003537335
  (0, 342)	0.0023856151
  (0, 441)	1.0
  (0, 571)	1.7122915e-06
  (0, 1339)	0.012617967
  (0, 1410)	0.015659288
  (0, 2288)	2.5023717e-05
  (0, 2474)	0.038465764
  (0, 2549)	3.063071e-05
  (0, 2685)	1.0
  (0, 3197)	0.9304554
  (0, 3677)	4.875297e-06
  (0, 4646)	1.0
  (0, 4839)	1.0
  (0, 6523)	1.0
  (0, 13675)	1.0
  (0, 16909)	1.0
  (0, 17556)	1.0
  (0, 18437)	1.0
  (0, 20235)	1.0
  (1, 441)	1.0
  (1, 1912)	1.0
  (1, 4543)	1.0
  (1, 4839)	1.0
  (1, 7843)	1.0
  :	:
  (42864, 11573)	1.0
  (42864, 16909)	1.0
  (42864, 17556)	1.0
  (42864, 18437)	1.0
  (42864, 20235)	1.0
  (42865, 260)	0.0019616762
  (42865, 342)	0.027237786
  (42865, 441)	1.0
  (42865, 571)	0.00038952043
  (42865, 719)	1.0
  (42865, 1339)	0.044497576
  (42865, 1410)	0.02223185
  (42865, 2288)	2.9962548e-05
  (42865, 2474)	0.00044774055
  (42865, 2549)	0.04073774
  (42865, 3197)	0.86213225
  (42865, 3677)	0.00033399908
  (42865, 4646)	1.0
  (42865, 4824)	1.0
  (42865, 5880)	1.0
  (42865, 14783)	1.0
 

# K-Means: Big Picture Clustering

# K-Means: Big Picture Clustering

In [31]:
print(c4_rows.head())

       category                                  primary_image_url  \
32255         4  https://i.ebayimg.com/00/s/NDUwWDU5Mw==/z/YbQA...   
32256         4  http://i.ebayimg.com/00/s/ODQxWDEyMDA=/z/rqoAA...   
32257         4  https://i.ebayimg.com/00/s/OTAwWDEyMDA=/z/ViEA...   
32258         4  http://i.ebayimg.com/00/s/MTYwMFgxMzA2/z/ipIAA...   
32259         4  https://i.ebayimg.com/00/s/MTU5OVgxMjAw/z/OeUA...   

                                               All Links  \
32255  https://i.ebayimg.com/00/s/NDUwWDU5Mw==/z/YbQA...   
32256  http://i.ebayimg.com/00/s/ODQxWDEyMDA=/z/rqoAA...   
32257  https://i.ebayimg.com/00/s/OTAwWDEyMDA=/z/ViEA...   
32258  http://i.ebayimg.com/00/s/MTYwMFgxMzA2/z/ipIAA...   
32259  https://i.ebayimg.com/00/s/MTU5OVgxMjAw/z/OeUA...   

                                                    Tags  index  
32255             (MPN:Does not Apply,Brand:Home Source)  32604  
32256  (Bed Size:Crib,Brand:Pottery Barn Kids,Color:g...  32605  
32257               

In [32]:
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=200, random_state=0,verbose=True, n_init=1, max_iter = 15).fit(x)

Initialization complete
Iteration 0, inertia 244300.0
Iteration 1, inertia 161463.84402423847
Iteration 2, inertia 157319.0307040155
Iteration 3, inertia 155748.58302736084
Iteration 4, inertia 154877.50577976013
Iteration 5, inertia 154414.81079414586
Iteration 6, inertia 154144.4275253625
Iteration 7, inertia 153944.6661662255
Iteration 8, inertia 153733.22160036018
Iteration 9, inertia 153506.09136626121
Iteration 10, inertia 153405.80915242637
Iteration 11, inertia 153338.97435877848
Iteration 12, inertia 153275.9882537595
Iteration 13, inertia 153211.4333846787
Iteration 14, inertia 153085.817547771


In [57]:
count = {}
for i, label in enumerate(kmeans.labels_):
    if label not in count:
        count[label] = 0
    count[label] += 1
print(count)
print(list(sorted(count.values()))[-1:-10:-1])

{11: 949, 138: 154, 152: 107, 69: 311, 20: 273, 182: 240, 47: 344, 58: 344, 75: 237, 14: 219, 109: 289, 52: 184, 60: 429, 62: 222, 115: 225, 146: 137, 179: 105, 117: 192, 191: 244, 61: 71, 80: 362, 81: 205, 197: 142, 10: 293, 67: 244, 22: 1038, 13: 219, 199: 492, 71: 162, 161: 123, 55: 186, 183: 179, 159: 49, 113: 181, 88: 262, 39: 202, 166: 206, 163: 264, 50: 225, 12: 166, 162: 116, 178: 69, 89: 58, 211: 1497, 116: 249, 188: 144, 143: 305, 170: 352, 53: 245, 158: 274, 171: 136, 65: 315, 118: 240, 181: 172, 134: 58, 106: 178, 66: 235, 135: 71, 57: 95, 87: 260, 112: 215, 17: 337, 29: 304, 1: 173, 48: 196, 27: 857, 127: 185, 180: 388, 15: 214, 64: 187, 100: 188, 153: 116, 186: 256, 184: 243, 145: 98, 76: 463, 73: 201, 136: 105, 56: 176, 130: 113, 31: 459, 110: 271, 19: 303, 42: 142, 43: 113, 105: 209, 155: 385, 51: 256, 132: 23, 5: 313, 3: 227, 83: 139, 72: 217, 160: 211, 37: 466, 7: 178, 18: 339, 190: 170, 2: 1206, 28: 271, 133: 149, 131: 273, 96: 407, 120: 99, 74: 203, 6: 323, 38: 70, 

In [56]:
# reduce size of k means clusters that are too large
THRESH = 1500
labels = kmeans.labels_
count_so_far = max(labels)
for label in count:
    if count[label] > THRESH:
        # run k-means again
        indices = [i for i in range(x.shape[0]) if labels[i] == label]
        items = [x[i] for i in indices]# get items matching the label
        km = KMeans(n_clusters=20, random_state=0,verbose=True, n_init=1, max_iter = 15).fit(items)
        for km_index, i in enumerate(indices):
            labels[i] = km.labels_[km_index] + count_so_far
        count_so_far = max(labels)


Initialization complete
Iteration 0, inertia 10630.0
Iteration 1, inertia 5529.42577252193
Iteration 2, inertia 5523.584319097626
Iteration 3, inertia 5522.662833614143
Converged at iteration 3: strict convergence.


In [60]:
"""
Analysis of K-Means output
"""
preds = []
actuals = []
for i, label in enumerate(labels):
#     print(f'row {i}\n: {df.iloc[i]}')
    ID = c4_rows.iloc[i]['index']
    if ID in valid.index:
        preds.append(label)
        actual = valid.loc[ID]['Group']
        actuals.append(actual)
#     print(f'ID: {ID}, pred: {label}, actual: {actual}')
print(kmeans.inertia_)
print(len(preds))

evaluate(actuals, preds)

153046.58832855825
600
Confusion Matrix:
[[355110   3996]
 [    48    246]]
precision: 0.05799151343705799
recall: 0.8367346938775511
f1 score: 0.10846560846560846


# Aggloromative: Granualar Clustering

In [61]:
from sklearn.cluster import AgglomerativeClustering

def cluster(data):
    clustering = AgglomerativeClustering(compute_full_tree = True,distance_threshold = 0.1,n_clusters=None).fit(data)
    print(clustering)
#     print("LABELS")
#     print(clustering.labels_)
#     print(len(clustering.labels_))
#     print(len(set(clustering.labels_)))
#     print("DISTAnCES")
#     print(clustering.distances_)
#     print("num connected components")
#     print(clustering.n_connected_components_)
    # make this better
    # make this work on the entire dataset
    # fix nonetypes
    # don't punish missing attributes, but punish conflicts. how do we encode this?

    return clustering

In [62]:
x = x.toarray() # only run this once

AttributeError: 'numpy.ndarray' object has no attribute 'toarray'

In [None]:
# x *= 10
x

In [64]:
full_clustering = [0] * len(x)
count_so_far = 0
for label in count:
    indices = [i for i in range(x.shape[0]) if labels[i] == label]
    data = [x[i] for i in indices]
    print(f'now clustering group {label} which has {len(data)} items')
    c = cluster(data)
    for i, label in enumerate(c.labels_):
        full_clustering[indices[i]] = label + count_so_far
#     print(c.labels_)
    count_so_far += len(data)

now clustering group 11 which has 949 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 138 which has 154 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 152 which has 107 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 69 which has 311 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 20 which has 273 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 182 which has 240 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 47 which has 344 items
Agglomerati

AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 181 which has 172 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 134 which has 58 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 106 which has 178 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 66 which has 235 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 135 which has 71 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 57 which has 95 items
AgglomerativeClustering(compute_full_tree=True, distance_

AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 38 which has 70 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 122 which has 113 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 54 which has 56 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 195 which has 112 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 114 which has 160 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 34 which has 45 items
AgglomerativeClustering(compute_full_tree=True, distance_t

AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 147 which has 107 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 192 which has 42 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 16 which has 68 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 144 which has 73 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 165 which has 133 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 63 which has 75 items
AgglomerativeClustering(compute_full_tree=True, distance_t

AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 225 which has 65 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 206 which has 16 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 246 which has 11 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 233 which has 50 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 220 which has 51 items
AgglomerativeClustering(compute_full_tree=True, distance_threshold=0.1,
                        n_clusters=None)
now clustering group 237 which has 30 items
AgglomerativeClustering(compute_full_tree=True, distance_t

In [None]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

In [65]:
groups = {}
for i, label in enumerate(full_clustering):
    if label not in groups:
        groups[label] = []
    groups[label].append(i)
groups = {label: groups[label] for label in groups if len(groups[label]) > 1}
print(groups)
for label in groups:
    print(f'GROUP: {label}')
    for item in groups[label]:
        print(c4_rows['Tags'].iloc[[item]].to_string())
    print('-----------------')

{949: [1, 9596, 41928], 1103: [2, 176, 177, 2808, 2809, 2810, 2811, 2994, 2995, 2996, 2997, 2998, 2999, 3000, 5730, 5731, 5732, 5905, 5906, 5907, 5908, 5909, 8718, 8719, 8720, 8721, 8722, 11319, 11486, 11487, 11488, 11489, 11490, 11491, 14144, 14313, 14314, 14315, 16881, 16882, 17057, 17058, 17059, 17060, 17061, 17062, 19635, 19636, 19637, 19789, 19790, 19791, 19792, 19793, 22380, 22381, 22382, 22383, 22555, 22556, 22557, 22558, 22559, 22560, 22561, 25345, 26590, 26947, 27524, 27598, 28047, 28832, 29640, 29750, 30356, 30607, 30646, 30931, 31047, 31055, 31289, 32177, 33196, 35059, 36061, 36104, 36108, 36278, 36991, 37383, 38097, 38852, 39222, 39327, 39339, 39504, 39725, 40427, 41187, 41887, 42038, 42063, 42250, 42535, 42702, 42782], 2389: [7, 8, 5735, 5736, 14152, 14153, 14154, 16893, 19643, 19644, 27357, 27782], 2741: [9, 20916], 3668: [13, 154, 155, 180, 184, 347, 356, 675, 916, 1189, 1281, 1296, 1979, 2259, 2271, 2299, 2458, 2460, 2501, 2597, 2604, 2826, 2897, 2958, 2959, 3087, 3098,

190981    ()
191036    ()
191058    ()
191217    (UPC:Does Not Apply)
191307    ()
191393    ()
191425    ()
191485    ()
191489    ()
191848    ()
191861    ()
191870    ()
192034    ()
192123    ()
192172    ()
192410    ()
192413    ()
192569    ()
192570    ()
192720    ()
192777    ()
192865    ()
192908    ()
193657    ()
193903    ()
561652    ()
561900    ()
562212    ()
562213    ()
562254    ()
562498    ()
562530    ()
562535    ()
562553    ()
562731    ()
562889    ()
562912    ()
563309    (Pillowcase Size:Standard)
563448    ()
563599    ()
563604    ()
563617    ()
563641    ()
563731    ()
564298    ()
564491    ()
564760    ()
565321    ()
565337    ()
565494    ()
565617    ()
565697    ()
565747    ()
566168    ()
566306    ()
566317    ()
566354    ()
566370    ()
566543    ()
566643    ()
873304    ()
873634    ()
873953    ()
874471    ()
874498    ()
874560    ()
874828    ()
875707    ()
876041    ()
876584    ()
876661    ()
878210    ()
879234    ()
879496   

919820    (Country/Region of Manufacture:China,Brand:Unb...
920849    (ASIN:2XTEJH1GSH,MPN:Does not Apply,Country/Re...
925332    (ASIN:CNOHCPIJFJ,MPN:Does not Apply,Brand:Unbr...
933647    (MPN:Does not Apply,Country/Region of Manufact...
937612    (ASIN:2XTEJH1GSH,Brand:Unbranded/Generic,Count...
942780    (Brand:Unbranded/Generic,Package size:13 * 13 ...
947805    (MPN:Does not Apply,ASIN:PSSFUELURC,Country/Re...
948065    (Brand:Unbranded/Generic,Country/Region of Man...
951084    (MPN:Does not Apply,ASIN:2XTEJH1GSH,Brand:Unbr...
972855    (Brand:Unbranded/Generic,Unfold size:45 * 45cm...
976633    (Brand:Unbranded/Generic,Country/Region of Man...
982647    (Country/Region of Manufacture:China,MPN:Does ...
983434    (ASIN:PSSFUELURC,MPN:Does not Apply,Unfold siz...
984800    (ASIN:PSSFUELURC,Country/Region of Manufacture...
984971    (Country/Region of Manufacture:China,MPN:Does ...
988823    (ASIN:PSSFUELURC,Brand:Unbranded/Generic,Packa...
991955    (Package size:13 * 13 * 4cm / 

930943    (Country/Region of Manufacture:China,Designer/...
931686    (Material:as described,Designer/Brand:Unbrande...
932122    (MPN:Does Not Apply,Designer/Brand:Unbranded,S...
934810    (Material:as described,Color:as described,Desi...
938667    (Color:as described,Material:as described,Size...
941531    (Brand:Unbranded,MPN:Does Not Apply,Material:a...
942335    (Brand:Unbranded,MPN:Does Not Apply,Designer/B...
944363    (Country/Region of Manufacture:China,MPN:Does ...
944429    (Country/Region of Manufacture:China,Size:as d...
944876    (MPN:Does Not Apply,Material:as described,Colo...
945112    (Color:as described,MPN:Does Not Apply,Designe...
945307    (Material:as described,Designer/Brand:Unbrande...
947610    (Country/Region of Manufacture:China,Brand:Unb...
947735    (Color:as described,Brand:Unbranded,Material:a...
948909    (Brand:Unbranded,MPN:Does Not Apply,Size:as de...
949225    (Material:as described,Designer/Brand:Unbrande...
950037    (Material:as described,Size:as

923515    (MPN:does not apply,Category:Sheet & Pillowcas...
924816    (Size:Queen,Brand:Does not apply,MPN:does not ...
925610    (Brand:Does not apply,Category:Sheet & Pillowc...
927021    (UPC:does not apply,Tax:This listing not inclu...
929810    (Shipping:Fast Two-day shipping,Category:Sheet...
932135    (Category:Sheet & Pillowcase Sets,MPN:does not...
932880    (Category:Sheet & Pillowcase Sets,Size:Queen,B...
934904    (MPN:does not apply,UPC:does not apply,Categor...
937567    (MPN:does not apply,Size:Queen,Brand:Does not ...
937740    (Category:Sheet & Pillowcase Sets,MPN:does not...
938074    (MPN:does not apply,Size:Queen,UPC:does not ap...
938450    (UPC:does not apply,MPN:does not apply,Categor...
941850    (Category:Sheets & Pillowcases,Brand:Does not ...
942386    (Category:Sheet & Pillowcase Sets,Brand:Does n...
949657    (Brand:Does not apply,Shipping:Fast Two-day sh...
950263    (Category:Duvets Covers & Sets,Brand:Does not ...
950334    (MPN:does not apply,Brand:Does

32787    (Model:14966285,Material:100% Cotton,Color:Whi...
50857    (Material:100% Cotton,Type:Bed Pillow,Model:14...
-----------------
GROUP: 2479
32792    (Material:Flannel,Brand:Pottery Barn Kids,Colo...
966877    (Color:Multi-Color,Brand:Pottery Barn Kids,Typ...
-----------------
GROUP: 17018
32797    (UPC:690002409200,Publisher:SanFab Tex,Product...
959842    (ISBN:Not Applicable,UPC:689993877716,Publishe...
-----------------
GROUP: 2771
32798    (Bed Size:Twin,Brand:Ralph Lauren)
961691    (Bed Size:Twin,Brand:Ralph Lauren)
-----------------
GROUP: 14271
32801    (Bed Size:King,Type:Flat Sheets,Color:Multi-Co...
564848    (Color:Multi-Color,Bed Size:King,Thread Count:...
930771    (Brand:Yves Delorme,Material:100% Cotton,Color...
-----------------
GROUP: 10754
32806    (Brand:Handmade)
32808    (Brand:Handmade)
32809    (Brand:Handmade)
32810    (Brand:Handmade)
33419    (Brand:Handmade)
35644    (Brand:Handmade)
45178    (Brand:Handmade)
45179    (Brand:Handmade)
45180    (Brand

948880    (Brand:Unbranded)
950243    (Brand:Unbranded)
950316    (Brand:Unbranded)
952041    (Brand:Unbranded)
957740    (Brand:Unbranded)
959900    (Brand:Unbranded)
962269    (Brand:Unbranded)
964542    (Brand:Unbranded)
964995    (Brand:Unbranded)
965262    (Brand:Unbranded)
968279    (UPC:6088736119573,Brand:Unbranded)
973736    (Brand:Unbranded)
976776    (Brand:Unbranded)
977232    (Brand:Unbranded)
978586    (Custom Bundle:No,Non-Domestic Product:No,Bran...
980820    (Brand:Unbranded)
987260    (Brand:Unbranded)
987852    (Brand:Unbranded)
990719    (Brand:Unbranded)
991427    (Brand:Unbranded)
995053    (Brand:Unbranded)
-----------------
GROUP: 9834
32953    (Style:Patterned,Type:Sheet Sets,Brand:PB Pott...
891085    (Size:XL Twin,Brand:PB Pottery Barn Teen,Style...
-----------------
GROUP: 19507
32955    (Color:White,Label:MattRest,MPN:COMIN18JU04344...
192336    (Manufacturer:MattRest,EAN:0689851022685,Model...
889646    (Binding:Kitchen,ProductGroup:Home,PartNumber:...
976

37368    (Brand:Unbranded,Country/Region of Manufacture...
37937    (Material:as described,MPN:Does Not Apply,Desi...
46105    (MPN:Does Not Apply,Size:as described,Country/...
46835    (Color:Purple,Brand:Unbranded,Size:as describe...
47253    (Brand:Unbranded,Color:Purple,Material:as desc...
47636    (Brand:Unbranded,Color:Purple,Country/Region o...
48302    (MPN:Does Not Apply,Brand:Unbranded,Country/Re...
49529    (Brand:Unbranded,Country/Region of Manufacture...
50142    (MPN:Does Not Apply,Size:as described,Designer...
188807    (Country/Region of Manufacture:China,Color:Pur...
188866    (Color:Purple,Designer/Brand:Unbranded,Country...
188954    (Quantity:1,Size:as described,Material:as desc...
190090    (Brand:Unbranded,MPN:Does Not Apply,Material:a...
190932    (Size:as described,Brand:Unbranded,Color:Purpl...
191227    (Designer/Brand:Unbranded,Color:Purple,MPN:Doe...
191589    (Material:as described,Designer/Brand:Unbrande...
192264    (Material:as described,Brand:Unbranded,

33318    (Type:All In One,Brand:As Seen On TV, ZOOM,Col...
50528    (Type:All In One,Material:Poly-Sateen,Pattern:...
-----------------
GROUP: 24828
33320    (Brand:Unbranded,Color:Grey,Material:as descri...
46758    (Quantity:2,Color:Grey,Material:as described,M...
892646    (Brand:Unbranded,MPN:Does Not Apply,Color:Grey...
918992    (Brand:Unbranded,Quantity:2,Material:as descri...
-----------------
GROUP: 37396
33329    (Bed Size:Twin,Color:PINK WHITE,Brand:Laura As...
938094    (Material:50 COTTON 50 POLY,Bed Size:Twin,Colo...
-----------------
GROUP: 25143
33331    (Material:as described,MPN:Does Not Apply,Desi...
991061    (Designer/Brand:Unbranded,Color:Wine Red-Style...
-----------------
GROUP: 30274
33332    (Style:Modern,Bed Size:Non-Standard,Brand:Unbr...
190387    (Thread Count:1000 and Higher,Brand:Unbranded,...
-----------------
GROUP: 24842
33333    (Size:190x80cm,Color:Pink,Country/Region of Ma...
37293    (Color:Pink,Size:190x80cm,Brand:Unbranded,Coun...
45479    (Coun

51299    (Brand:Signoria Firenze)
52314    (Brand:Signoria Firenze)
52316    (Brand:Signoria Firenze)
52318    (Brand:Signoria Firenze)
189722    (Brand:Signoria Firenze)
189723    (Brand:Signoria Firenze)
189725    (Brand:Signoria Firenze)
190688    (Brand:Signoria Firenze)
190689    (Brand:Signoria Firenze)
190690    (Brand:Signoria Firenze)
190692    (Brand:Signoria Firenze)
192452    (Brand:Signoria Firenze)
192453    (Brand:Signoria Firenze)
192454    (Brand:Signoria Firenze)
192455    (Brand:Signoria Firenze)
192456    (Brand:Signoria Firenze)
193414    (Brand:Signoria Firenze)
193415    (Brand:Signoria Firenze)
193416    (Brand:Signoria Firenze)
193417    (Brand:Signoria Firenze)
193419    (Brand:Signoria Firenze)
562495    (Brand:Signoria Firenze)
562496    (Brand:Signoria Firenze)
562497    (Brand:Signoria Firenze)
563472    (Brand:Signoria Firenze)
563473    (Brand:Signoria Firenze)
563474    (Brand:Signoria Firenze)
563477    (Brand:Signoria Firenze)
563479    (Brand:Signori

189071    (Tax:This item without tax,Size:Twin XL,Brand:...
191798    (Size:Twin XL,MPN:does not apply,Tax:This item...
191806    (UPC:does not apply,Tax:This item without tax,...
562701    (Brand:Does not apply,Model Number:MFGREYTXL,M...
564282    (MPN:does not apply,Category:Sheet & Pillowcas...
920648    (Category:Sheets & Pillowcases,Brand:Does not ...
930414    (MPN:does not apply,Category:Sheets & Pillowca...
934925    (Brand:Does not apply,UPC:does not apply,Categ...
962104    (Shipping:Fast Two-day shipping,Size:Twin XL,T...
965820    (UPC:does not apply,Model Number:SSPLMTXL,MPN:...
977385    (Model Number:SH000186,UPC:does not apply,Size...
979721    (Shipping:Fast Two-day shipping,Category:Sheet...
991588    (Tax:This item without tax,UPC:does not apply,...
993870    (Size:Twin XL,Brand:Does not apply,Category:Sh...
995782    (Tax:This item without tax,Size:Twin XL,Shippi...
-----------------
GROUP: 10722
33617    (Pillowcase Size:Standard,Brand:Jack Dempsey,C...
33718    (

33901    (MPN:Does Not Apply,Color:Red,Material:100% Co...
46217    (Material:100% Cotton,Country/Region of Manufa...
-----------------
GROUP: 39259
33902    (MPN:Does Not Apply,Color:Multi-Color,Style:Fr...
45887    (Type:Fitted Sheets,Color:Multi-Color,Style:Fr...
190903    (Pattern:Floral,Material:cotton,Bed Size:Full,...
-----------------
GROUP: 36815
33906    (Binding:Kitchen,Publisher:Basic Choice,Produc...
565726    (Brand:Luxe Bedding,MPN:Not Applicable,UPC:687...
-----------------
GROUP: 16592
33907    (Brand:BASIC CHOICE,Color:Turquoise,Manufactur...
905424    (Brand:BASIC CHOICE,EAN:0696602900970,Publishe...
923883    (ISBN:Not Applicable,ProductGroup:Home,Publish...
943783    (PartNumber:4023,ISBN:Not Applicable,Color:Tur...
948017    (MPN:4023,EAN:0692624888432,Manufacturer:Basic...
-----------------
GROUP: 34258
33908    (Model:BED3PCS05,Bed Size:95" X 85" Inch ( 240...
900851    (Custom Bundle:No,MPN:Does Not Apply,Type:Shee...
979364    (Brand:Handmade,Material:100% Cot

565895    (Material:Cotton,Brand:Aazeem,Country/Region o...
565897    (Material:Cotton,Pattern:Abstract,Shape:Not sp...
565899    (Material:Cotton,Pattern:Abstract,Shape:Tear D...
565900    (Shape:Tear Drop,Country/Region of Manufacture...
565901    (Color:Multi,Size:Double,Shape:Chair,Thickness...
565902    (Size:Double,Shape:Not specified,Pattern:Abstr...
565904    (Pattern:Abstract,Material:Cotton,Color:Multi,...
565906    (Shape:Not specified,Washable:Not specified,Tr...
876359    (Shape:Not specified,Size:Double,Transparency:...
878241    (Thickness:Not specified,Brand:Aazeem,Transpar...
878755    (Thickness:Not specified,Country/Region of Man...
878896    (Brand:Aazeem,Material:Cotton,Transparency:Not...
879687    (Washable:Not specified,MPN:Does not apply,Mat...
879854    (Brand:Aazeem,Wash Care:Machine Wash,Transpare...
882349    (Pattern:Abstract,Brand:Aazeem,Washable:Not sp...
882683    (Size:Double,Shape:Rectangular,Color:Multi,Bra...
885329    (MPN:Does not apply,Wash Care:

49373    (Brand:somna.sarka,Theme:Luxury,Style:Modern,S...
884605    (Theme:Luxury,Size:Short Queen,Type:Sheet Set,...
-----------------
GROUP: 226
34154    (MPN:does not apply,Brand:Beekman)
37072    (MPN:does not apply,Brand:Beekman)
-----------------
GROUP: 40664
34160    (Bed Size:Full,Pattern:Harry Potter; Gryffindo...
935534    (Pillowcase Size:Standard 20 x 30",Type:Pillow...
-----------------
GROUP: 36014
34162    (Thread Count:1000,Color:Orange,Type:Sheet Set...
911340    (Non-Domestic Product:No,Modified Item:No,Cust...
-----------------
GROUP: 36108
34163    (Brand:Scala,Bed Size:King,Color:White,Custom ...
190462    (Pattern:Solid / Plain,Bed Size:King,Thread Co...
-----------------
GROUP: 36029
34167    (Thread Count:1000,Pattern:Striped,Pillowcase ...
882922    (Material:100% Egyptian Cotton,Type:Sheet Sets...
-----------------
GROUP: 15770
34170    (Pillowcase Size:Standard,Bed Size:Twin,Color:...
193562    (Type:Pillowcases,Pillowcase Size:Standard,MPN...
--------------

52680    (Quantity:1,Material:as described,Brand:Unbran...
565052    (Material:as described,Designer/Brand:Unbrande...
880619    (Material:as described,Style:8,Size:as describ...
937828    (Designer/Brand:Unbranded,Country/Region of Ma...
951158    (Style:8,Size:as described,Quantity:1,Material...
983902    (Designer/Brand:Unbranded,Color:Light Green,Br...
-----------------
GROUP: 16798
34409    (Label:Cosy House Collection,ISBN:Not Applicab...
48186    (Dimensions:L 8.46 x W 5.16 x H 1.73 inches,EA...
51363    (EAN:0689989293155,MPN:Not Applicable,Color:Bl...
979581    (Label:Cosy House Collection,Manufacturer:Cosy...
-----------------
GROUP: 25011
34415    (Brand:Unbranded,Material:as described,Style:3...
563163    (Country/Region of Manufacture:China,Quantity:...
888913    (Color:Pink-190x80cm,Style:3,Designer/Brand:Un...
-----------------
GROUP: 24998
34416    (Size:as described,MPN:Does Not Apply,Brand:Un...
49544    (MPN:Does Not Apply,Brand:Unbranded,Style:9,Si...
563502    (Mat

34593    (GSM (Grams per Square Meter):150 to 300,MPN:D...
36114    (Personalized:No,Pattern:Abstract,Model:Cloud,...
36701    (Model:Cloud,Size:Body,Labels & Certifications...
37493    (EAN:Does not apply,Color:Beige,Thread Count:2...
37494    (Brand:Does not apply,Style:African,MPN:Does n...
52774    (Personalized:No,Style:African,Thread Count:20...
193615    (Features:2-in-1,UPC:Does not apply,EAN:Does n...
565677    (ISBN:Does not apply,Color:Beige,Character Fam...
962946    (Labels & Certifications:NOMITE Mark,EAN:Does ...
987071    (Size:Body,Character Family:101 Dalmatians,Col...
-----------------
GROUP: 15769
34595    (Pattern:TV, Movies & Music,Pillowcase Size:St...
965429    (Pillowcase Size:Standard,Color:Multi-Color,Ma...
-----------------
GROUP: 6552
34597    (Product Dimensions:14 x 10.9 x 4 inches,Model...
51353    (item_width:4.0,Size:Queen,Brand:SONORO KATE,C...
189875    (item_width:4.0,Model:Does Not Apply,Brand:SON...
193597    (Item Location:United States,Shipping 

189446    (Brand:Unbranded,Designer/Brand:Unbranded,MPN:...
191648    (Designer/Brand:Unbranded,MPN:Does Not Apply,C...
563544    (Size:190x80cm,Designer/Brand:Unbranded,Countr...
983696    (Country/Region of Manufacture:China,Designer/...
-----------------
GROUP: 8273
34873    (MPN:CB2Z106-07-01Q,Pillowcase Size:Queen,Type...
945997    (Model:CB2Z106-07-01Q,MPN:CB2Z106-07-01Q,Brand...
948726    (Type:Pillowcases,MPN:CB2Z106-07-01Q,Pillowcas...
-----------------
GROUP: 30932
34875    (Brand Color:White Multi Color,Wash Care:Machi...
47201    (Pillowcase Size:Not Include,Style:Arts & Craf...
47202    (Thread Count:201-400,Type:Flat Sheets,Modifie...
563965    (Style:Arts & Crafts/Mission Style,Wash Care:M...
880557    (Type:Flat Sheets,Brand:Handmade,Pillowcase Si...
989684    (Color:White,Country/Region of Manufacture:Ind...
-----------------
GROUP: 24841
34882    (Color:Coffee Beige Blue,Size:190x80cm,MPN:Doe...
193401    (Designer/Brand:Unbranded,Country/Region of Ma...
892729    (De

35313    (MPN:does not apply,Brand:Jak & Jemma Home)
565229    (Brand:Jak & Jemma Home,MPN:does not apply)
885180    (MPN:does not apply,Brand:Jak & Jemma Home)
986920    (Brand:Jak & Jemma Home,MPN:does not apply)
-----------------
GROUP: 21297
35323    (Care Instructions:Machine Wash,Brand:Mainstay...
942389    (Model:MS8801321522-62,Type:Pillowcases,Size:K...
-----------------
GROUP: 8937
35338    (Pattern:Solid,Brand:Hudson Park,Bed Size:TWIN...
975166    (Color:LIGHT BROWN,Brand:Hudson Park,Country/R...
-----------------
GROUP: 26458
35341    (Type:Pillowcase,Material:100% Cotton,Theme:Fo...
50438    (Type:Pillowcase,Theme:Cowboys & Western,Mater...
892949    (Theme:Hippie,Type:Pillowcase,Style:Children's...
-----------------
GROUP: 6531
35354    (Color:Green,Brand:RICK AND MORTY,Size:Standar...
901816    (Shape:Boomerang,Theme:Airplanes,Material:Poly...
-----------------
GROUP: 28640
35370    (Type:Fitted Sheets,Pattern:Solid,Features:Dee...
951623    (Color:TAUPE,Brand:Real Cott

969794    (MPN:FLSS-BRPL-TXL,Manufacturer:Pinzon,Model:F...
-----------------
GROUP: 35502
35679    (Brand:Unbranded,Size:Twin)
36135    (Size:Twin,Brand:Unbranded)
50883    (Brand:Unbranded,Size:Twin)
565650    (Brand:Unbranded,Size:Twin)
-----------------
GROUP: 39151
35681    (Size:Full,Color:White,Number of Items in Set:...
46037    (Style:Modern,Room:Bedroom, Children's Bedroom...
189983    (Custom Bundle:No,Unit Type:Kg,Number of Items...
562770    (Shape:Rectangular,Size:Full,Style:Modern,Thre...
565375    (Care Instructions:Machine Wash In Cold Water,...
876075    (Country/Region of Manufacture:India,Color:Whi...
891739    (Features:Hypoallergenic,Machine Washable,Tumb...
894281    (Department:Adults,Kids,Teens,Material:Egyptia...
896739    (Material:Egyptian Cotton,Style:Modern,Size:Fu...
948114    (Drop Length:6 in,Department:Adults,Kids,Teens...
-----------------
GROUP: 42095
35686    (Brand:Disney (Disney),MPN:100210617801-01-01)
36547    (MPN:100210617801-01-01,Platform:Si

36101    (PartNumber:4023,ProductGroup:Home,UPC:6926200...
37374    (Manufacturer:Basic Choice,EAN:0689989145928,I...
46589    (EAN:0691042815501,Color:White,Brand:BASIC CHO...
562265    (Label:Basic Choice,Publisher:Basic Choice,UPC...
979800    (Brand:BASIC CHOICE,Label:Basic Choice,MPN:402...
-----------------
GROUP: 10879
36111    (Brand:Simply Shabby Chic)
944504    (Brand:Simply Shabby Chic)
-----------------
GROUP: 214
36117    (Shipping:Fast Two-day shipping,Brand:Abripedi...
562142    (Model:Does Not Apply,Brand:Abripedic,MPN:Does...
-----------------
GROUP: 37525
36144    (UPC:N/A,MPN:N/A,EAN:N/A,ISBN:N/A,Brand:Superior)
51206    (UPC:N/A,ISBN:N/A,Brand:Superior,EAN:N/A,MPN:N/A)
880537    (MPN:N/A,Brand:Superior,ISBN:N/A,EAN:N/A,UPC:N/A)
987374    (UPC:N/A,MPN:N/A,ISBN:N/A,Brand:Superior,EAN:N/A)
-----------------
GROUP: 184
36154    (Model:Does Not Apply,MPN:Does Not Apply,Brand...
48161    (Model:Does Not Apply,Brand:Carressa Linen,MPN...
48816    (Brand:Carressa Linen,MPN:

36889    (Product Dimensions:10.8 x 9.7 x 4.3 inches,Sh...
562063    (Color:Taupe,Shipping Weight:3.35 pounds,EAN:0...
968653    (Item Weight:3.35 pounds,Item model number:SYN...
-----------------
GROUP: 16776
36893    (Manufacturer:FADFAY,EAN:0695639485641,ISBN:No...
566133    (ProductGroup:Home,UPC:691060274052,Label:FADF...
914406    (ISBN:Not Applicable,ProductGroup:Home,Brand:F...
990534    (Binding:Kitchen,Label:FADFAY,UPC:691168279751...
-----------------
GROUP: 40377
36917    (Country/Region of Manufacture:India,Color:Bla...
49124    (Washable:Not specified,Wash Care:Machine Wash...
51905    (Color:Black,Country/Region of Manufacture:Ind...
-----------------
GROUP: 19503
36921    (PartNumber:ZM18-SH-09,Color:White,UPC:7369839...
192250    (Label:HC Collection,Brand:HC COLLECTION,MPN:S...
967596    (Manufacturer:HC Collection,Model:SYNCHKG05610...
-----------------
GROUP: 5847
36922    (Type:Sheet Sets,Brand:Heartland,Bed Size:Full...
193224    (Color:White,Modified Item:No,Bed 

990157    (Publisher:LIFETOWN,Binding:Kitchen,Color:Dark...
-----------------
GROUP: 16900
37385    (EAN:0696543946112,Publisher:Basic Choice,Numb...
45321    (ProductGroup:Home,UPC:Not Applicable,Manufact...
45415    (EAN:0696585573826,Brand:Basic Choice,MPN:Not ...
189496    (EAN:0691166978144,MPN:Not Applicable,UPC:6911...
-----------------
GROUP: 18790
37387    (Brand:Cloud 9,Thread Count:500,Size:22" X 32"...
562578    (Material:100% Cotton,Features:Fits Even The F...
-----------------
GROUP: 18827
37388    (Color:White,Country/Region of Manufacture:Ind...
193509    (Brand:Cloud 9,Country/Region of Manufacture:I...
-----------------
GROUP: 6539
37400    (UPC:313113955977,Model:Does Not Apply,Product...
37469    (item_length:9.8,Product Dimensions:10.1 x 9.8...
51267    (Product Dimensions:10.1 x 9.8 x 5.8 inches,Ma...
563001    (MPN:Does Not Apply,Item Weight:3.95 pounds,Fo...
566353    (Manufacturer:Morgan Home Fashions,item_length...
966812    (Customer Service:Professional Serv

44693    (Style:Novelty,Size:Twin,Room:Bedroom,Model:Po...
974525    (Material:100% Cotton,Model:Polo Teddy Bear,Br...
-----------------
GROUP: 6281
44696    (Brand:SafeRest,Type:Pillowcases,Style:Modern,...
50279    (Style:Modern,Model:Queen Size Pillow Topper,M...
884953    (Material:Cotton,Pillowcase Size:Standard,Mode...
-----------------
GROUP: 20604
44699    (MPN:does not apply,Size:King/Cal King Duvet C...
909585    (Brand:Does not apply,UPC:does not apply,Categ...
-----------------
GROUP: 16812
44711    (ProductGroup:Home,UPC:692761525955,Label:1 IN...
188928    (ProductGroup:Home,Publisher:1 INCH,ISBN:Not A...
191525    (Weight:3.08 lbs,Brand:1 INCH,MPN:Not Applicab...
910157    (Publisher:1 INCH,ProductGroup:Home,Brand:1 IN...
-----------------
GROUP: 16913
44714    (UPC:696392681882,Manufacturer:Reafort,Color:C...
49104    (ISBN:Not Applicable,MPN:Not Applicable,Brand:...
-----------------
GROUP: 15123
44715    (Type:Sheet Sets,Thread Count:201-400,Pillowca...
47557    (Pill

193850    (MPN:Does not apply,Manufacture_country:India,...
-----------------
GROUP: 2429
45161    (Brand:Pottery Barn Kids,Color:Multi-Color,Pil...
959981    (Color:Multi-Color,Type:Pillowcases,Material:1...
-----------------
GROUP: 2049
45162    (Material:Flannel,Pillowcase Size:Standard,Col...
564714    (Material:Flannel,Brand:Pottery Barn Kids,Bed ...
-----------------
GROUP: 14712
45187    (Color:BLUE,Bed Size:Twin,Brand:Ralph Lauren,P...
932023    (Size:Twin,Style:Cottage,Bed Size:Twin,Brand:R...
-----------------
GROUP: 19522
45204    (Label:Charisma,Publisher:Charisma,ProductGrou...
193603    (Color:White,Manufacturer:Charisma,Label:Chari...
-----------------
GROUP: 22730
45213    (Size:Single (American twin),Brand:Disney,Colo...
929251    (Color:Black,Brand:Disney,Size:Single (America...
-----------------
GROUP: 2069
45214    (Color:Gray,Bed Size:Queen,Type:Sheet Sets,Bra...
188942    (Color:Gray,Bed Size:Queen,Type:Sheet Sets,Bra...
-----------------
GROUP: 6526
45217    (Cer

883712    (Brand:Remington,Type:Pillowcases,Pillowcase S...
-----------------
GROUP: 17037
45996    (Brand:Comfy Basics,ProductGroup:Home,UPC:6899...
50991    (ISBN:Not Applicable,UPC:Not Applicable,Publis...
-----------------
GROUP: 16891
46007    (Label:Basic Choice,Manufacturer:Basic Choice,...
563057    (NumberOfItems:2,Publisher:Basic Choice,Produc...
564623    (Color:Gray,ProductGroup:Home,Publisher:Basic ...
882556    (UPC:689998342660,EAN:0689998342660,Color:Gray...
-----------------
GROUP: 41378
46010    (UPC:695641818239,MPN:FD23001-QS5,Color:Style5...
566521    (Model:FD23001-QS5,Manufacturer:FenDie,Label:F...
-----------------
GROUP: 207
46012    (Brand:Pillowfort,MPN:does not apply)
874599    (Custom Bundle:No,Brand:PILLOWFORT,MPN:DOES NO...
-----------------
GROUP: 20569
46016    (Brand:Does not apply,Category:Pillowcases,Tax...
50309    (Brand:Does not apply,No Sale Tax:Yes,MPN:does...
193284    (Package include:2 pillow case.,MPN:does not a...
900938    (Free Shipping:Y

46618    (ISBN:Not Applicable,UPC:691200340852,ProductG...
189600    (MPN:4023,Brand:BASIC CHOICE,Manufacturer:Basi...
190270    (MPN:4023,UPC:691200402420,Manufacturer:Basic ...
562364    (Manufacturer:Basic Choice,Publisher:Basic Cho...
566132    (Publisher:Basic Choice,PartNumber:4023,UPC:69...
876887    (ISBN:Not Applicable,ProductGroup:Home,UPC:692...
987418    (Brand:BASIC CHOICE,Publisher:Basic Choice,UPC...
-----------------
GROUP: 35107
46644    (Room:Bedroom,Material:Cotton Blend,Bed Size:F...
963864    (Threadcount:200 or less,Style:Traditional,Bra...
-----------------
GROUP: 29741
46648    (Pattern:Striped,Room:Bedroom,Model:HENLEY, FU...
190596    (Pattern:Striped,Type:Flat Sheets,Style:Modern...
-----------------
GROUP: 35237
46652    (Bed Size:Queen,Model:MULTI FLORAL,Room:Bedroo...
46673    (Brand:ST. MARY'S,Color:Multi-Color,MPN:Does N...
-----------------
GROUP: 10900
46677    (Brand:Malden Mills by Berkshire)
565997    (UPC:Does not apply,Brand:Malden Mills by Berk..

47141    (Brand:SONORO KATE,MPN:no_apply)
193997    (Brand:SONORO KATE,MPN:no_apply)
-----------------
GROUP: 36535
47150    (Pattern:Animal Print,Pillowcase Size:King,Sty...
49950    (Material:100% Cotton,MPN:Does Not Apply,Count...
-----------------
GROUP: 8434
47152    (Manufacturer:Niagara Sleep Solution,Binding:K...
47366    (PartNumber:unknown,ProductGroup:Home,MPN:unkn...
940352    (PartNumber:unknown,Manufacturer:Niagara Sleep...
-----------------
GROUP: 24505
47153    (Bed Size:Full,Material:Flannel,Type:Sheet Set...
967161    (Material:Flannel,Bed Size:Full,Brand:London F...
-----------------
GROUP: 37073
47157    (Model:Cool & Crisp,Thread Count:201-400,Type:...
563921    (Thread Count:201-400,Material:100% Egyptian C...
904259    (Pattern:Solid,Features:Cooling,Custom Bundle:...
942561    (Model:Cool & Crisp,Type:Sheet Sets,Material:1...
967768    (Model:Cool & Crisp,Material:100% Egyptian Cot...
-----------------
GROUP: 26884
47163    (Material:100% Pima Cotton,Size:Queen,

50736    (Bed Size:Twin,Brand:Pottery Barn Teen,Materia...
-----------------
GROUP: 15563
47967    (Brand:Kmart,Pattern:Solid,Material:50% Polyes...
962949    (Pattern:Solid,Material:50% Polyester 50% Cott...
-----------------
GROUP: 14241
47969    (Pattern:Striped,Type:Flat Sheets,Thread Count...
193188    (Threadcount:201-500,Model:Ralph Lauren Invern...
-----------------
GROUP: 33904
47971    (Material:100% Cotton,Bed Size:Twin,Color:Whit...
50707    (Type:Flat Sheets,Brand:Pottery Barn Teen,Bed ...
50881    (Brand:Pottery Barn Teen,Bed Size:Twin,Type:Fl...
-----------------
GROUP: 6544
47981    (Shipping Weight:3.85 pounds,Brand:SONORO KATE...
49611    (No Tax:No sales Tax,Color:White,Manufacturer:...
52268    (Package Dimensions:16.6 x 11.3 x 5 inches,Shi...
191923    (Brand:SONORO KATE,TAX:TAX FREE,Package Dimens...
564764    (Brand:SONORO KATE,Color:White,UPC:74411043774...
892659    (EAN:0313049539500,Item Weight:3.55 pounds,MPN...
922575    (Package Dimensions:16.6 x 11.3 x 5 

562782    (Threadcount:200 or less,Color:White,Brand:Fin...
938796    (Color:White,Type:Flat Sheets,Material:Cotton ...
-----------------
GROUP: 28506
48702    (Color:Does Not Apply,Shipping Weight:3.53 pou...
192650    (Item model number:COMIN18JU020469,Model:COMIN...
-----------------
GROUP: 13296
48712    (Pillowcase Size:Standard,Room:Bedroom,Bed Siz...
49359    (Bed Size:Full)
190437    (Bed Size:full)
910600    (Bed Size:Full)
927210    (Bed Size:Full)
-----------------
GROUP: 10816
48713    (Model:SUPIMA CLASSIC HEMSTITCH QUEEN,Brand:Fi...
193643    (Brand:Fieldcrest)
911308    (Brand:Fieldcrest)
-----------------
GROUP: 20581
48720    (UPC:does not apply,Tax:This listing not inclu...
898540    (UPC:does not apply,Size:Standard (20x26),Bran...
-----------------
GROUP: 37388
48752    (Material:100% Cotton,Type:Flat Sheet,Color:Br...
885881    (Bed Size:Full,Material:100% Cotton,Pattern:So...
-----------------
GROUP: 37516
48755    (Brand:Brielle,MPN:807000158386)
942978    (UPC:D

49470    (MPN:SHTDVTCRStpGrayTwin01,Category:Duvet Cove...
52209    (Category:Duvet Cover Sets,Manufacturer:Sweet-...
-----------------
GROUP: 10910
49483    (Brand:PINZON)
189700    (Brand:Pinzon)
-----------------
GROUP: 12653
49484    (Brand:SFERRA,Bed Size:King,Type:Sheet,Color:C...
909739    (Color:Cashew,Brand:SFERRA,Bed Size:King,Type:...
-----------------
GROUP: 30595
49491    (Type:Sheet Sets,Brand:Twin,Non-Domestic Produ...
919162    (Pattern:Floral,Custom Bundle:No,Brand:Twin,Co...
-----------------
GROUP: 24826
49505    (Brand:Unbranded,Material:as described,MPN:Doe...
190873    (Color:Grey Coffee Beige,Brand:Unbranded,Mater...
193637    (Size:190x80cm,Country/Region of Manufacture:C...
905375    (Country/Region of Manufacture:China,Designer/...
-----------------
GROUP: 25002
49524    (Style:1,Size:as described,MPN:Does Not Apply,...
50520    (Color:Beige-50x75cm,Country/Region of Manufac...
190870    (MPN:Does Not Apply,Designer/Brand:Unbranded,S...
949443    (Material:as 

893726    (Style:18,Brand:Unbranded,Quantity:1,Color:Cof...
895760    (MPN:Does Not Apply,Style:18,Material:as descr...
975524    (Designer/Brand:Unbranded,Style:18,Color:Coffe...
-----------------
GROUP: 25083
50523    (Material:as described,Country/Region of Manuf...
193447    (MPN:Does Not Apply,Designer/Brand:Unbranded,C...
953240    (Size:as described,Material:as described,MPN:D...
968379    (Designer/Brand:Unbranded,Country/Region of Ma...
-----------------
GROUP: 25123
50529    (Brand:Unbranded,Designer/Brand:Unbranded,Coun...
193464    (Quantity:2,Size:as described,MPN:Does Not App...
970252    (Quantity:2,Material:as described,MPN:Does Not...
-----------------
GROUP: 24820
50532    (Brand:Unbranded,Color:as described,Material:a...
192593    (Color:as described,Size:45x45cm,Brand:Unbrand...
192797    (Color:as described,Size:45x45cm,Designer/Bran...
971173    (Country/Region of Manufacture:China,Material:...
995696    (Quantity:2,Brand:Unbranded,Color:as described...
----------

-----------------
GROUP: 41227
51337    (Brand:Fancy Linen,Manufacturer:Fancy linen,Pa...
188886    (Manufacturer:Fancy linen,Publisher:Fancy line...
563445    (MPN:43190-742197,Manufacturer:Fancy linen,Bra...
903592    (UPC:696226459991,ISBN:Not Applicable,Label:Fa...
-----------------
GROUP: 17056
51361    (EAN:0696398836101,UPC:696398836101,Color:Yell...
883736    (ProductGroup:Home,MPN:Not Applicable,Color:Ye...
-----------------
GROUP: 21599
51369    (Country/Region of Manufacture:India,Features:...
952447    (MPN:Does not apply,Material:Egyptian Cotton,P...
-----------------
GROUP: 18420
51390    (Material:100% Cotton,Color:Blue,Type:Sheet Se...
190800    (Size:Queen,Pattern:Floral,Style:French Countr...
939825    (Brand:Ralph Lauren,Type:Sheet Set,Style:Frenc...
-----------------
GROUP: 30236
51397    (MPN:Does Not Apply,Manufacturer Part Number:D...
910598    (Material:as described,Color:Dinosaur A,Manufa...
-----------------
GROUP: 8562
51405    (Bed Size:Twin,Brand:Ralph Laur

191690    (Color:Pea Green,Country/Region of Manufacture...
916515    (Size:as described,Designer/Brand:Unbranded,Co...
980800    (Material:as described,Quantity:2,MPN:Does Not...
-----------------
GROUP: 25041
52340    (Material:as described,Brand:Unbranded,Color:R...
190728    (Brand:Unbranded,Quantity:1,Size:as described,...
877990    (Quantity:1,Color:Red_1,Country/Region of Manu...
909582    (Material:as described,MPN:Does Not Apply,Styl...
-----------------
GROUP: 14894
52343    (Type:Flat Sheets,Brand:Ralph Lauren)
563522    (Brand:Ralph Lauren,Type:Flat Sheets)
930742    (Type:Flat Sheets,Brand:Ralph Lauren)
967326    (Type:Flat Sheets,Brand:Ralph Lauren)
-----------------
GROUP: 9821
52346    (Material:Microfiber,Size:Queen,Type:Sheet Set...
958707    (MPN:SSMFP-002Q,Material:Microfiber,Dominant C...
-----------------
GROUP: 24815
52356    (Material:Soft Cotton,Brand:Unbranded,MPN:Does...
190708    (Quantity:2,Material:Soft Cotton,MPN:Does Not ...
193445    (Country/Region of 

188716    (Brand:Cuddl Duds,Bed Size:Queen,Room:Bedroom,...
561501    (Brand:Cuddl Duds,Bed Size:Queen,Material:100%...
-----------------
GROUP: 19421
188718    (Manufacturer:Basic Choice,MPN:Not Applicable,...
911295    (Manufacturer:Basic Choice,MPN:Not Applicable,...
-----------------
GROUP: 8437
188725    (Manufacturer:Cosy House Collection,MPN:unknow...
565517    (Manufacturer:Cosy House Collection,ProductGro...
-----------------
GROUP: 16771
188726    (EAN:0692623471123,Brand:BASIC CHOICE,UPC:6926...
563552    (MPN:Not Applicable,EAN:0692623190284,Brand:BA...
991933    (ProductGroup:Home,Label:Basic Choice,ISBN:Not...
-----------------
GROUP: 13935
188729    (Pillowcase Size:Queen,Custom Bundle:No,Materi...
931240    (Pattern:Solid,Bed Size:Queen,Material:Flannel...
-----------------
GROUP: 30644
188777    (Color:Black,Type:Sheet Sets,Bed Size:Twin,Bra...
892381    (Type:Sheet Sets,Bed Size:Twin,Brand:Twin,Colo...
-----------------
GROUP: 23621
188786    (Shipping Weight:2.05 pou

926371    (Size:Queen,Material:100% Cotton,Color:Gray,Ty...
-----------------
GROUP: 21290
190198    (Pattern:Dogs,Model:Watercolor Dogs,Features:D...
564538    (Custom Bundle:No,Color:White,Type:Sheet Sets,...
-----------------
GROUP: 40214
190204    (Care Instructions:Dry Clean Or Machine wash c...
910244    (Theme:Animals,GSM (Grams per Square Meter):15...
-----------------
GROUP: 37513
190222    (Model:B0144BL5IQ,Brand:ViscoSoft,MPN:B0144BL5IQ)
192770    (MPN:B0144BL5IQ,Brand:ViscoSoft,Model:COMINHKP...
956945    (Brand:ViscoSoft,Model:COMINHKPR85832,MPN:B014...
995375    (Brand:ViscoSoft,MPN:B0144BL5IQ,Model:COMINHKP...
-----------------
GROUP: 29445
190226    (Color:Lavender (Purple),EAN:Does not apply,MP...
565961    (Bed Size:Twin Extra Long,Model:Does not apply...
-----------------
GROUP: 28587
190238    (Type:Fitted Sheets,Non-Domestic Product:No,Mo...
562930    (Model:BC-2633,Modified Item:No,Non-Domestic P...
-----------------
GROUP: 17012
190256    (EAN:0695634923476,Brand

GROUP: 41302
191725    (PartNumber:LS#CPP-01,ProductGroup:Home,Publis...
193796    (EAN:0691202249696,ISBN:Not Applicable,Brand:C...
-----------------
GROUP: 15762
191738    (MPN:Does Not Apply,Brand:Unicor,Color:White,C...
917007    (Fabric Type:50/50 Polyester/Cotton Blend,Thre...
944602    (Fabric Type:50/50 Polyester/Cotton Blend,Type...
-----------------
GROUP: 20574
191752    (Category:Sheet & Pillowcase Sets,Model Number...
966216    (MPN:Does not apply,Color:Grey,ISBN:Does not a...
-----------------
GROUP: 13922
191811    (Pillowcase Size:Queen,Model:Elk Deer Bear Moo...
878006    (Material:100% polyester,Color:White / Red / B...
-----------------
GROUP: 19413
191817    (EAN:0695641268126,Publisher:CGK Unlimited,Bra...
982573    (ISBN:Not Applicable,EAN:0696227806398,Color:W...
-----------------
GROUP: 23848
191824    (Size:King,MPN:does not apply,Category:Sheet &...
906362    (Brand:Westbury Manor,Category:Sheet & Pillowc...
-----------------
GROUP: 23847
191829    (MPN:does n

192579    (Bed Size:Full,Material:Cotton Blend,Type:Shee...
889073    (Brand:The Big One,Type:Sheet Sets,Material:Co...
-----------------
GROUP: 2050
192583    (Brand:Missoni,Bed Size:Queen,Color:Multi-Colo...
193569    (Material:100% Cotton,Type:Sheet Sets,Bed Size...
963268    (Bed Size:Queen,Brand:Missoni,Type:Sheet Sets,...
985825    (Color:Multi-Color,Material:100% Cotton,Brand:...
-----------------
GROUP: 20176
192601    (Bed Size:Queen,Drop Length:15 in,Style:Modern...
192616    (MPN:Does Not Apply,Country/Region of Manufact...
562636    (Model:indolinen-2,Pattern:Solid,Set Includes:...
565441    (Type:Sheet Sets,Thread Count:400,Bed Size:Que...
882701    (Bed Size:Queen,Material:100% Microfiber,Count...
921127    (Personalized:No,Pattern:Solid,Model:indolinen...
-----------------
GROUP: 20173
192603    (Style:Modern,Thread Count:NA,Type:Sheet Sets,...
921371    (Color:White,Size:Short Queen,Personalized:No,...
956121    (Style:Modern,Set Includes:Fitted Sheet,Flat S...
--------

995641    (Brand:Candid Bedding,Color:White,Model:CB2Z10...
-----------------
GROUP: 6554
193982    (Department:mens,EAN:0313100448468,Item Weight...
949525    (Item model number:AB-PC-I,Shipping:Fast N' Fr...
-----------------
GROUP: 36414
193984    (Color:Creamy,Material:Cotton Linen,Designer/B...
562150    (Style:2,Material:Cotton Linen,MPN:Does Not Ap...
-----------------
GROUP: 10877
194021    (Brand:Superior)
950507    (Brand:Superior)
-----------------
GROUP: 28480
194038    (ISBN:Does not apply,UPC:711181205985,Package ...
562089    (Manufacturer:Does not apply,EAN:0726084953543...
961371    (Brand:Does not apply,UPC:744110562729,Product...
-----------------
GROUP: 37540
194049    (Brand:ted-baker,MPN:5.0212530668e+012,Model:5...
935474    (Brand:ted-baker,Model:5.0212530668e+012,MPN:5...
967869    (Model:5.0212530668e+012,MPN:5.0212530668e+012...
-----------------
GROUP: 3474
194053    (Brand:Scala,Bed Size:Queen,Thread Count:1200 ...
950040    (Bed Size:Queen,Pillowcase Size:

562652    (Pillowcase Size:King,Brand:Pottery Barn,Count...
887891    (Bed Size:King,Country/Region of Manufacture:P...
-----------------
GROUP: 4668
562738    (Bed Size:Twin,Style:Modern,Thread Count:800 T...
565525    (Style:Modern,Material:100% Cotton,Pattern:Sol...
-----------------
GROUP: 12474
562742    (Brand:linens.affairs,Country/Region of Manufa...
883651    (Type:Sheet Sets,Color:White,Country/Region of...
-----------------
GROUP: 1797
562792    (Material:100% Cotton,Brand:Oake,Color:Dove,Be...
897628    (Color:Dove,Model:Piquet Printed,Brand:Oake,Be...
-----------------
GROUP: 18204
562866    (Brand:Hotel Collection,Size:FULL,Material:100...
960292    (Color:NAVY,Type:Sheet Set,Size:FULL,Thread Co...
-----------------
GROUP: 20602
562875    (Brand:Does not apply,Model Number:P233DPGTod,...
916601    (UPC:does not apply,MPN:does not apply,Size:To...
-----------------
GROUP: 37515
562898    (MPN:CO-1000TC-Q0-Dark Grey,Model:CO-1000TC-Q0...
881046    (MPN:CO-1000TC-Q0-Dark Gre

564220    (Bed Size:King,Brand:Ugg)
949001    (Bed Size:King,Brand:UGG)
995001    (Brand:UGG,Bed Size:King)
-----------------
GROUP: 5988
564238    (Country/Region of Manufacture:India,Type:Shee...
893839    (Material:Cotton,Pattern:Peacock Printed,Bed S...
-----------------
GROUP: 12506
564242    (Color:Navy Blue,Bed Size:King,Material:100% C...
895462    (Country/Region of Manufacture:India,Style:Mod...
-----------------
GROUP: 1524
564250    (Model:0,Custom Bundle:No,Modified Item:No,Typ...
983479    (Brand:Homemade,Pattern:Floral,Pillowcase Size...
-----------------
GROUP: 1796
564252    (Type:Pillowcases,Color:Blue,Pillowcase Size:S...
566485    (Pillowcase Size:Standard,Material:100% Polyes...
-----------------
GROUP: 18796
564257    (Country/Region of Manufacture:India,Thread Co...
979143    (Color:Tan,MPN:56951,Pattern:Solid,Brand:Club ...
-----------------
GROUP: 41235
564258    (Color:Cloud,Brand:Stone & Beam,Binding:Kitche...
897439    (MPN:2DGJH40,Binding:Kitchen,Brand:Ston

873133    (Model:Sheets,Size:FULL,Care Instructions:MACH...
982191    (Size:FULL,Type:Sheets Set,Material:COTTON PER...
-----------------
GROUP: 17812
873180    (Thread Count:400,Color:Blue,Country/Region of...
978827    (Color:Blue,Size:King,Country/Region of Manufa...
-----------------
GROUP: 35728
873204    (Thread Count:1000,Type:Sheet Set,Bed Size:Que...
873724    (Pillowcase Size:Queen,Material:Egyptian Cotto...
-----------------
GROUP: 27639
873247    (Bed Size:Twin,Pattern:Floral,Color:Pink,Brand...
932795    (Color:Pink,Material:100% Cotton,Type:Flat She...
-----------------
GROUP: 5845
873299    (Color:White,Type:Sheet,Brand:White,Bed Size:F...
923475    (Color:White,Type:Sheet,Brand:White,Bed Size:F...
-----------------
GROUP: 5989
873439    (Type:Sheet Sets,Features:Embroidered,Pillowca...
940050    (Pillowcase Size:King,Bed Size:King,Pattern:Em...
-----------------
GROUP: 3468
873701    (Custom Bundle:No,Type:Sheet Sets,Material:100...
896691    (Pattern:Solid,Pillowcase S

900133    (Color:Black,Bed Size:Full,Type:Flat Sheets,Th...
993353    (Pattern:Solid,Color:Black,Type:Flat Sheets,Th...
-----------------
GROUP: 19417
889466    (ProductGroup:Home,Color:White,Publisher:Cosy ...
941710    (ProductGroup:Home,Color:White,MPN:Not Applica...
-----------------
GROUP: 39564
889555    (Brand:DOORGA,Material:100% Cotton,Type:Flat S...
905634    (Brand:DOORGA,Type:Flat Sheets,Bed Size:Queen,...
-----------------
GROUP: 12220
889717    (Country/Region of Manufacture:United States,C...
924555    (Style:Novelty,Brand:Pacific,Country/Region of...
-----------------
GROUP: 28476
889949    (Package Dimensions:11.2 x 9.4 x 4.3 inches,Sh...
921068    (MPN:Does Not Apply,Item Weight:4 pounds,Manuf...
-----------------
GROUP: 30602
889977    (Bed Size:Twin,Brand:White,Color:Orange,Type:S...
924964    (Color:Orange,Type:Sheet Sets,Brand:White,Bed ...
-----------------
GROUP: 23035
890208    (Material:100% Cotton,Bed Size:Short Queen,Pat...
948651    (Type:Fitted Sheets,Thre

908635    (Brand:"Handmade",Type:Fitted Sheets,Color:Bla...
958773    (Type:Fitted Sheets,Pillowcase Size:Queen,Bran...
-----------------
GROUP: 10593
909093    (Bed Size:Full Extra Long,Pillowcase Size:Quee...
961361    (MPN:Does Not Apply,Pattern:Solid,Material:Egy...
-----------------
GROUP: 42356
909132    (MPN:1,Manufacturer:Royal Tradition,PartNumber...
917817    (ISBN:Not Applicable,MPN:1,Brand:Royal Traditi...
-----------------
GROUP: 36616
909580    (Modified Item:No,Brand:Handmade,Pillow Dimens...
922800    (Set Included:1 Double Bed Sheet + 2 Pillow Co...
987904    (Set Included:1 Double Bed Sheet + 2 Pillow Co...
-----------------
GROUP: 16830
910445    (Brand:JiangJue,EAN:0689738255847,MPN:Not Appl...
932066    (ProductGroup:Home,Publisher:JiangJue,UPC:6898...
-----------------
GROUP: 37517
910612    (Brand:White classic,Manufacturer:ELITE,MPN:t1...
975636    (Brand:White classic,Item model number:t180-12...
-----------------
GROUP: 41242
910671    (EAN:0691197431649,ISBN:

950025    (Style:Contemporary,Brand:Unbranded,Country/Re...
-----------------
GROUP: 40088
936335    (Brand:Bedify,Pillowcase Size:NO,Thread Count:...
947368    (Pillowcase Size:NO,MPN:Does Not Apply,Materia...
984574    (Color:White,Country/Region of Manufacture:Ind...
-----------------
GROUP: 34335
936354    (Room:Bedroom,Type:Sheets,Color:Multi-Color,Co...
971317    (Type:Sheets,Room:Bedroom,Style:Traditional,Br...
-----------------
GROUP: 30618
936683    (Bed Size:Twin,Brand:Twin,Type:Sheet Sets,Colo...
989091    (Type:Sheet Sets,Rise:Poleyster Microfiber,Bra...
-----------------
GROUP: 4665
936866    (Country/Region of Manufacture:India,Pillow Co...
980270    (Bed Size:King,Cushion Cover Size:16 x 16 Inch...
-----------------
GROUP: 35495
936919    (Color:Sage,Brand:Unbranded,Bed Size:Queen)
995387    (Color:Sage,Bed Size:Queen,Brand:Unbranded)
-----------------
GROUP: 14648
937220    (Size Type:Regular,Size:QUEEN,Brand:Elite,Weig...
950401    (Style:Custom,Manufacture_country:CHI

In [66]:
print(len(groups))

3112


In [67]:
"""
Evaluate clustering after it has gone through k-means and agglomorative clustering.
"""
p = full_clustering
# print(full_clustering)
actuals = []
preds = []
for i in range(len(p)):
    ID = c4_rows.iloc[i]['index']
    if ID in valid.index:
        actual = valid.loc[ID]['Group']
        actuals.append(actual)
        preds.append(p[i])
a = actuals # valid['Group'].tolist()
print(len(set(p)))
print(len(p))
evaluate(a, preds)

33383
42866
Confusion Matrix:
[[358744    362]
 [    94    200]]
precision: 0.35587188612099646
recall: 0.6802721088435374
f1 score: 0.4672897196261682


In [None]:
"""
Functions used to display the tags in a nicer manner below.
"""
def get_mapping(attribute):
    attribute = re.sub(r'[()]','', attribute)
#     attribute = re.split(r',', attribute)
#     attribute = [a.split(':') for a in attribute]
    attribute = re.split(r':+', attribute)
    attribute = [a.split(',') for a in attribute]
    '''
    for i, a in enumerate(attribute):
        attribute[i] = [s.strip() for s in a]
        all_attributes.add(attribute[i][0])
    #print(f'atttribute is: {attribute}')
    '''
    mapping = {}
    for i in range(len(attribute) - 1):
        if i == len(attribute) - 2:
            mapping[attribute[i][-1]] = attribute[i + 1][:]
        else:
            mapping[attribute[i][-1]] = attribute[i + 1][:-1]
    return mapping

def mapping_to_string(mapping):
    keys = list(mapping.keys())
    keys.sort()
    return ';'.join([f'{key}:{list(sorted(mapping[key]))}' for key in keys])

In [None]:
"""
Show the items where the two clusterings ,p(redict) and a(ctual), differ.
"""
for i in range(len(a)):
    for j in range(len(p)):
        if i != j:
            if a[i] == a[j] and p[i] != p[j]:
                print("These two items should be in the same group:")
            elif a[i] != a[j] and p[i] == p[j]:
                continue
                print("These two items should not be in the same group")
            else:
                continue
            print(f"tags: {mapping_to_string(get_mapping(validation_rows['Tags'].iloc[[i]].to_string(index=False)))}")
            print(f"urls: {validation_rows['primary_image_url'].iloc[[i]].to_string()}")
            print(f"tags: {mapping_to_string(get_mapping(validation_rows['Tags'].iloc[[j]].to_string(index=False)))}")
            print(f"urls: {validation_rows['primary_image_url'].iloc[[j]].to_string()}")
            print('-------------------')

In [None]:
old_clustering = full_clustering
print(old_clustering)

In [None]:
%matplotlib qt
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram
import numpy as np
from PIL import Image
import requests
from io import BytesIO

def plot_dendrogram(model, **kwargs):
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack([model.children_, model.distances_,
                                      counts]).astype(float)

    #response = requests.get(url)
    #img = Image.open(BytesIO(response.content))
    link_labels = [df['Tags'][i] for i in clustering.labels_]
    dendrogram(linkage_matrix, labels = link_labels)
plot_dendrogram(clustering)
plt.show()

In [None]:
import re
from collections import Counter
freq=Counter()
attribute=[['']]*len(df)
trialrange=5000
for x in range(trialrange):#range(int(len(df)/10)):#len(df)
    attribute[x]=df.iloc[x,3].lower()
    attribute[x] = re.sub(r'[()]','', attribute[x])
    attribute[x] = re.split(r',', attribute[x])
    attribute[x] = [a.split(':') for a in attribute[x]]
    freq+=Counter([i[0] for i in attribute[x]])
    tempdict={}
    for i in attribute[x]:

            try:
                tempdict[i[0]]=float(i[1])
            except:
                try:
                    tempdict[i[0]]=i[1]
                except:
                    pass
    attribute[x]=tempdict

df['seg']=attribute
#print(df['seg'])