In [80]:
import pandas as pd
data = pd.read_csv('RetailerPromotionStrategy_data.csv')

In [81]:
from pandasql import sqldf
def pysqldf(q):
    """Run the SQL string `q` with pandasql's `sqldf`.

    `globals()` is passed as the environment so that queries can refer to
    top-level DataFrames of this notebook by their variable names.
    Returns whatever `sqldf` returns for the query.
    """
    return sqldf(q, globals())

<br><br>

### Orphan Return and Regular Return

Based on our observation, there are 4 behaviors of a returned item in a transaction:  

**1) returns with matching purchase**  
If we can find an exact match for the sales amount for a returned item, and if the count is a negative number, then we can flag this SKU as a returned item.
 
**2) orphan returns, i.e. returns with no matching purchase within the same transaction**  
If the count is negative and the return does not match a prior sale within the same transaction, then it is an orphan return.

Treatment: We flag orphan returns and exclude them from the analysis, because we have no information about the original purchase. If other purchases occur in the same transaction as an orphan return, keep those purchases, whether they are discounted or regular-priced.

**3) both returns and discounts in the same transaction**
 
**4) returns and regular priced item purchases in the same transaction**

Scenarios 1 and 2 address whether we can find a matching purchase for the returned item; scenarios 3 and 4 address what other kinds of purchases may appear alongside a returned item in the same transaction.


In [82]:
# All line items with a negative Count, i.e. every candidate return row.
d2 = data[data['Count'] < 0]

In [119]:
# Zero-Count rows with a positive Sales amount (the "credit back" lines).
# Keep the complete filtered frame under this name; assigning `.head()` here
# would leave `d_credit` holding only the first five matching rows.
# Use `d_credit.head()` in its own cell to preview.
d_credit = data[(data.Count==0)&(data.Sales>0)]

In [7]:
# Returns with a matching purchase ("regular returns").
# A row with negative Count is kept when the same transaction also contains a
# purchase row (Count>0) of the same SKU at the same unit price (Sales/Count).
# Subquery `b` is DISTINCT on (TransactionID, SKU, Unit), so each return row
# joins to at most one row of `b` and is not duplicated in the result.
q = '''
    select
        a.*
    from
    (select * from data where Count<0) a
    join
    (select distinct TransactionID, SKU, Sales/Count as Unit from data where Count>0) b
    on 
    a.TransactionID=b.TransactionID and a.SKU=b.SKU and a.Sales/a.Count = b.Unit
    
    '''
d3 = pysqldf(q)

In [105]:
d3.head(3)

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
0,A29,11-7270-00007-00066,5,-2013.0,-1.0,3/11/12 00:00,A2729
1,A19,11-7287-00025-00075,4,-1721.0,-1.0,3/11/12 00:00,A16911
2,A27,11-7069-00020-00010,6,-4252.0,-1.0,3/11/12 00:00,A7293


In [84]:
# Spot-check one transaction/SKU pair: the output lists two purchase lines
# (Count=1) and two return lines (Count=-1), all at the same amount.
data[(data.TransactionID=='11-5634-00002-00092')&(data.SKU=='A15554')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
16410,A78,11-5634-00002-00092,8,695.0,1.0,3/11/12 00:00,A15554
16411,A78,11-5634-00002-00092,9,695.0,1.0,3/11/12 00:00,A15554
16412,A78,11-5634-00002-00092,11,-695.0,-1.0,3/11/12 00:00,A15554
16413,A78,11-5634-00002-00092,13,-695.0,-1.0,3/11/12 00:00,A15554


#### Find rows where a customer returned an item purchased earlier at the regular price AND purchased the same item at a discounted price.


In [141]:
# d31: rows of `data` with Count=0 and Sales>0, restricted to the
# (TransactionID, SKU) pairs that appear in d3 (the matched returns).
q = '''
    select
        a.*
    from data a
    join (select distinct TransactionID, SKU from d3) b
    on 
    a.TransactionID=b.TransactionID and a.SKU=b.SKU 
    where 
        a.Count=0 and a.Sales>0
    '''
d31 = pysqldf(q)

In [142]:
# d32: rows of `data` with Count=0 and Sales<0 (presumably discount lines),
# restricted to the (TransactionID, SKU) pairs that appear in d3.
q = '''
    select
        a.*
    from data a
    join (select distinct TransactionID, SKU from d3) b
    on 
    a.TransactionID=b.TransactionID and a.SKU=b.SKU 
    where 
        a.Count=0 and a.Sales<0
    '''
d32 = pysqldf(q)

In [147]:
# d33: anti-join of d32 against d31 -- the distinct (TransactionID, SKU) pairs
# that have a negative zero-Count row (d32) but no positive zero-Count row (d31).
# The LEFT JOIN plus "b.TransactionID is null" keeps only the unmatched pairs.
q = '''
    select
        distinct a.TransactionID, a.SKU
    from d32 a
    left join d31 b
    on a.TransactionID=b.TransactionID and a.SKU=b.SKU
    where b.TransactionID is null
    '''
d33 = pysqldf(q)

In [148]:
d33.head()

Unnamed: 0,TransactionID,SKU
0,11-5634-00009-00336,A2341
1,11-5634-00024-00094,A11213
2,11-5634-00024-00177,A21268
3,11-5634-00024-00199,A7184
4,12-0856-00009-00096,A20909


In [179]:
# Number of flagged (TransactionID, SKU) pairs, then the number of distinct
# transactions among them. Single-argument print(...) gives the same output
# under Python 2 and Python 3, whereas the print statement is Python 2 only.
print(d33.shape)
print(len(d33.TransactionID.unique()))

(82, 2)
78


In [138]:
# Inspect one pair listed in d33: the output shows return lines, purchase lines
# and negative zero-Count lines, but no positive zero-Count line.
data[(data.TransactionID=='11-5634-00024-00094')&(data.SKU=='A11213')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
48134,A78,11-5634-00024-00094,2,-6995.0,-1.0,3/11/12 00:00,A11213
48135,A78,11-5634-00024-00094,4,-2500.0,-1.0,3/11/12 00:00,A11213
48136,A78,11-5634-00024-00094,5,6995.0,1.0,3/11/12 00:00,A11213
48137,A78,11-5634-00024-00094,6,-4495.0,0.0,3/11/12 00:00,A11213
48138,A78,11-5634-00024-00094,7,6995.0,1.0,3/11/12 00:00,A11213
48139,A78,11-5634-00024-00094,8,-4495.0,0.0,3/11/12 00:00,A11213


In [151]:
# d34: the matched returns in d3 minus every row whose (TransactionID, SKU)
# pair is listed in d33 (anti-join via LEFT JOIN + "is null").
# These rows are what the rest of the notebook treats as regular returns.
q = '''
    select
        a.*
    from d3 a
    left join d33 b
    on a.TransactionID=b.TransactionID and a.SKU=b.SKU
    where b.TransactionID is null
    '''
d34 = pysqldf(q)

In [270]:
# Row counts before (d3) and after (d34) removing the d33 pairs, then the number
# of distinct transactions left in d34. Single-argument print(...) behaves the
# same under Python 2 and Python 3, whereas the print statement is Python 2 only.
print(d3.shape)
print(d34.shape)
print(len(d34.TransactionID.unique()))

(5803, 7)
(5708, 7)
4781


In [269]:
d34.head()

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
0,A29,11-7270-00007-00066,5,-2013.0,-1.0,3/11/12 00:00,A2729
1,A19,11-7287-00025-00075,4,-1721.0,-1.0,3/11/12 00:00,A16911
2,A27,11-7069-00020-00010,6,-4252.0,-1.0,3/11/12 00:00,A7293
3,A29,11-7270-00007-00072,37,-2395.0,-1.0,3/11/12 00:00,A19365
4,A29,11-7270-00007-00089,19,-3995.0,-1.0,3/11/12 00:00,A21166


In [271]:
data[(data.TransactionID=='11-7270-00007-00066')&(data.SKU=='A2729')][-4:]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
98,A29,11-7270-00007-00066,3,2013.0,1.0,3/11/12 00:00,A2729
99,A29,11-7270-00007-00066,4,2013.0,1.0,3/11/12 00:00,A2729
100,A29,11-7270-00007-00066,5,-2013.0,-1.0,3/11/12 00:00,A2729


In [226]:
# Credit-back rows for regular returns: rows of `data` with Sales>0 and Count=0
# whose (TransactionID, SKU) pair also appears among the regular returns (d34).
q = '''
    select
        a.*
    from data a
    join (select distinct TransactionID, SKU from d34) b
    on a.TransactionID=b.TransactionID and a.SKU=b.SKU
    where a.Sales>0 and a.Count=0
    '''
d34_1 = pysqldf(q)

In [236]:
d34_1.head()

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
0,A29,11-7270-00007-00110,29,845.0,0.0,3/11/12 00:00,A6007
1,A29,11-7270-00007-00116,20,234.0,0.0,3/11/12 00:00,A2452
2,A29,11-7270-00008-00358,6,670.0,0.0,3/11/12 00:00,A13774
3,A29,11-7270-00014-00241,10,333.0,0.0,3/11/12 00:00,A21307
4,A29,11-7270-00006-00209,33,475.0,0.0,3/11/12 00:00,A15257


In [228]:
d34_1.shape

(356, 7)

In [235]:
# Inspect the first d34_1 example; [-4:] keeps only the last four rows for this pair.
# The output shows a purchase, a negative zero-Count line, the matching return,
# and then the positive zero-Count (credit-back) line.
data[(data.TransactionID=='11-7270-00007-00110')&(data.SKU=='A6007')][-4:]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
1158,A29,11-7270-00007-00110,26,2195.0,1.0,3/11/12 00:00,A6007
1159,A29,11-7270-00007-00110,27,-845.0,0.0,3/11/12 00:00,A6007
1160,A29,11-7270-00007-00110,28,-2195.0,-1.0,3/11/12 00:00,A6007
1161,A29,11-7270-00007-00110,29,845.0,0.0,3/11/12 00:00,A6007


<br>

In [None]:
# Remove the regular returns from all returns; the rows that remain are the orphan returns

In [156]:
# Sizes of all negative-Count rows (d2) and of the regular returns (d34), then
# the number of distinct transactions in d34. Single-argument print(...) behaves
# the same under Python 2 and Python 3, whereas the print statement is Python 2 only.
print(d2.shape)
print(d34.shape)
print(len(d34.TransactionID.unique()))

(9349, 7)
(5708, 7)
4781


In [157]:
# Orphan returns = all negative-Count rows (d2) that are not regular returns (d34).
# A left merge on every column with indicator=True adds a '_merge' column that is
# 'both' for d2 rows also present in d34 and 'left_only' for rows absent from d34.
d_orphan = pd.merge(d2,d34,on=['StoreID','TransactionID','Line','Sales','Count','Date','SKU'],how='left',indicator=True)
# Keep the 'left_only' rows and drop the helper column in one chained expression.
# This returns a new frame instead of mutating a filtered slice in place, and
# passes `axis` by keyword (newer pandas no longer accepts it positionally).
d_orphan = d_orphan[d_orphan['_merge']=='left_only'].drop('_merge', axis=1)

In [159]:
# Sanity check of the split: d2 rows should equal d34 rows plus d_orphan rows.
# Also shows the distinct transaction counts for d3 and d_orphan.
# Single-argument print(...) behaves the same under Python 2 and Python 3,
# whereas the print statement is Python 2 only.
print(d2.shape)
print(d34.shape)
print(d_orphan.shape)
print(len(d3.TransactionID.unique()))
print(len(d_orphan.TransactionID.unique()))

(9349, 7)
(5708, 7)
(3641, 7)
4818
2813


In [160]:
d_orphan.head()

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
33,A29,11-7270-00012-00113,2,-17990.0,-2.0,3/11/12 00:00,A22728
35,A29,11-7270-00012-00126,1,-3500.0,-1.0,3/11/12 00:00,A24521
36,A29,11-7270-00012-00154,2,-17475.0,-5.0,3/11/12 00:00,A6386
37,A29,11-7270-00012-00167,2,-54000.0,-1.0,3/11/12 00:00,A1149
38,A29,11-7270-00012-00174,2,-11985.0,-3.0,3/11/12 00:00,A25628


In [161]:
data[(data.TransactionID=='11-7270-00012-00190')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
5050,A29,11-7270-00012-00190,1,-18995.0,-1.0,3/11/12 00:00,A10981
5051,A29,11-7270-00012-00190,2,-7995.0,-1.0,3/11/12 00:00,A21473


In [162]:
# Inspect one SKU with several returns: the output shows two purchases at 1095
# and three return lines, of which only the -1095 one equals the purchase price.
data[(data.TransactionID=='12-0033-00009-00040')&(data.SKU=='A10982')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
796499,A61,12-0033-00009-00040,7,1095.0,1.0,3/12/12 00:00,A10982
796500,A61,12-0033-00009-00040,8,1095.0,1.0,3/12/12 00:00,A10982
796501,A61,12-0033-00009-00040,10,-750.0,-1.0,3/12/12 00:00,A10982
796502,A61,12-0033-00009-00040,12,-1095.0,-1.0,3/12/12 00:00,A10982
796503,A61,12-0033-00009-00040,14,-345.0,-1.0,3/12/12 00:00,A10982


In [180]:
# Write the orphan-return rows to orphan.csv (comma-separated, without the index column).
d_orphan.to_csv('orphan.csv', sep=',',index=False)

In [237]:
# Credit-back rows for orphan returns: rows of `data` with Sales>0 and Count=0
# whose (TransactionID, SKU) pair also appears among the orphan returns (d_orphan).
q = '''
    select
        a.*
    from data a
    join (select distinct TransactionID, SKU from d_orphan) b
    on a.TransactionID=b.TransactionID and a.SKU=b.SKU
    where a.Sales>0 and a.Count=0
    '''
d_orphan_credit = pysqldf(q)

In [243]:
d_orphan_credit.head()

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
0,A12,12-7333-00008-00217,3,1836.0,0.0,3/12/12 00:00,A25555
1,A12,12-7333-00008-00217,5,1084.0,0.0,3/12/12 00:00,A17662
2,A12,12-7333-00008-00217,7,1083.0,0.0,3/12/12 00:00,A17662
3,A14,12-7016-00016-00142,4,1500.0,0.0,3/12/12 00:00,A10809
4,A14,12-7016-00016-00281,3,3750.0,0.0,3/12/12 00:00,A10809


In [239]:
d_orphan_credit.shape

(10, 7)

In [240]:
# Inspect one pair from d_orphan_credit: in the output each return line
# (Count=-1) is followed by a positive zero-Count (credit-back) line.
data[(data.TransactionID=='12-7333-00008-00217')&(data.SKU=='A17662')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
184611,A12,12-7333-00008-00217,4,-3581.0,-1.0,3/12/12 00:00,A17662
184612,A12,12-7333-00008-00217,5,1084.0,0.0,3/12/12 00:00,A17662
184613,A12,12-7333-00008-00217,6,-3581.0,-1.0,3/12/12 00:00,A17662
184614,A12,12-7333-00008-00217,7,1083.0,0.0,3/12/12 00:00,A17662


<br><br>

#### Add a `Type` column to the raw dataset indicating whether each row is a regular return, an orphan return, or a credit-back row for either kind of return.

In [164]:
data.head(2)

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU
0,A29,11-7270-00007-00057,36,850.0,1.0,3/11/12 00:00,A21773
1,A29,11-7270-00007-00057,39,1500.0,1.0,3/11/12 00:00,A18009


In [244]:
# Left-merge the regular returns (d34) onto the raw data using every column as key.
# The indicator column 'Type1' is 'both' for rows also found in d34 and
# 'left_only' for all other rows.
df = pd.merge(data,d34,on=['StoreID','TransactionID','Line','Sales','Count','Date','SKU'],how='left',indicator='Type1')

In [245]:
import numpy as np
# Recode the merge indicator: 'both' becomes 'Regular_Return', everything else 'Others'.
df['Type1'] = np.where(df.Type1 == 'both', 'Regular_Return','Others')

In [246]:
# Flag the three remaining categories with the same merge-and-recode step:
# left-merge the category's rows onto df on every raw column, let the indicator
# column mark rows found in that subset ('both'), then recode the indicator to
# the category label or 'Others'.
_row_keys = ['StoreID','TransactionID','Line','Sales','Count','Date','SKU']
_flag_specs = [
    (d_orphan, 'Type2', 'Orphan_Return'),
    (d34_1, 'Type3', 'CreditBack_RegularReturn'),
    (d_orphan_credit, 'Type4', 'CreditBack_OrphanReturn'),
]
for _subset, _flag_col, _label in _flag_specs:
    df = pd.merge(df, _subset, on=_row_keys, how='left', indicator=_flag_col)
    df[_flag_col] = np.where(df[_flag_col] == 'both', _label, 'Others')

In [251]:
def new_column(choice):
    """Collapse the four per-category flags of one row into a single label.

    `choice` must unpack into exactly four values, ordered as
    (Type1, Type2, Type3, Type4). The flags are checked in that order and
    the label of the first one that is set is returned; an empty string is
    returned when none of the four is set.
    """
    regular, orphan, credit_regular, credit_orphan = choice
    # Pair each flag with the label it carries when set; tuple order fixes precedence.
    candidates = (
        (regular, 'Regular_Return'),
        (orphan, 'Orphan_Return'),
        (credit_regular, 'CreditBack_RegularReturn'),
        (credit_orphan, 'CreditBack_OrphanReturn'),
    )
    for flag, label in candidates:
        if flag == label:
            return label
    return ''

In [252]:
# Collapse the four flag columns into a single 'Type' label in one vectorised
# pass instead of calling a Python function once per row.
# np.select takes, for each row, the choice of the first condition that is True,
# which gives the same precedence as the if/elif chain in new_column
# (Type1, then Type2, Type3, Type4); rows matching no condition get ''.
_type_labels = [
    ('Type1', 'Regular_Return'),
    ('Type2', 'Orphan_Return'),
    ('Type3', 'CreditBack_RegularReturn'),
    ('Type4', 'CreditBack_OrphanReturn'),
]
df['Type'] = np.select(
    [df[_col] == _label for _col, _label in _type_labels],
    [_label for _col, _label in _type_labels],
    default='',
)

In [255]:
# Remove the four intermediate flag columns now that 'Type' holds the combined label.
# Rebinding `df` to the returned frame avoids in-place mutation, and `axis` is
# passed by keyword because newer pandas no longer accepts it positionally.
df = df.drop(['Type1','Type2','Type3','Type4'], axis=1)

In [258]:
# Write the raw data plus the 'Type' column to type.csv (comma-separated, without the index column).
df.to_csv('type.csv', sep=',',index=False)

In [259]:
df.shape

(1048575, 8)

In [256]:
df[(df.TransactionID=='12-7081-00015-00118')&(df.SKU=='A5483')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
485387,A33,12-7081-00015-00118,2,-33990.0,-2.0,3/12/12 00:00,A5483,Orphan_Return
485388,A33,12-7081-00015-00118,3,21000.0,0.0,3/12/12 00:00,A5483,CreditBack_OrphanReturn
485390,A33,12-7081-00015-00118,5,-16995.0,-1.0,3/12/12 00:00,A5483,Orphan_Return
485391,A33,12-7081-00015-00118,6,10500.0,0.0,3/12/12 00:00,A5483,CreditBack_OrphanReturn
485393,A33,12-7081-00015-00118,9,6495.0,1.0,3/12/12 00:00,A5483,
485394,A33,12-7081-00015-00118,10,-16995.0,-1.0,3/12/12 00:00,A5483,Orphan_Return


In [268]:
df[df.TransactionID=='11-7270-00007-00110'][-4:]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
1158,A29,11-7270-00007-00110,26,2195.0,1.0,3/11/12 00:00,A6007,
1159,A29,11-7270-00007-00110,27,-845.0,0.0,3/11/12 00:00,A6007,
1160,A29,11-7270-00007-00110,28,-2195.0,-1.0,3/11/12 00:00,A6007,Regular_Return
1161,A29,11-7270-00007-00110,29,845.0,0.0,3/11/12 00:00,A6007,CreditBack_RegularReturn


In [257]:
df[(df.TransactionID=='12-1803-00013-00364')&(df.SKU=='A21325')]

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
710011,A51,12-1803-00013-00364,1,1.295,70.0,3/12/12 00:00,A21325,
710012,A51,12-1803-00013-00364,2,-1.14917,0.0,3/12/12 00:00,A21325,
710013,A51,12-1803-00013-00364,4,81400.0,44.0,3/12/12 00:00,A21325,
710014,A51,12-1803-00013-00364,5,-72233.0,0.0,3/12/12 00:00,A21325,


In [261]:
df[df.TransactionID=='11-7270-00007-00066']

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
97,A29,11-7270-00007-00066,1,300.0,1.0,3/11/12 00:00,A21771,
98,A29,11-7270-00007-00066,3,2013.0,1.0,3/11/12 00:00,A2729,
99,A29,11-7270-00007-00066,4,2013.0,1.0,3/11/12 00:00,A2729,
100,A29,11-7270-00007-00066,5,-2013.0,-1.0,3/11/12 00:00,A2729,Regular_Return
101,A29,11-7270-00007-00066,6,3395.0,1.0,3/11/12 00:00,A20,
102,A29,11-7270-00007-00066,7,2195.0,1.0,3/11/12 00:00,A25185,
103,A29,11-7270-00007-00066,12,10000.0,1.0,3/11/12 00:00,A12810,
104,A29,11-7270-00007-00066,15,600.0,1.0,3/11/12 00:00,A17264,
105,A29,11-7270-00007-00066,16,2000.0,1.0,3/11/12 00:00,A13646,


In [260]:
df[df.TransactionID=='11-7270-00012-00190']

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
5050,A29,11-7270-00012-00190,1,-18995.0,-1.0,3/11/12 00:00,A10981,Orphan_Return
5051,A29,11-7270-00012-00190,2,-7995.0,-1.0,3/11/12 00:00,A21473,Orphan_Return


In [262]:
df[df.TransactionID=='11-7270-00012-00154']

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
5035,A29,11-7270-00012-00154,2,-17475.0,-5.0,3/11/12 00:00,A6386,Orphan_Return
5036,A29,11-7270-00012-00154,3,10000.0,5.0,3/11/12 00:00,A6386,


In [263]:
df[df.TransactionID=='11-5634-00024-00094']

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
48134,A78,11-5634-00024-00094,2,-6995.0,-1.0,3/11/12 00:00,A11213,Orphan_Return
48135,A78,11-5634-00024-00094,4,-2500.0,-1.0,3/11/12 00:00,A11213,Orphan_Return
48136,A78,11-5634-00024-00094,5,6995.0,1.0,3/11/12 00:00,A11213,
48137,A78,11-5634-00024-00094,6,-4495.0,0.0,3/11/12 00:00,A11213,
48138,A78,11-5634-00024-00094,7,6995.0,1.0,3/11/12 00:00,A11213,
48139,A78,11-5634-00024-00094,8,-4495.0,0.0,3/11/12 00:00,A11213,


In [264]:
df[df.TransactionID=='12-0537-00011-00108']

Unnamed: 0,StoreID,TransactionID,Line,Sales,Count,Date,SKU,Type
809584,A63,12-0537-00011-00108,1,600.0,2.0,3/12/12 00:00,A8078,
809585,A63,12-0537-00011-00108,2,-240.0,0.0,3/12/12 00:00,A8078,
809963,A63,12-0537-00011-00108,10,-600.0,-2.0,3/12/12 00:00,A8078,Orphan_Return
