# İş Problemi
- Türkiye’nin en büyük online hizmet platformu olan Armut, hizmet verenler ile hizmet almak isteyenleri buluşturmaktadır.
Bilgisayar veya akıllı telefon üzerinden birkaç dokunuşla temizlik, tadilat, nakliyat gibi hizmetlere kolayca
ulaşılmasını sağlamaktadır.
- Hizmet alan kullanıcıları ve bu kullanıcıların almış oldukları servis ve kategorileri içeren veri setini kullanarak **Association
Rule Learning** ile ürün tavsiye sistemi oluşturulmak istenmektedir.

In [355]:
import pandas as pd
pd.set_option('display.max_columns', None)
from mlxtend.frequent_patterns import apriori, association_rules

In [356]:
df = pd.read_csv('armut_data.csv')

**Veri seti müşterilerin aldıkları servislerden ve bu servislerin kategorilerinden oluşmaktadır. Alınan her hizmetin tarih ve saat
bilgisini içermektedir.**

In [357]:
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate
0,25446,4,5,2017-08-06 16:11:00
1,22948,48,5,2017-08-06 16:12:00
2,10618,0,8,2017-08-06 16:13:00
3,7256,9,4,2017-08-06 16:14:00
4,25446,48,5,2017-08-06 16:16:00


In [358]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162523 entries, 0 to 162522
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   UserId      162523 non-null  int64 
 1   ServiceId   162523 non-null  int64 
 2   CategoryId  162523 non-null  int64 
 3   CreateDate  162523 non-null  object
dtypes: int64(3), object(1)
memory usage: 5.0+ MB


In [359]:
# CreateDate is read from the CSV as object (string); convert it to datetime.
df["CreateDate"] = pd.to_datetime(df["CreateDate"])

In [360]:
# Build one "Service" label that represents ServiceId and CategoryId together,
# joined with an underscore (e.g. "4_5").
df["Service"] = df["ServiceId"].astype(str) + "_" + df["CategoryId"].astype(str)

In [361]:
df.head(2)

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service
0,25446,4,5,2017-08-06 16:11:00,4_5
1,22948,48,5,2017-08-06 16:12:00,48_5


In [362]:
# Keep only the year and month of CreateDate (monthly period) in a new column.
df["New_Date"] = df["CreateDate"].dt.to_period("M")

In [363]:
df.head(2)

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service,New_Date
0,25446,4,5,2017-08-06 16:11:00,4_5,2017-08
1,22948,48,5,2017-08-06 16:12:00,48_5,2017-08


In [364]:
# Build BasketID by joining UserId and New_Date, so that the services a user
# bought within the same month share one basket id.
df["BasketID"] = df["UserId"].astype(str) + "_" + df["New_Date"].astype(str)

In [365]:
df.head(2)

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service,New_Date,BasketID
0,25446,4,5,2017-08-06 16:11:00,4_5,2017-08,25446_2017-08
1,22948,48,5,2017-08-06 16:12:00,48_5,2017-08,22948_2017-08


# Birliktelik Kurallarının Üretilmesi

In [366]:
# Prepare the data for the apriori algorithm: pivot to one row per BasketID and
# one column per Service. Each cell counts the rows of that basket/service
# pair; pairs that never occur are filled with False.
basket_service_df = df.pivot_table(index = "BasketID", 
                                   columns="Service", 
                                   values="CategoryId", 
                                   aggfunc="count", 
                                   fill_value=False)

In [367]:
basket_service_df.head(2)

Service,0_8,10_9,11_11,12_7,13_11,14_7,15_1,16_8,17_5,18_4,19_6,1_4,20_5,21_5,22_0,23_10,24_10,25_0,26_7,27_7,28_4,29_0,2_0,30_2,31_6,32_4,33_4,34_6,35_11,36_1,37_0,38_4,39_10,3_5,40_8,41_3,42_1,43_2,44_0,45_6,46_4,47_7,48_5,49_1,4_5,5_11,6_7,7_3,8_5,9_4
BasketID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0_2017-08,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1.0,False,1.0,False,False,False,False,False,False,False
0_2017-09,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1.0,False,1.0,False,False,False,False,False


In [368]:
# Turn the purchase counts into booleans: True when the basket contains the
# service at least once, False otherwise. A vectorised comparison is used
# instead of the deprecated, element-wise DataFrame.applymap.
basket_service_df = basket_service_df > 0

In [369]:
basket_service_df.head(2)

Service,0_8,10_9,11_11,12_7,13_11,14_7,15_1,16_8,17_5,18_4,19_6,1_4,20_5,21_5,22_0,23_10,24_10,25_0,26_7,27_7,28_4,29_0,2_0,30_2,31_6,32_4,33_4,34_6,35_11,36_1,37_0,38_4,39_10,3_5,40_8,41_3,42_1,43_2,44_0,45_6,46_4,47_7,48_5,49_1,4_5,5_11,6_7,7_3,8_5,9_4
BasketID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0_2017-08,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False
0_2017-09,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False


In [370]:
# Find the service combinations whose support is at least 0.01, i.e. that occur
# in at least 1% of the baskets. use_colnames=True reports the itemsets with
# the Service labels rather than column positions.
frequent_itemsets = apriori(basket_service_df, min_support=0.01, use_colnames=True)

In [371]:
frequent_itemsets.sort_values(by = "support",ascending = False).head(10)

Unnamed: 0,support,itemsets
8,0.238121,(18_4)
19,0.130286,(2_0)
5,0.120963,(15_1)
39,0.067762,(49_1)
28,0.066568,(38_4)
3,0.056627,(13_11)
12,0.047515,(22_0)
9,0.045563,(19_6)
15,0.042895,(25_0)
7,0.041533,(17_5)


In [372]:
# Derive association rules from the frequent itemsets, keeping the rules whose
# support is at least 0.01.
rules = association_rules(frequent_itemsets,metric="support", min_threshold=0.01)

In [373]:
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2_0),(13_11),0.130286,0.056627,0.012819,0.098394,1.737574,0.005442,1.046325
1,(13_11),(2_0),0.056627,0.130286,0.012819,0.226382,1.737574,0.005442,1.124216
2,(2_0),(15_1),0.130286,0.120963,0.033951,0.260588,2.154278,0.018191,1.188833
3,(15_1),(2_0),0.120963,0.130286,0.033951,0.280673,2.154278,0.018191,1.209066
4,(33_4),(15_1),0.02731,0.120963,0.011233,0.411311,3.400299,0.007929,1.493211


arl_recommender fonksiyonu ile hizmet önerisinde bulunma

In [374]:
def arl_recommender(rules_df, service, rec_count=1):
    """Recommend services associated with ``service`` by the given rules.

    Rules are ranked by ``lift`` in descending order. Every rule whose
    antecedents contain ``service`` contributes its consequent items, and
    each recommended service is reported only once (at its best rank).

    Args:
        rules_df: DataFrame with ``antecedents`` and ``consequents`` columns
            holding collections of service labels (e.g. frozensets) and a
            numeric ``lift`` column.
        service: Service label to look for among the antecedents.
        rec_count: Number of recommendations to keep from the top of the
            ranked list.

    Returns:
        list: The first ``rec_count`` distinct recommended service labels,
        highest lift first. Empty when no rule contains ``service``.
    """
    # Rules are sorted by lift, from the largest to the smallest.
    sorted_rules = rules_df.sort_values("lift", ascending=False)
    recommendation_list = []
    already_recommended = set()
    # Walk antecedents and consequents together instead of indexing rows
    # with iloc inside the loop.
    for antecedents, consequents in zip(sorted_rules["antecedents"],
                                        sorted_rules["consequents"]):
        if service not in antecedents:
            continue
        # sorted() makes the order of multi-item consequents deterministic
        # (set iteration order is arbitrary).
        for candidate in sorted(consequents):
            if candidate not in already_recommended:
                already_recommended.add(candidate)
                recommendation_list.append(candidate)

    return recommendation_list[0:rec_count]

In [375]:
arl_recommender(rules, "2_0", rec_count=2)

['22_0', '25_0']

# Çalışma Scripti Hazırlanması

In [376]:
def df_prep(df):
    """Return a copy of ``df`` with the columns needed for basket analysis.

    The input frame is left untouched; all derived columns are added to a
    copy so the caller's raw data is not modified as a side effect.

    Args:
        df: DataFrame with ``UserId``, ``ServiceId``, ``CategoryId`` and
            ``CreateDate`` columns.

    Returns:
        pandas.DataFrame: Copy of ``df`` where ``CreateDate`` is datetime and
        three columns are added: ``Service`` ("<ServiceId>_<CategoryId>"),
        ``New_Date`` (monthly period of ``CreateDate``) and ``BasketID``
        ("<UserId>_<New_Date>").
    """
    prepared = df.copy()
    prepared["CreateDate"] = pd.to_datetime(prepared["CreateDate"])
    # One label that represents the service and its category together.
    prepared["Service"] = (
        prepared["ServiceId"].astype(str) + "_" + prepared["CategoryId"].astype(str)
    )
    # Only year and month are kept, so a basket spans one calendar month.
    prepared["New_Date"] = prepared["CreateDate"].dt.to_period("M")
    prepared["BasketID"] = (
        prepared["UserId"].astype(str) + "_" + prepared["New_Date"].astype(str)
    )
    return prepared

In [377]:
def create_basket_service_df(df):
    """Build the boolean basket x service matrix used by apriori.

    Args:
        df: DataFrame with ``BasketID``, ``Service`` and ``CategoryId``
            columns.

    Returns:
        pandas.DataFrame: One row per ``BasketID`` and one column per
        ``Service``; a cell is True when the basket contains that service
        at least once and False otherwise.
    """
    # Count the rows of every basket/service pair; pairs that never occur
    # get a count of 0.
    service_counts = df.pivot_table(index="BasketID",
                                    columns="Service",
                                    values="CategoryId",
                                    aggfunc="count",
                                    fill_value=0)
    # Vectorised comparison instead of the deprecated element-wise applymap.
    return service_counts > 0

In [378]:
def create_rules(df, min_support = 0.01):
    """Mine association rules from prepared service records.

    The basket x service matrix is built from ``df``, frequent itemsets are
    extracted with apriori, and rules are derived from them using support
    as the filtering metric.

    Args:
        df: Prepared DataFrame accepted by ``create_basket_service_df``.
        min_support: Support threshold used both for the frequent itemsets
            and for the generated rules.

    Returns:
        The rules table produced by ``association_rules``.
    """
    itemsets = apriori(
        create_basket_service_df(df),
        min_support=min_support,
        use_colnames=True,
    )
    return association_rules(itemsets, metric="support", min_threshold=min_support)

In [379]:
def arl_recommender(rules_df, service, rec_count=1):
    """Recommend services associated with ``service`` by the given rules.

    Rules are ranked by ``lift`` in descending order. Every rule whose
    antecedents contain ``service`` contributes its consequent items, and
    each recommended service is reported only once (at its best rank).

    Args:
        rules_df: DataFrame with ``antecedents`` and ``consequents`` columns
            holding collections of service labels (e.g. frozensets) and a
            numeric ``lift`` column.
        service: Service label to look for among the antecedents.
        rec_count: Number of recommendations to keep from the top of the
            ranked list.

    Returns:
        list: The first ``rec_count`` distinct recommended service labels,
        highest lift first. Empty when no rule contains ``service``.
    """
    # Rules are sorted by lift, from the largest to the smallest.
    sorted_rules = rules_df.sort_values("lift", ascending=False)
    recommendation_list = []
    already_recommended = set()
    # Walk antecedents and consequents together instead of indexing rows
    # with iloc inside the loop.
    for antecedents, consequents in zip(sorted_rules["antecedents"],
                                        sorted_rules["consequents"]):
        if service not in antecedents:
            continue
        # sorted() makes the order of multi-item consequents deterministic
        # (set iteration order is arbitrary).
        for candidate in sorted(consequents):
            if candidate not in already_recommended:
                already_recommended.add(candidate)
                recommendation_list.append(candidate)

    return recommendation_list[0:rec_count]

In [380]:
df = pd.read_csv('armut_data.csv')

In [381]:
df = df_prep(df)

In [382]:
rules = create_rules(df)

In [383]:
arl_recommender(rules,"2_0",10)

['22_0', '25_0', '15_1', '13_11', '38_4']