# Gerekli Kütüphanelerin Eklenmesi

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse.linalg import svds

# Veri Setinin yüklenmesi

In [2]:
data = pd.read_csv('PBL 5 recommendation data.csv', encoding='ISO-8859-1',low_memory=False)

In [3]:
# Veri setinde kolonlar hakkında çok fazla hata aldığım için kolonları yazdırdım
print("Columns in dataset:", data.columns.tolist())

Columns in dataset: ['Customers.id', 'Customers.fname', 'Customers.lname', 'Customers.company', 'Customers.create_date', 'Customers.status', 'Customers.mailing', 'Customers.reminders', 'Customers.tax_exempt', 'Customers.account_id', 'Customers.sales_rep', 'Customers.rewards', 'Customers.profile_id', 'Customers.last_modified', 'Customers.customer_type', 'Orders.id', 'Orders.customer_id', 'Orders.fname', 'Orders.lname', 'Orders.company', 'Orders.order_number', 'Orders.reorder_id', 'Orders.external_source', 'Orders.external_id', 'Orders.currency', 'Orders.sales_rep', 'Orders.subtotal', 'Orders.tax', 'Orders.shipping', 'Orders.coupon_id', 'Orders.coupon_amount', 'Orders.gift_id', 'Orders.gift_amount', 'Orders.fee_name', 'Orders.fee_amount', 'Orders.discount_name', 'Orders.discount_amount', 'Orders.total', 'Orders.balance_due', 'Orders.shipping_carrier', 'Orders.shipping_method', 'Orders.shipping_trans', 'Orders.shipping_flags', 'Orders.weight', 'Orders.tracking', 'Orders.payment_status', '

In [4]:
data.fillna(data.mean(numeric_only=True), inplace=True)  # sayısal kolonlar için
data.select_dtypes(include=['object']).apply(lambda x: x.fillna(x.mode()[0], inplace=True))

Customers.fname       None
Customers.lname       None
Customers.company     None
Orders.fname          None
Orders.lname          None
                      ... 
Products.size         None
Products.material     None
Products.arm_style    None
Products.seat_size    None
Products.family_id    None
Length: 73, dtype: object

# En popüler ürünleri satış hacmine ve dolar tutarına göre hesaplama

In [5]:
data['total_dollar_amount'] = data['Order_Items.qty'] * data['Order_Items.price']
most_popular_by_volume = data.groupby('Order_Items.product_id')['Order_Items.qty'].sum().idxmax()
most_popular_by_dollar = data.groupby('Order_Items.product_id')['total_dollar_amount'].sum().idxmax()

# Maksimum satın alma işlemi gerçekleştiren şirketi belirleme

In [6]:
company_max_purchase = data.groupby('Customers.company')['total_dollar_amount'].sum().idxmax()

In [7]:
print("Most popular product by volume:", most_popular_by_volume)

Most popular product by volume: 4451.426162370513


In [8]:
print("Most popular product by dollar amount:", most_popular_by_dollar)

Most popular product by dollar amount: 1846.0


In [9]:
print("Company with maximum purchase:", company_max_purchase)

Company with maximum purchase: Company59


# Popülerlik Önerisi

In [10]:
popularity_based_data = data.groupby('Order_Items.product_id')['Order_Items.qty'].sum().sort_values(ascending=False).head(10)
print("Top 10 popular products by volume:", popularity_based_data)

Top 10 popular products by volume: Order_Items.product_id
4451.426162    607
1846.000000    396
2107.000000    228
1672.000000    220
1648.000000    133
1842.000000    112
493.000000     106
911.000000      68
910.000000      68
1469.000000     68
Name: Order_Items.qty, dtype: int64


# TF-IDF ve Kosinüs Benzerliğini kullanarak içerik tabanlı bir Öneri Aracı oluşturma

In [11]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['Products.long_description'].dropna())
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [12]:
def product_recommendations(product_index, cosine_sim=cosine_sim, products=data):
    sim_scores = list(enumerate(cosine_sim[product_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Gets the top 10 similar items
    product_indices = [i[0] for i in sim_scores]
    return products.iloc[product_indices]['Products.name']

# Örnek kullanım

In [13]:

recommendations = product_recommendations(0)
print("Recommended products based on content similarity:", recommendations)

Recommended products based on content similarity: 770                                 Portable Shower Bench
798                       CURAD A and D Ointment,0.017 OZ
1037                                Comfort Foam Cushions
1360    Remedy Phytoplex Hydrating Cleansing Gel,16.00 OZ
1452    Four Wheel Walker Rollator with Fold Up Remova...
1476                    Bariatric Rollator with 8" Wheels
1524                            Guardian Forearm Crutches
1782         Triangular Intake/Output Containers,960.0 ML
1807               Lightweight Steel Transport Wheelchair
1884                                 Standard Trapeze Bar
Name: Products.name, dtype: object
