In [None]:
import pandas as pd

# Path of the Excel workbook that is loaded below.
file_path = '/content/DM_5000_Descript.xlsx'
df = pd.read_excel(io=file_path)

# Number of distinct values in the InvoiceNo column; missing values are not
# counted (dropna=True is also nunique's default).
unique_invoice_count = df['InvoiceNo'].nunique(dropna=True)
print(f"Toplam benzersiz fatura sayısı: {unique_invoice_count}")


Toplam benzersiz fatura sayısı: 320


In [None]:
import pandas as pd
from itertools import combinations
from collections import defaultdict

def summarize_pair_metrics(transactions):
    """Summarise pairwise association metrics for a collection of baskets.

    Each basket is de-duplicated, every unordered pair of products sharing a
    basket is counted, and support, confidence and lift are derived per pair.

    Args:
        transactions: Iterable of iterables of product labels, one inner
            iterable per invoice. Labels must be hashable and mutually
            orderable, because each basket is sorted before pairing.

    Returns:
        dict with the keys 'Average Support', 'Average Confidence',
        'Average Lift', 'Most Related Products' (the pair with the highest
        lift, or None when no pair exists) and 'Max Lift'. Every numeric
        value is 0 when no basket holds two distinct products.
    """
    baskets = [set(products) for products in transactions]
    total_transactions = len(baskets)

    product_count = defaultdict(int)
    pair_count = defaultdict(int)
    for unique_products in baskets:
        for product in unique_products:
            product_count[product] += 1
        # Sorting gives every pair a single canonical orientation, so
        # (a, b) and (b, a) are never counted separately.
        for pair in combinations(sorted(unique_products), 2):
            pair_count[pair] += 1

    support_values = []
    confidence_values = []
    lift_values = []
    max_lift = 0
    max_lift_pair = None

    for (prod1, prod2), pair_cnt in pair_count.items():
        support = pair_cnt / total_transactions
        # Confidence is directional: only prod1 -> prod2 is evaluated, where
        # prod1 is the member of the pair that sorts first.
        confidence = pair_cnt / product_count[prod1]
        lift = confidence / (product_count[prod2] / total_transactions)

        support_values.append(support)
        confidence_values.append(confidence)
        lift_values.append(lift)

        # Strict '>' keeps the first pair encountered when lifts tie.
        if lift > max_lift:
            max_lift = lift
            max_lift_pair = (prod1, prod2)

    return {
        'Average Support': sum(support_values) / len(support_values) if support_values else 0,
        'Average Confidence': sum(confidence_values) / len(confidence_values) if confidence_values else 0,
        'Average Lift': sum(lift_values) / len(lift_values) if lift_values else 0,
        'Most Related Products': max_lift_pair,
        'Max Lift': max_lift,
    }


# A notebook kernel executes cells with __name__ == "__main__", so this driver
# still runs when the cell is executed; the guard only lets the helper above
# be imported and tested without the workbook being present.
if __name__ == "__main__":
    file_path = '/content/DM_5000_Descript.xlsx'
    df = pd.read_excel(file_path)

    # Rows missing any of the three columns are dropped before the analysis.
    df = df[['InvoiceNo', 'Description', 'Country']].dropna()

    countries = df['Country'].unique()
    country_averages = []

    for country in countries:
        print(f"Analiz yapılıyor: {country}")
        country_data = df[df['Country'] == country]

        # One list of product descriptions per invoice of this country.
        invoice_product_map = country_data.groupby('InvoiceNo')['Description'].apply(list)

        country_averages.append({
            'Country': country,
            **summarize_pair_metrics(invoice_product_map),
        })

    # Explicit columns keep the sort below valid even when no country rows
    # survived dropna() (an empty record list would otherwise yield a frame
    # with no 'Average Lift' column and sort_values would raise KeyError).
    result_columns = [
        'Country',
        'Average Support',
        'Average Confidence',
        'Average Lift',
        'Most Related Products',
        'Max Lift',
    ]
    country_avg_df = pd.DataFrame(country_averages, columns=result_columns)

    country_avg_df = country_avg_df.sort_values(by='Average Lift', ascending=False)

    country_avg_df.to_csv('country_comparison_with_top_pairs.csv', index=False)

    print("Ülkelere göre ortalama metrikler ve en güçlü ilişkili ürünler:")
    print(country_avg_df)

    # The browser download only exists inside Colab; elsewhere the CSV has
    # already been written to disk, so skip the download instead of crashing.
    try:
        from google.colab import files
    except ImportError:
        print("google.colab bulunamadı; dosya yerel olarak kaydedildi: country_comparison_with_top_pairs.csv")
    else:
        files.download('country_comparison_with_top_pairs.csv')


Analiz yapılıyor: 1
Analiz yapılıyor: 2
Analiz yapılıyor: 3
Analiz yapılıyor: 4
Analiz yapılıyor: 5
Analiz yapılıyor: 6
Analiz yapılıyor: 7
Analiz yapılıyor: 8
Analiz yapılıyor: 9
Analiz yapılıyor: 10
Analiz yapılıyor: 11
Analiz yapılıyor: 12
Analiz yapılıyor: 13
Ülkelere göre ortalama metrikler ve en güçlü ilişkili ürünler:
    Country  Average Support  Average Confidence  Average Lift  \
0         1         0.003787            0.395813     44.637668   
6         7         0.200000            0.992905      4.934653   
4         5         0.200830            0.948133      4.143154   
1         2         0.340535            0.908436      2.358796   
2         3         1.000000            1.000000      1.000000   
3         4         1.000000            1.000000      1.000000   
5         6         1.000000            1.000000      1.000000   
7         8         1.000000            1.000000      1.000000   
8         9         1.000000            1.000000      1.000000   
9        10  

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
from itertools import combinations
from collections import defaultdict

def pair_association_rules(transactions):
    """Compute co-occurrence, support, confidence and lift for product pairs.

    Each basket is de-duplicated and every unordered pair of products that
    shares a basket is counted once per basket.

    Args:
        transactions: Iterable of iterables of product labels, one inner
            iterable per invoice. Labels must be hashable and mutually
            orderable, because each basket is sorted before pairing.

    Returns:
        list of dicts, one per pair in first-seen order, with the keys
        'Product 1', 'Product 2', 'Co-occurrence', 'Support', 'Confidence'
        and 'Lift'. The list is empty when no basket holds two distinct
        products.
    """
    baskets = [set(products) for products in transactions]
    total_transactions = len(baskets)

    # Product frequencies and pair co-occurrence counts.
    product_count = defaultdict(int)
    pair_count = defaultdict(int)
    for unique_products in baskets:
        for product in unique_products:
            product_count[product] += 1
        # Sorting gives every pair a single canonical orientation.
        for pair in combinations(sorted(unique_products), 2):
            pair_count[pair] += 1

    records = []
    for (prod1, prod2), pair_cnt in pair_count.items():
        support = pair_cnt / total_transactions
        # Confidence is directional: only prod1 -> prod2 is evaluated, where
        # prod1 is the member of the pair that sorts first.
        confidence = pair_cnt / product_count[prod1]
        lift = confidence / (product_count[prod2] / total_transactions)
        records.append({
            'Product 1': prod1,
            'Product 2': prod2,
            'Co-occurrence': pair_cnt,
            'Support': support,
            'Confidence': confidence,
            'Lift': lift,
        })
    return records


# A notebook kernel executes cells with __name__ == "__main__", so this driver
# still runs when the cell is executed; the guard only lets the helper above
# be imported and tested without the workbook being present.
if __name__ == "__main__":
    file_path = '/content/DM_5000_Descript.xlsx'

    df = pd.read_excel(file_path)

    df = df[['InvoiceNo', 'Description']].dropna()

    # Build the list of products for each invoice number.
    invoice_product_map = df.groupby('InvoiceNo')['Description'].apply(list)

    # Support, confidence and lift for every co-occurring pair.
    results = pair_association_rules(invoice_product_map)

    # Find the pair with the highest co-occurrence. max() returns the first
    # maximal record, which matches a strict '>' scan over the same order.
    if results:
        top_record = max(results, key=lambda record: record['Co-occurrence'])
        max_cooccurrence = top_record['Co-occurrence']
        max_cooccurrence_pair = (top_record['Product 1'], top_record['Product 2'])
    else:
        max_cooccurrence = 0
        max_cooccurrence_pair = None

    # Explicit columns keep the sort below valid even when there are no pairs
    # (an empty record list would otherwise yield a frame with no 'Lift'
    # column and sort_values would raise KeyError).
    results_df = pd.DataFrame(
        results,
        columns=['Product 1', 'Product 2', 'Co-occurrence', 'Support', 'Confidence', 'Lift'],
    )
    results_df = results_df.sort_values(by='Lift', ascending=False)

    results_df.to_csv('invoice_product_analysis_with_cooccurrence.csv', index=False)

    print("\nMaksimum co-occurrence bilgisi:")
    print(f"Ürün Çifti: {max_cooccurrence_pair}")
    print(f"Fatura Sayısı: {max_cooccurrence}")

    print("\nEn güçlü ilişkiler:")
    print(results_df.head(10))

    print("\nEn zayıf ilişkiler:")
    print(results_df.tail(10))

    # The browser download only exists inside Colab; elsewhere the CSV has
    # already been written to disk, so skip the download instead of crashing.
    try:
        from google.colab import files
    except ImportError:
        print("google.colab bulunamadı; dosya yerel olarak kaydedildi: invoice_product_analysis_with_cooccurrence.csv")
    else:
        files.download('invoice_product_analysis_with_cooccurrence.csv')



Maksimum co-occurrence bilgisi:
Ürün Çifti: ('KNITTED UNION FLAG HOT WATER BOTTLE', 'RED WOOLLY HOTTIE WHITE HEART.')
Fatura Sayısı: 22

En güçlü ilişkiler:
                                Product 1                           Product 2  \
14113               BLACK ORANGE SQUEEZER        CRAZY DAISY HEART DECORATION   
34173  DO NOT TOUCH MY STUFF DOOR HANGER           WOOD STAMP SET BEST WISHES   
14623        CRAZY DAISY HEART DECORATION     PINK PAISLEY SQUARE TISSUE BOX    
34149  DO NOT TOUCH MY STUFF DOOR HANGER   MINI WOODEN HAPPY BIRTHDAY GARLAND   
34150  DO NOT TOUCH MY STUFF DOOR HANGER              MOODY BOY  DOOR HANGER    
34151  DO NOT TOUCH MY STUFF DOOR HANGER              MOODY GIRL DOOR HANGER    
34152  DO NOT TOUCH MY STUFF DOOR HANGER            PACK OF 12 STICKY BUNNIES   
34160  DO NOT TOUCH MY STUFF DOOR HANGER           RED RETROSPOT PUDDING BOWL   
34171  DO NOT TOUCH MY STUFF DOOR HANGER    SMALL WHITE RETROSPOT MUG IN BOX    
34172  DO NOT TOUCH MY STUFF DOO

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import plotly.graph_objects as go

# Category labels in display order, and the bar height plotted for each.
x_labels = ['Low', 'Medium', 'High']
y_values = [100, 66, 27]

# NOTE(review): `accuracy_score` and `classification_report` are also the
# names of functions in sklearn.metrics; if those are imported in this kernel,
# the assignments below rebind them.
accuracy_score = 0.9481865284974094

# Per-label metrics, written as (precision, recall, f1-score) tuples and
# expanded into one dict per label.
classification_report = {
    label: dict(zip(('precision', 'recall', 'f1-score'), scores))
    for label, scores in (
        ('High', (0.96, 0.96, 0.96)),
        ('Low', (0.96, 0.96, 0.96)),
        ('Medium', (0.92, 0.92, 0.92)),
    )
}

def create_hover_text(label, report=None, accuracy=None):
    """Build the HTML hover text shown for one category bar.

    Args:
        label: Key of the category to describe; it must be present in
            ``report``.
        report: Mapping of label -> dict with 'precision', 'recall' and
            'f1-score' entries. Defaults to the notebook-level
            ``classification_report``.
        accuracy: Overall accuracy value to display. Defaults to the
            notebook-level ``accuracy_score``.

    Returns:
        A string with four ``<br>``-separated lines: the accuracy formatted
        to four decimals, then precision, recall and f1-score formatted to
        two decimals.

    Raises:
        KeyError: If ``label`` is not a key of ``report``, or a metric entry
            is missing.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing single-argument calls behave exactly as before.
    if report is None:
        report = classification_report
    if accuracy is None:
        accuracy = accuracy_score

    classification = report[label]
    return (f'Accuracy_score: {accuracy:.4f}<br>'
            f'precision: {classification["precision"]:.2f}<br>'
            f'recall: {classification["recall"]:.2f}<br>'
            f'f1-score: {classification["f1-score"]:.2f}')

# One hover string per category, in the same order as the bars.
hover_texts = list(map(create_hover_text, x_labels))

# Single bar trace; hoverinfo='text' makes the tooltip show only hovertext.
fig = go.Figure(
    go.Bar(
        x=x_labels,
        y=y_values,
        marker={'color': ['green', 'orange', 'red']},
        hovertext=hover_texts,
        hoverinfo='text',
    )
)

# Titles, explicit category ticks, and bar spacing.
fig.update_layout(
    title="Customer Analysis",
    xaxis_title="Classification Categories",
    yaxis_title="Support Values",
    xaxis={'tickmode': 'array', 'tickvals': x_labels, 'ticktext': x_labels},
    bargap=0.4,
    bargroupgap=0.2,
)

fig.show()


En güçlü ilişkiler:
                                Product 1                           Product 2  \
14113               BLACK ORANGE SQUEEZER        CRAZY DAISY HEART DECORATION   
34173  DO NOT TOUCH MY STUFF DOOR HANGER           WOOD STAMP SET BEST WISHES   
14623        CRAZY DAISY HEART DECORATION     PINK PAISLEY SQUARE TISSUE BOX    
34149  DO NOT TOUCH MY STUFF DOOR HANGER   MINI WOODEN HAPPY BIRTHDAY GARLAND   
34150  DO NOT TOUCH MY STUFF DOOR HANGER              MOODY BOY  DOOR HANGER    
34151  DO NOT TOUCH MY STUFF DOOR HANGER              MOODY GIRL DOOR HANGER    
34152  DO NOT TOUCH MY STUFF DOOR HANGER            PACK OF 12 STICKY BUNNIES   
34160  DO NOT TOUCH MY STUFF DOOR HANGER           RED RETROSPOT PUDDING BOWL   
34171  DO NOT TOUCH MY STUFF DOOR HANGER    SMALL WHITE RETROSPOT MUG IN BOX    
34172  DO NOT TOUCH MY STUFF DOOR HANGER               TEA TIME PARTY BUNTING   

       Co-occurrence   Support  Confidence   Lift  Invoices  
14113              1  0.00

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>