In [None]:
import pandas as pd
import numpy as np

# Load the raw sales and customer tables from CSV files in the working directory.
sales_data = pd.read_csv('satis_verisi_5000.csv')
customer_data = pd.read_csv('musteri_verisi_5000_utf8.csv')

# Report the per-column missing-value counts before any imputation.
print("Sales data: ",sales_data.isnull().sum())
print("Customer data: ",customer_data.isnull().sum())

# Impute missing values: mean for price, median for customer spend.
# The filled column is assigned back instead of calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form operates on
# a temporary under pandas Copy-on-Write and leaves the frame unchanged (and
# raises a FutureWarning on pandas 2.x).
sales_data['fiyat'] = sales_data['fiyat'].fillna(sales_data['fiyat'].mean())
customer_data['harcama_miktari'] = customer_data['harcama_miktari'].fillna(
    customer_data['harcama_miktari'].median()
)

# Trim the upper tail: keep only rows strictly below the 95th percentile.
# The percentile is computed after imputation, so filled values count too.
sales_data = sales_data[sales_data['fiyat'] < sales_data['fiyat'].quantile(0.95)]
customer_data = customer_data[
    customer_data['harcama_miktari'] < customer_data['harcama_miktari'].quantile(0.95)
]

# Inner join: sales rows whose customer was trimmed above (or is unknown) are
# dropped, and so are customers without any remaining sale.
merged_data = pd.merge(sales_data, customer_data, on='musteri_id', how='inner')


In [None]:
import matplotlib.pyplot as plt

# Parse the sale date column, then derive calendar features from it.
merged_data['tarih'] = pd.to_datetime(merged_data['tarih'])
sale_dates = merged_data['tarih'].dt
merged_data['hafta'] = sale_dates.isocalendar().week  # ISO week number
merged_data['ay'] = sale_dates.month                  # month number

# Total sales per week number and per month number.
# NOTE(review): grouping on the bare week/month number pools same-numbered
# periods from different years if the data spans more than one year - confirm.
weekly_sales = merged_data.groupby('hafta')['toplam_satis'].agg('sum')
monthly_sales = merged_data.groupby('ay')['toplam_satis'].agg('sum')

# Line chart of the monthly totals (x = month number, y = total sales).
plt.plot(monthly_sales)
axes = plt.gca()
axes.set_title("Aylık Satış Trendleri")
axes.set_xlabel("Ay")
axes.set_ylabel("Toplam Satış")
plt.show()


In [None]:
# Line chart of the weekly totals (x = week number, y = total sales).
plt.plot(weekly_sales)
axes = plt.gca()
axes.set_title("Haftalık Satış Trendleri")
axes.set_xlabel("Hafta")
axes.set_ylabel("Toplam Satış")
plt.show()


In [None]:
# Total sales per product category.
category_sales = merged_data.groupby('kategori')['toplam_satis'].sum()

# Bucket customer age into four labelled bands. pd.cut intervals are
# right-closed by default, i.e. (0, 25], (25, 35], (35, 50], (50, inf).
# NOTE(review): the first band is labelled '18-25' but actually captures every
# age in (0, 25]; ages <= 0 or missing become NaN and drop out of the groupby.
age_bins = [0, 25, 35, 50, np.inf]
age_labels = ['18-25', '26-35', '36-50', '50+']
merged_data['age_group'] = pd.cut(merged_data['yas'], bins=age_bins, labels=age_labels)

# `age_group` is categorical, so state `observed` explicitly: relying on the
# implicit default raises a FutureWarning on recent pandas and the default is
# changing to True, which would silently drop age bands that have no rows.
# observed=False keeps every band in the result (empty bands sum to 0).
age_group_sales = merged_data.groupby('age_group', observed=False)['toplam_satis'].sum()

# NOTE(review): 'harcama_miktari' comes from the customer table, so after the
# merge it is presumably repeated on every sale row of a customer; this sum
# would then weight customers by their number of sales - confirm intent.
gender_spending = merged_data.groupby('cinsiyet')['harcama_miktari'].sum()


In [None]:
# Total customer spend per city.
# NOTE(review): 'harcama_miktari' is a customer-table column; if it repeats on
# every sale row after the merge, this sum counts it once per sale - confirm.
city_spending = merged_data.groupby('sehir')['harcama_miktari'].sum()

# Previous sale amount of the same product. shift(1) takes the preceding row
# within each product group, so the rows are put in chronological order first
# (stable sort keeps the existing order among equal dates); the result is
# aligned back to merged_data by index on assignment.
# NOTE(review): despite the column name this is the previous recorded sale of
# the product, not an aggregated previous-month total.
merged_data['previous_month_sales'] = (
    merged_data.sort_values('tarih', kind='stable')
    .groupby('ürün_kodu')['toplam_satis']
    .shift(1)
)
# Percent change versus that previous sale; NaN for a product's first sale and
# inf/NaN when the previous amount is 0.
merged_data['sales_change_percentage'] = (
    (merged_data['toplam_satis'] - merged_data['previous_month_sales'])
    / merged_data['previous_month_sales']
    * 100
)

# Total sales per (category, month number), sorted by category then month.
monthly_category_sales = merged_data.groupby(['kategori', 'ay'])['toplam_satis'].sum()
# Month-over-month percent change computed within each category. A plain
# pct_change() on the MultiIndex series would compare the first month of a
# category against the last month of the preceding category.
monthly_category_sales_percentage_change = (
    monthly_category_sales.groupby(level='kategori').pct_change() * 100
)


In [None]:

# Pareto analysis: total sales per product, largest first.
pareto_data = (
    merged_data.groupby('ürün_kodu')['toplam_satis']
    .sum()
    .sort_values(ascending=False)
)
# Running share of overall sales, then the products whose running share is
# still at or below 80%.
# NOTE(review): the product that crosses the 80% mark is excluded - confirm.
pareto_data_cumulative = pareto_data.cumsum().div(pareto_data.sum())
pareto_products = pareto_data.loc[pareto_data_cumulative <= 0.8]

# Cohort keys: the month of each customer's earliest sale and the month of
# the sale itself, both as monthly periods.
first_purchase_date = merged_data.groupby('musteri_id')['tarih'].transform('min')
merged_data['first_purchase_month'] = first_purchase_date.dt.to_period('M')
merged_data['tarih_month'] = merged_data['tarih'].dt.to_period('M')

# Cohort table: rows = first-purchase month, columns = sale month, values =
# number of rows in merged_data (0 where a combination never occurs).
# NOTE(review): this counts sale rows, not distinct customers - confirm.
cohort_data = (
    merged_data.groupby(['first_purchase_month', 'tarih_month'])
    .size()
    .unstack(fill_value=0)
)


from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Predictors are 'fiyat' (price) and 'adet' (quantity); target is total sales.
feature_columns = ['fiyat', 'adet']
X = merged_data[feature_columns]
y = merged_data['toplam_satis']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# Score of the fitted model on the held-out rows.
score = model.score(X_test, y_test)


In [None]:
# Display the held-out score (R^2 for LinearRegression) computed by model.score(X_test, y_test).
score

In [None]:
# Predict the held-out rows and plot actual (x) against predicted (y) sales.
y_pred = model.predict(X_test)
plt.scatter(y_test, y_pred)
axes = plt.gca()
axes.set_title("Gerçek vs Tahmin Satışlar")
axes.set_xlabel("Gerçek Satışlar")
axes.set_ylabel("Tahmin Edilen Satışlar")
plt.show()