In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:

# Load the dataset from the CSV file and preview its first rows.
csv_path = "Social_Network_Ads.csv"
df = pd.read_csv(csv_path)
df.head()


In [None]:

# Dataset overview and summary statistics.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())

# Value counts of the categorical feature and of the target column.
print(df['Gender'].value_counts())
print(df['Purchased'].value_counts())


In [None]:

# Age and salary distributions, drawn side by side on a single figure.
fig, (age_ax, salary_ax) = plt.subplots(1, 2, figsize=(12, 5))

sns.histplot(df['Age'], bins=20, kde=True, ax=age_ax)
age_ax.set_title("Age Distribution")

sns.histplot(df['EstimatedSalary'], bins=20, kde=True, ax=salary_ax)
salary_ax.set_title("Salary Distribution")
plt.show()

# Purchases broken down by gender.
gender_ax = sns.countplot(x="Gender", hue="Purchased", data=df)
gender_ax.set_title("Gender vs Purchased")
plt.show()


In [None]:

# Drop the unneeded User ID column.
# errors="ignore" means re-running this cell (after the column is already
# gone) does not raise KeyError.
df = df.drop(columns="User ID", errors="ignore")

# Encode the Gender column as integers.
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

# Separate the features from the target.
X = df.drop("Purchased", axis=1)
y = df["Purchased"]

# Train/test split is done BEFORE scaling so that rows of the test split never
# contribute to the scaler's mean/variance (avoids data leakage).
# random_state is fixed, so the split is reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling: fit on the training split only, then apply the same transformation
# to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later code referring to X_scaled still works: the full
# feature matrix transformed with the statistics learned from the training split.
X_scaled = scaler.transform(X)


In [None]:

# Fit a logistic regression classifier on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the test split.
y_pred = model.predict(X_test)

# Print each evaluation metric under its heading, in the same order as before.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(label, metric(y_test, y_pred))


In [None]:

# Visualize the confusion matrix as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)
class_labels = [0, 1]
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
# Columns of the matrix are predicted labels, rows are actual labels.
heatmap_ax.set(xlabel="Predicted", ylabel="Actual", title="Confusion Matrix")
plt.show()
