In [None]:
import psycopg2
import pandas as pd

# Connection configuration.
# NOTE(review): host, port, user and database name are hardcoded for a local
# setup; consider reading them from the environment before sharing this notebook.
conn = psycopg2.connect(
    host="localhost",
    port="5433",
    user="guillaumedemerges",
    password="",
    dbname="bookscraper"
)

# Verify the connection by asking the server for its version string.
# The cursor is used as a context manager so it is closed even if the
# query or the fetch raises; `conn` itself stays open for the cells below.
with conn.cursor() as cur:
    cur.execute("SELECT version();")
    db_version = cur.fetchone()
print(f"Connected to database:\n{db_version[0]}")

In [None]:
# Read every row of the `books` table into a DataFrame through the open
# connection `conn`.
query = "SELECT * FROM books;"
df_books = pd.read_sql(sql=query, con=conn)

# Quick look at the first five rows.
books_preview = df_books.head(n=5)
print(books_preview)

In [None]:
import matplotlib.pyplot as plt

# Histogram of the `price` column using 20 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_books['price'], bins=20, color='purple', edgecolor='black')
ax.set_xlabel('Price (£)')
ax.set_ylabel('Number of Books')
ax.set_title('Distribution of Book Prices')
plt.show()

In [None]:
# Number of rows per category; value_counts() lists the most frequent first.
df_books_per_category = df_books['category'].value_counts()

# Draw the counts as a bar chart on an explicitly created axes.
fig, ax = plt.subplots(figsize=(12, 8))
df_books_per_category.plot(kind='bar', color='lightblue', ax=ax)
ax.set_xlabel('Category')
ax.set_ylabel('Number of Books')
ax.set_title('Number of Books per Category')
# Category names are long, so keep the tick labels vertical.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Mean price within each category, ordered from highest to lowest.
df_avg_price_per_category = (
    df_books
    .groupby('category')['price']
    .mean()
    .sort_values(ascending=False)
)

# Bar chart of the per-category averages.
fig, ax = plt.subplots(figsize=(12, 8))
df_avg_price_per_category.plot(kind='bar', color='pink', ax=ax)
ax.set_xlabel('Category')
ax.set_ylabel('Average Price (£)')
ax.set_title('Average Price by Category')
# Keep the category tick labels vertical so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Distribution of book ratings.
plt.figure(figsize=(10, 6))
# value_counts() alone orders by frequency; sort_index() puts the bars in
# rating order so the chart reads along the rating scale.
# NOTE(review): assumes `rating` is stored as a sortable numeric value — confirm.
df_ratings_distribution = df_books['rating'].value_counts().sort_index()
df_ratings_distribution.plot(kind='bar', color='orange')
plt.xlabel('Rating')
plt.ylabel('Number of Books')
plt.title('Distribution of Book Ratings')
plt.show()

In [None]:
# Sum of the `stock` column within each category, largest total first.
df_total_stock_per_category = (
    df_books
    .groupby('category')['stock']
    .sum()
    .sort_values(ascending=False)
)

# Bar chart of the per-category totals.
fig, ax = plt.subplots(figsize=(12, 8))
df_total_stock_per_category.plot(kind='bar', color='teal', ax=ax)
ax.set_xlabel('Category')
ax.set_ylabel('Total Stock')
ax.set_title('Total Stock by Category')
# Keep the category tick labels vertical so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
import seaborn as sns

# One box per category showing the spread of `price` within it.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=df_books, x='category', y='price', ax=ax)
ax.set_xlabel('Category')
ax.set_ylabel('Price (£)')
ax.set_title('Boxplot of Book Prices by Category')
# Rotate the category tick labels to vertical so they stay readable.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Scatter of price (x) against stock (y); semi-transparent markers make
# overlapping points visible.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_books['price'], df_books['stock'], color='green', alpha=0.5)
ax.set_xlabel('Price (£)')
ax.set_ylabel('Stock')
ax.set_title('Price vs. Stock')
plt.show()

In [None]:
import seaborn as sns

# Heatmap of pairwise correlations between the numeric columns of df_books.
plt.figure(figsize=(10, 8))
# Restrict to numeric/bool columns before calling corr(): since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises on
# them instead. select_dtypes keeps this working on older and newer pandas.
# NOTE(review): presumably df_books holds text columns such as `category` — confirm.
corr = df_books.select_dtypes(include=['number', 'bool']).corr()
# center=0 anchors the diverging colormap at zero correlation.
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Matrix')
plt.show()