<a href="https://colab.research.google.com/github/fatemekhanipour11/DataBase/blob/main/Python_MySQL_Data_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Preparing the environment to work with MySQL in Google Colab**

In [1]:
# This line installs the mysql-connector-python package, which allows Python to connect and interact with MySQL databases.
!pip install mysql-connector-python

Collecting mysql-connector-python
  Downloading mysql_connector_python-9.0.0-cp310-cp310-manylinux_2_17_x86_64.whl.metadata (2.0 kB)
Downloading mysql_connector_python-9.0.0-cp310-cp310-manylinux_2_17_x86_64.whl (19.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.3/19.3 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mysql-connector-python
Successfully installed mysql-connector-python-9.0.0


In [2]:
# This line installs the MySQL server on the system, enabling the creation and management of MySQL databases locally.
!apt-get -y install mysql-server

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libcgi-fast-perl libcgi-pm-perl libclone-perl libencode-locale-perl libfcgi-bin libfcgi-perl
  libfcgi0ldbl libhtml-parser-perl libhtml-tagset-perl libhtml-template-perl libhttp-date-perl
  libhttp-message-perl libio-html-perl liblwp-mediatypes-perl libmecab2 libprotobuf-lite23
  liburi-perl mecab-ipadic mecab-ipadic-utf8 mecab-utils mysql-client-8.0 mysql-client-core-8.0
  mysql-server-8.0 mysql-server-core-8.0
Suggested packages:
  libdata-dump-perl libipc-sharedcache-perl libbusiness-isbn-perl libwww-perl mailx tinyca
The following NEW packages will be installed:
  libcgi-fast-perl libcgi-pm-perl libclone-perl libencode-locale-perl libfcgi-bin libfcgi-perl
  libfcgi0ldbl libhtml-parser-perl libhtml-tagset-perl libhtml-template-perl libhttp-date-perl
  libhttp-message-perl libio-html-perl liblwp-mediatypes-perl libmecab2 libprotobuf-l

In [3]:
# This line starts the MySQL server service, allowing it to accept connections and perform database operations.
!service mysql start

 * Starting MySQL database server mysqld
   ...done.


In [4]:
# This line modifies the authentication method and password for the MySQL root user and refreshes the privileges, enabling root access with the specified password.
!mysql -e "ALTER USER 'root'@'localhost' IDENTIFIED WITH 'mysql_native_password' BY 'root';FLUSH PRIVILEGES;"

# **2. Connect to Server and Create Database**



In [5]:
import mysql.connector
from mysql.connector import Error
import pandas as pd
from sqlalchemy import create_engine

This function, create_server_connection, establishes a connection to a MySQL database server. It accepts four parameters: host_name (the server's hostname or IP address), user_name (the MySQL username), user_password (the MySQL password), and an optional db_name (the name of the database to connect to). If a database name is provided, the function connects directly to that database. If not, it connects to the server without selecting a specific database. It returns a MySQL connection object if the connection is successful, and prints an error message if it fails.

In [7]:
def create_server_connection(host_name, user_name, user_password, db_name=None):
    connection = None
    try:
        if db_name:
            connection = mysql.connector.connect(
                host=host_name,
                user=user_name,
                passwd=user_password,
                database=db_name
            )
        else:
            connection = mysql.connector.connect(
                host=host_name,
                user=user_name,
                passwd=user_password
            )
        print("MySQL Database connection successful")
    except Error as err:
        print(f"Error: '{err}'")
    return connection

In [8]:
def execute_query(connection, query):
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Query successful")
    except Error as err:
        print(f"Error: '{err}'")

def fetch_query_results(connection, query):
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        result = cursor.fetchall()
        if result:
            for row in result:
                print(row)
        else:
            print("No results to display.")
        print("\n")
    except Error as err:
        print(f"Error: '{err}'")



In [9]:
# Connection information with the server
host_name = "localhost"
user_name = "root"
user_password = "root"
db_name = "customers"

# Connect to the server
connection = create_server_connection(host_name, user_name, user_password)


# Create database
create_database_query = f"CREATE DATABASE IF NOT EXISTS {db_name}"
execute_query(connection, create_database_query)

MySQL Database connection successful
Query successful


In [10]:
# Display the database
databases = fetch_query_results(connection, "SHOW DATABASES")
if databases:
    print("Databases:")
    for db in databases:
        print(db[0])

('customers',)
('information_schema',)
('mysql',)
('performance_schema',)
('sys',)




In [11]:
# Connect to the new database
connection = create_server_connection(host_name, user_name, user_password, db_name)

MySQL Database connection successful



# **3. Creating Tables**

In [12]:
# Create customers table
create_customers_table_query = """
CREATE TABLE IF NOT EXISTS customers (
    customer_id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    city VARCHAR(255) NOT NULL,
    phone VARCHAR(20) ,
    address VARCHAR(255),
    created_at DATE
)
"""

execute_query(connection, create_customers_table_query)


# Create order table
create_orders_table_query = """
CREATE TABLE IF NOT EXISTS orders (
    order_id INT AUTO_INCREMENT PRIMARY KEY,
    customer_id INT,
    order_date DATE,
    status VARCHAR(50),
    total_amount DECIMAL(10, 2),
    FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
)
"""
execute_query(connection, create_orders_table_query)


Query successful
Query successful


# **4. Populate Tables**

In [13]:
!pip install faker


Collecting faker
  Downloading Faker-26.1.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-26.1.0-py3-none-any.whl (1.8 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m1.6/1.8 MB[0m [31m48.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-26.1.0


In [14]:
from faker import Faker
from datetime import datetime, timedelta

This code automates the generation and insertion of sample customer and order data into a database. It leverages the Faker library to create realistic-looking data, making it a valuable tool for testing and populating databases.

In [15]:
fake = Faker('en_US')
# Define the query for inserting customer data
customers_insert_query = "INSERT INTO customers (name, city, phone, address, created_at) VALUES (%s, %s, %s, %s, %s)"
cursor = connection.cursor()
# Generate customer data
customers_data = [
    (
        fake.name(),
        fake.city(),
        fake.phone_number()[:7],
        fake.address(),
        (datetime.now() - timedelta(days=fake.random_int(min=0, max=365))).strftime('%Y-%m-%d %H:%M:%S')  # Generate a date within the past year
    )
    for _ in range(70)
]

# Insert customer data into the database
cursor.executemany(customers_insert_query, customers_data)
connection.commit()

# Fetch customer IDs to use for orders
cursor.execute("SELECT customer_id FROM customers")
customer_ids = [row[0] for row in cursor.fetchall()]

# Define the query for inserting order data
orders_insert_query = "INSERT INTO orders (customer_id, order_date, status, total_amount) VALUES (%s, %s, %s, %s)"

# Generate order data
orders_data = [
    (
        fake.random_element(customer_ids),
        fake.date_between(start_date='-1y', end_date='today'),
        fake.random_element(['Pending', 'Shipped', 'Delivered', 'Cancelled']),
        round(fake.random_number(digits=2), 2)
    )
    for _ in range(70)
]

# Insert order data into the database
cursor.executemany(orders_insert_query, orders_data)
connection.commit()

# **5. Display in dataframe**




In [6]:
!pip install PyMySQL

Collecting PyMySQL
  Downloading PyMySQL-1.1.1-py3-none-any.whl.metadata (4.4 kB)
Downloading PyMySQL-1.1.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMySQL
Successfully installed PyMySQL-1.1.1


In [None]:
engine = create_engine('mysql+pymysql://root:root@localhost/customers')

In [27]:
def DQL_query(engine, query):
  df = pd.read_sql_query(query, engine)
  return df

In [28]:
# Reading customer data
customers_table= "SELECT * FROM customers"
DQL_query(engine, customers_table)

Unnamed: 0,customer_id,name,city,phone,address,created_at
0,1,Ellen Mercado,Booneville,(698)98,"8383 Erin Ports\nPort Sherriborough, RI 26365",2023-10-16
1,2,Jessica Davis,Brittanyton,(940)38,"974 Stephanie Groves\nNew Gregoryborough, KS 5...",2023-09-19
2,3,Tyler Perez,Robertmouth,417.660,"2227 Everett Hills Suite 196\nHeatherview, MA ...",2024-04-11
3,4,Misty Humphrey,North Barry,+1-593-,"834 Jessica Green Suite 832\nPort Davidland, V...",2023-09-20
4,5,Jonathan Cohen,Lake Justinmouth,001-567,"4887 Johnson Circles\nYangside, ND 43121",2023-08-09
...,...,...,...,...,...,...
65,66,Denise Rivera,West Angela,+1-892-,"69164 John Islands Apt. 750\nJennifershire, FM...",2024-02-04
66,67,Jill Small,Tylertown,862-291,"085 Anderson Glen\nJohnsonview, ND 82674",2024-05-03
67,68,Thomas Shelton,Smithborough,851-598,"0674 Reeves View\nThomasburgh, AS 34168",2024-02-18
68,69,James Cox,South Anthonychester,(334)98,"25097 Mack Cape Apt. 713\nSouth Cynthiabury, W...",2023-12-02


In [29]:
# Read orders data
orders_table= "SELECT * FROM orders"
DQL_query(engine, orders_table)

Unnamed: 0,order_id,customer_id,order_date,status,total_amount
0,1,43,2024-03-26,Shipped,12.0
1,2,46,2023-10-23,Shipped,26.0
2,3,18,2023-09-30,Cancelled,96.0
3,4,38,2024-05-16,Shipped,19.0
4,5,51,2023-09-25,Cancelled,36.0
...,...,...,...,...,...
65,66,51,2023-08-17,Pending,95.0
66,67,24,2024-02-27,Shipped,29.0
67,68,61,2023-10-07,Pending,49.0
68,69,28,2024-01-11,Cancelled,90.0


# **6. Data analysis**

**Connection Status Check**

In [30]:
if connection.is_connected():
  print("Connection successful")
else:
  print("Connection failed")

Connection successful


**Display the tables in the database**

In [31]:
# cursor = connection.cursor()
# # Execute the SHOW TABLES query
# cursor.execute("SHOW TABLES")
# tables = cursor.fetchall()
# for table in tables:
#         table_name = table[0]
#         print(table_name)
show_tabels= "SHOW TABLES"
fetch_query_results(connection, show_tabels)

('customers',)
('orders',)




**Checking for nulls in columns**

In [32]:
is_null_query = "SELECT * FROM customers WHERE name IS NULL;"
fetch_query_results(connection, is_null_query)

No results to display.




This query searches the information_schema.columns system view to find all columns within the customers table that are defined as nullable. It then returns a list of these column names.

In [21]:
null_columns_query = "SELECT column_name FROM information_schema.columns WHERE table_name = 'customers' AND is_nullable = 'YES';"
fetch_query_results(connection, null_columns_query)

('address',)
('created_at',)
('phone',)




This SQL query selects all rows (indicated by SELECT *) from the 'customers' table where at least one of the following columns contains a NULL value: 'name', 'city', 'phone', or 'address'.

In [None]:
all_null = """
SELECT *
FROM customers
WHERE name IS NULL
   OR city IS NULL
   OR phone IS NULL
   OR created_at IS NULL
   OR address IS NULL;
   """
fetch_query_results(connection, all_null)

No results to display.




The SHOW FULL COLUMNS FROM orders; command provides detailed information about each column in the orders table.

In [None]:
show_null= "SHOW FULL COLUMNS FROM orders;"
fetch_query_results(connection, show_null)

('order_id', 'int', None, 'NO', 'PRI', None, 'auto_increment', 'select,insert,update,references', '')
('customer_id', 'int', None, 'YES', 'MUL', None, '', 'select,insert,update,references', '')
('order_date', 'date', None, 'YES', '', None, '', 'select,insert,update,references', '')
('status', 'varchar(50)', 'utf8mb4_0900_ai_ci', 'YES', '', None, '', 'select,insert,update,references', '')
('total_amount', 'decimal(10,2)', None, 'YES', '', None, '', 'select,insert,update,references', '')




**Display columns**

In [None]:
# table_name = 'customers'
# cursor.execute(f"SHOW COLUMNS FROM {table_name}")


# columns = cursor.fetchall()
# for column in columns:
#     print(column)

display_columns= "SHOW COLUMNS FROM customers;"
fetch_query_results(connection, display_columns)

('customer_id', 'int', 'NO', 'PRI', None, 'auto_increment')
('name', 'varchar(255)', 'NO', '', None, '')
('city', 'varchar(255)', 'NO', '', None, '')
('phone', 'varchar(20)', 'YES', '', None, '')
('address', 'varchar(255)', 'YES', '', None, '')
('created_at', 'timestamp', 'YES', '', 'CURRENT_TIMESTAMP', 'DEFAULT_GENERATED')




**Delete column**

In [None]:
drop_column_query = "ALTER TABLE customers DROP COLUMN phone;"
execute_query(connection, drop_column_query)


Query successful


**Fetching Column Information**

In [None]:
des = """
DESCRIBE customers;
"""
fetch_query_results(connection, des)

('customer_id', 'int', 'NO', 'PRI', None, 'auto_increment')
('name', 'varchar(255)', 'NO', '', None, '')
('city', 'varchar(255)', 'NO', '', None, '')
('phone', 'varchar(20)', 'YES', '', None, '')
('address', 'varchar(255)', 'YES', '', None, '')
('created_at', 'date', 'YES', '', None, '')




In [None]:
des = """
DESCRIBE orders;
"""
fetch_query_results(connection, des)

('order_id', 'int', 'NO', 'PRI', None, 'auto_increment')
('customer_id', 'int', 'YES', 'MUL', None, '')
('order_date', 'date', 'YES', '', None, '')
('status', 'varchar(50)', 'YES', '', None, '')
('total_amount', 'decimal(10,2)', 'YES', '', None, '')




**Calculate the number of orders for each customer**

In [33]:
q1 = """
SELECT customer_id, COUNT(*) AS total_orders
FROM orders GROUP BY customer_id
ORDER BY total_orders ASC;
"""
DQL_query(engine, q1)

Unnamed: 0,customer_id,total_orders
0,70,1
1,44,1
2,42,1
3,40,1
4,39,1
5,36,1
6,32,1
7,52,1
8,28,1
9,27,1


**Calculate the total amount of orders for each customer**

In [34]:
q2 = """
SELECT customer_id, SUM(total_amount) AS total_spent
FROM orders
GROUP BY customer_id;
"""
DQL_query(engine, q2)

Unnamed: 0,customer_id,total_spent
0,1,77.0
1,2,175.0
2,4,33.0
3,5,96.0
4,8,84.0
5,9,75.0
6,13,44.0
7,14,65.0
8,16,46.0
9,17,104.0


**List of customers who have more than 2 orders**

In [36]:
q3 = """
SELECT customer_id, COUNT(*) AS total_orders
FROM orders
GROUP BY customer_id
HAVING COUNT(*) > 2;
"""
DQL_query(engine, q3)

Unnamed: 0,customer_id,total_orders
0,18,3
1,24,3
2,26,3
3,38,4
4,43,3
5,57,3


**Total number of orders per customer:**

Objective: to identify the highest number and the lowest number of orders.

In [37]:
q4 = """
SELECT customers.name, COUNT(orders.order_id) AS total_orders
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
GROUP BY customers.customer_id
ORDER BY total_orders DESC;
"""
DQL_query(engine, q4)

Unnamed: 0,name,total_orders
0,Terrence Townsend,4
1,David Smith,3
2,Valerie Johnson,3
3,Caleb Rodriguez,3
4,Nancy Pacheco,3
5,Warren Delgado,3
6,Jessica Davis,2
7,Holly Rose,2
8,Douglas Torres,2
9,Amanda Barker,2


**Calculation of the total amount of each customer's orders:**

Objective: to identify the customers who have the highest and lowest total amount of orders.

In [38]:
q5 = """
SELECT customers.name, SUM(orders.total_amount) AS total_amount
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
GROUP BY customers.customer_id
ORDER BY total_amount DESC;
"""
DQL_query(engine, q5)

Unnamed: 0,name,total_amount
0,Terrence Townsend,211.0
1,Caleb Rodriguez,193.0
2,Jessica Davis,175.0
3,Nancy Pacheco,156.0
4,Nicole Smith,131.0
5,Douglas Torres,126.0
6,David Smith,115.0
7,Michael Ramirez,113.0
8,Valerie Johnson,111.0
9,Warren Delgado,108.0


**Find orders whose amount is greater than the average amount of all orders:**

Objective: Identify large orders that may be worth further analysis.

In [39]:
q6 = """
SELECT *
FROM orders
WHERE total_amount > (SELECT AVG(total_amount) FROM orders);
"""
DQL_query(engine, q6)

Unnamed: 0,order_id,customer_id,order_date,status,total_amount
0,3,18,2023-09-30,Cancelled,96.0
1,6,32,2024-06-07,Cancelled,64.0
2,7,18,2023-09-18,Delivered,77.0
3,10,24,2023-11-04,Cancelled,60.0
4,15,59,2023-08-24,Pending,62.0
5,17,9,2024-03-16,Shipped,49.0
6,19,64,2023-08-05,Delivered,61.0
7,21,36,2024-02-16,Cancelled,82.0
8,22,1,2023-09-08,Cancelled,60.0
9,26,43,2024-01-13,Delivered,89.0


**Order timing analysis for each customer:**

Objective: To identify time patterns in customer orders.

In [40]:
q7 = """
SELECT customers.name, orders.order_date, COUNT(orders.order_id) AS total_orders
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
GROUP BY customers.customer_id, orders.order_date
ORDER BY customers.name, orders.order_date;
"""
DQL_query(engine, q7)

Unnamed: 0,name,order_date,total_orders
0,Amanda Baker,2024-07-14,1
1,Amanda Barker,2023-09-29,1
2,Amanda Barker,2023-10-23,1
3,Angelica Moore,2024-02-16,1
4,Caleb Rodriguez,2023-09-18,1
...,...,...,...
65,Victoria Simmons,2023-11-06,1
66,Victoria Simmons,2024-04-18,1
67,Warren Delgado,2024-01-09,1
68,Warren Delgado,2024-02-01,1


**Find customers who have not placed any orders in a certain time period:**

Objective: Identify customers who may have been lost or need to be followed up.

In [41]:
q8 = """
SELECT customers.name
FROM customers
LEFT JOIN orders ON customers.customer_id = orders.customer_id
WHERE orders.order_id IS NULL;
"""
DQL_query(engine, q8)

Unnamed: 0,name
0,Tyler Perez
1,April Jackson DVM
2,Derrick Lopez
3,David Stephens
4,Paul Williamson
5,Jesus Moore DVM
6,Steven Thomas
7,Joseph Mckenzie
8,Linda Mays
9,Lisa Contreras


**Calculate the average time between orders for each customer:**

Objective: Analyzing customers' buying patterns and identifying common times between orders.

In [42]:
q9 ="""
SELECT customer_id, AVG(DATEDIFF(next_order_date, order_date)) AS avg_days_between_orders
FROM (
  SELECT customer_id, order_date,
         LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) AS next_order_date
  FROM orders
) AS subquery
WHERE next_order_date IS NOT NULL
GROUP BY customer_id;
"""
DQL_query(engine, q9)

Unnamed: 0,customer_id,avg_days_between_orders
0,1,81.0
1,2,55.0
2,4,33.0
3,5,90.0
4,8,10.0
5,9,7.0
6,17,116.0
7,18,6.0
8,24,57.5
9,26,41.5


**Number of orders based on status:**

Objective: to identify the number of orders in each situation.

In [43]:
q10 = """
SELECT status, COUNT(order_id) AS total_orders
FROM orders
GROUP BY status
ORDER BY total_orders DESC;
"""
DQL_query(engine, q10)

Unnamed: 0,status,total_orders
0,Cancelled,21
1,Delivered,19
2,Shipped,16
3,Pending,14


**Calculation of the total amount and average amount of orders for each situation:**



In [44]:
q11 = """
SELECT status, SUM(total_amount) AS total_amount, AVG(total_amount) AS average_amount, COUNT(order_id) AS total_orders
FROM orders
GROUP BY status
ORDER BY total_amount DESC;
"""
DQL_query(engine, q11)



Unnamed: 0,status,total_amount,average_amount,total_orders
0,Cancelled,1020.0,48.571429,21
1,Delivered,877.0,46.157895,19
2,Shipped,705.0,44.0625,16
3,Pending,694.0,49.571429,14


**Find customers whose orders have been canceled:**

In [45]:
q12 = """
SELECT customers.name, orders.order_id, orders.status
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
WHERE orders.status = 'Cancelled';
"""
DQL_query(engine, q12)



Unnamed: 0,name,order_id,status
0,Caleb Rodriguez,3,Cancelled
1,Nicole Smith,5,Cancelled
2,Ronald Ferguson,6,Cancelled
3,Nancy Pacheco,10,Cancelled
4,Shane Williams,13,Cancelled
5,Holly Rose,16,Cancelled
6,Angelica Moore,21,Cancelled
7,Ellen Mercado,22,Cancelled
8,Jocelyn Perez,29,Cancelled
9,Misty Humphrey,35,Cancelled


**The number of orders in each status in a certain period of time:**

In [46]:
q13 = """
SELECT status, COUNT(order_id) AS total_orders
FROM orders
WHERE order_date BETWEEN '2024-01-01' AND '2024-06-30'
GROUP BY status
ORDER BY total_orders DESC;
"""
DQL_query(engine, q13)



Unnamed: 0,status,total_orders
0,Delivered,11
1,Shipped,9
2,Cancelled,7
3,Pending,4


**Number of customers by city**

In [47]:
q14 ="""
SELECT city, COUNT(customer_id) AS total_customers
FROM customers
GROUP BY city
ORDER BY total_customers DESC;
"""
DQL_query(engine, q14)

Unnamed: 0,city,total_customers
0,Rodriguezstad,1
1,Shirleyshire,1
2,Scottville,1
3,South Lynn,1
4,Lynntown,1
...,...,...
65,New Teresa,1
66,North Oscarbury,1
67,East Mauriceburgh,1
68,Lisaborough,1


**The lowest amount of the order and the corresponding customer**

In [50]:
q15 = """
SELECT o.order_id, c.name AS customer_name, o.total_amount
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id  -- 'c' is an alias for the 'customers' table
ORDER BY o.total_amount DESC
LIMIT 3;
"""
DQL_query(engine, q15)

Unnamed: 0,order_id,customer_name,total_amount
0,39,Douglas Torres,97.0
1,3,Caleb Rodriguez,96.0
2,66,Nicole Smith,95.0


**Search based on a specific pattern**

In [62]:
q16 = """
SELECT *
FROM customers
WHERE name LIKE '%Jon%';
"""
fetch_query_results(connection, q16)

(5, 'Jonathan Cohen', 'Lake Justinmouth', '001-567', '4887 Johnson Circles\nYangside, ND 43121', datetime.date(2023, 8, 9))




**Customer conversion rate analysis**

To check the percentage of customers who have placed an order compared to total customers:

In [52]:
q17 = """
SELECT (COUNT(DISTINCT o.customer_id) / COUNT(DISTINCT c.customer_id)) * 100 AS conversion_rate
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;

"""

DQL_query(engine, q17)

Unnamed: 0,conversion_rate
0,58.5714


**Analysis of order cost changes over time**

In [53]:
q18 = """
SELECT DATE(order_date) AS order_date, SUM(total_amount) AS daily_total
FROM orders
GROUP BY DATE(order_date)
ORDER BY order_date;
"""
DQL_query(engine, q18)

Unnamed: 0,order_date,daily_total
0,2023-08-05,61.0
1,2023-08-11,3.0
2,2023-08-17,95.0
3,2023-08-24,62.0
4,2023-09-08,60.0
...,...,...
58,2024-06-25,30.0
59,2024-07-11,42.0
60,2024-07-14,65.0
61,2024-07-21,71.0


**Categorizing the status of orders using CASE**

In [54]:
q19 = """
SELECT o.status,
       COUNT(o.order_id) AS total_orders,
       CASE
           WHEN o.status = 'shipped' THEN 'Shipped Orders'
           WHEN o.status = 'pending' THEN 'Pending Orders'
           WHEN o.status = 'cancelled' THEN 'Cancelled Orders'
           WHEN o.status = 'delivered' THEN 'delivered Orders'
           ELSE 'Other Statuses'
       END AS status_category
FROM orders o
GROUP BY o.status
ORDER BY total_orders DESC;
"""
DQL_query(engine, q19)

Unnamed: 0,status,total_orders,status_category
0,Cancelled,21,Cancelled Orders
1,Delivered,19,delivered Orders
2,Shipped,16,Shipped Orders
3,Pending,14,Pending Orders


**Categorizing customers based on total costs**

In [55]:
q20 = """
SELECT c.name AS customer_name,
       SUM(o.total_amount) AS total_spent,
       CASE
           WHEN SUM(o.total_amount) > 100 THEN 'High'
           WHEN SUM(o.total_amount) BETWEEN 50 AND 100 THEN 'Medium'
           ELSE 'Low'
       END AS spending_category
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.name
ORDER BY total_spent DESC;
"""
DQL_query(engine, q20)

Unnamed: 0,customer_name,total_spent,spending_category
0,Terrence Townsend,211.0,High
1,Caleb Rodriguez,193.0,High
2,Jessica Davis,175.0,High
3,Nancy Pacheco,156.0,High
4,Nicole Smith,131.0,High
5,Douglas Torres,126.0,High
6,David Smith,115.0,High
7,Michael Ramirez,113.0,High
8,Valerie Johnson,111.0,High
9,Warren Delgado,108.0,High


**Analysis of the growth of new customers**

In [56]:
q21 = """
SELECT DATE(created_at) AS date, COUNT(*) AS new_customers
FROM customers
GROUP BY DATE(created_at)
ORDER BY DATE(created_at);
"""
DQL_query(engine, q21)

Unnamed: 0,date,new_customers
0,2023-08-06,1
1,2023-08-09,1
2,2023-08-15,1
3,2023-08-16,1
4,2023-08-19,2
...,...,...
58,2024-07-14,1
59,2024-07-18,1
60,2024-07-22,1
61,2024-07-24,1


**Combining the creation dates of customers and orders**

In [57]:
q22 = """
SELECT created_at AS date, 'Customer' AS type
FROM customers
UNION ALL
SELECT order_date AS date, 'Order' AS type
FROM orders;

"""
DQL_query(engine, q22)

Unnamed: 0,date,type
0,2023-10-16,Customer
1,2023-09-19,Customer
2,2024-04-11,Customer
3,2023-09-20,Customer
4,2023-08-09,Customer
...,...,...
135,2023-08-17,Order
136,2024-02-27,Order
137,2023-10-07,Order
138,2024-01-11,Order


**Customers with the third-highest purchase**

In [58]:
q23 = """
SELECT c.name , o.total_amount
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.total_amount = (
    SELECT DISTINCT total_amount
    FROM orders
    ORDER BY total_amount DESC
    LIMIT 1 OFFSET 2
);
"""
DQL_query(engine, q23)

Unnamed: 0,name,total_amount
0,Nicole Smith,95.0


**Number of orders registered per month**

In [60]:
q24 ="""
SELECT DATE_FORMAT(order_date, '%Y-%m') AS month, COUNT(*) AS total_orders
FROM orders
GROUP BY month
ORDER BY month;
"""
fetch_query_results(connection, q24)

('2023-08', 4)
('2023-09', 10)
('2023-10', 5)
('2023-11', 5)
('2023-12', 10)
('2024-01', 10)
('2024-02', 5)
('2024-03', 4)
('2024-04', 6)
('2024-05', 3)
('2024-06', 3)
('2024-07', 5)




**Identify Customers Without Orders**

In [61]:
left_exclusive= """
SELECT c.customer_id, c.name
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL;
"""
DQL_query(engine, left_exclusive)

Unnamed: 0,customer_id,name
0,3,Tyler Perez
1,6,April Jackson DVM
2,7,Derrick Lopez
3,10,David Stephens
4,11,Paul Williamson
5,12,Jesus Moore DVM
6,15,Steven Thomas
7,20,Joseph Mckenzie
8,21,Linda Mays
9,22,Lisa Contreras


**Add a record to the table**

In [None]:
customers_insert_query = """
INSERT INTO customers (name, city, phone, address, created_at)
VALUES ('Rayan karimi', 'New York', '123-4567', '400 Elm St', NOW());
"""
execute_query(connection, customers_insert_query)

**Update the status of specific orders**

In [None]:
Update_query = """
UPDATE orders
SET status = 'shipped'
WHERE order_id IN (SELECT order_id FROM orders WHERE status = 'completed');
"""

execute_query(connection, q14)

**Drop Tables**

This SQL script is designed to remove two related tables from a database. The DROP TABLE orders command deletes the "orders" table, which is presumably a child table referencing the "customers" table. By deleting the "orders" table first, we ensure that there are no foreign key constraints preventing the subsequent deletion of the "customers" table. The DROP TABLE customers command then removes the "customers" table, which is considered the parent table in this relationship.

In [None]:
drop_table_orders = """
DROP TABLE orders;
"""
execute_query(connection, drop_table_orders)

In [None]:
drop_table_customers = """
DROP TABLE customers;
"""
execute_query(connection, drop_table_customers)