In [4]:
import sqlite3
import pandas as pd

# Open the SQLite database stored in a file (relative to the working
# directory) and keep one cursor around for the raw SQL statements below.
DB_PATH = 'superstore.db'
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()


In [6]:
# Read the Superstore CSV; the encoding is given explicitly as ISO-8859-1.
CSV_PATH = '/content/Superstore.csv'
df = pd.read_csv(CSV_PATH, encoding='ISO-8859-1')

# Copy the frame into the 'orders' table, replacing any table of that name
# that already exists; the DataFrame index is not written as a column.
df.to_sql(name='orders', con=conn, if_exists='replace', index=False)


9994

In [7]:
# Per region: how many rows have Sales above 500, and their average Sales.
query = '''
SELECT Region, COUNT(*) AS Total_Orders, AVG(Sales) AS Avg_Sales
FROM orders
WHERE Sales > 500
GROUP BY Region;
'''

# Run the aggregate through pandas so the result comes back as a DataFrame.
result = pd.read_sql_query(sql=query, con=conn)
print(result)


    Region  Total_Orders    Avg_Sales
0  Central           256  1239.449326
1     East           333  1337.518817
2    South           199  1319.817977
3     West           374  1202.266209


In [8]:
# Create a simplified table with just Customer ID, Name, and Sales.
#
# The database is a file on disk and 'orders' is replaced every time the CSV
# is loaded, so a previous run may have left an older customers_orders table
# behind. Drop it first: with CREATE TABLE IF NOT EXISTS a re-run would skip
# the SELECT entirely and keep the stale rows.
cursor.execute('DROP TABLE IF EXISTS customers_orders;')

query_create = '''
CREATE TABLE customers_orders AS
SELECT [Customer ID] AS customer_id,
       [Customer Name] AS name,
       Sales
FROM orders;
'''
cursor.execute(query_create)
conn.commit()


In [9]:
# Names of customers that have at least one row with Sales above 1000.
# The outer SELECT returns one row per customers_orders row belonging to such
# a customer, so the same name can come back many times.
query = '''
SELECT name
FROM customers_orders
WHERE customer_id IN (
    SELECT customer_id
    FROM customers_orders
    WHERE Sales > 1000
);
'''
result = pd.read_sql_query(sql=query, con=conn)

# Collapse the repeated names before printing (the original index labels of
# the first occurrences are kept).
print(result.drop_duplicates())


                  name
0      Brosina Hoffman
7         Irene Maddox
8        Harold Pawlan
10           Pete Kriz
11     Alejandro Grove
...                ...
2799    Barry Gonzalez
3294  Victoria Pisteka
3360     Ionia McGrath
3742    Craig Molinari
4018   Lindsay Castell

[339 rows x 1 columns]


In [10]:
# Replace missing State values (NULL, or empty after trimming) with 'Unknown'.
update_query = '''
UPDATE orders
SET State = 'Unknown'
WHERE State IS NULL OR TRIM(State) = '';
'''

# Using the connection as a context manager commits when the UPDATE succeeds
# and rolls back if it raises, so a failed run never leaves a transaction
# open on the connection shared by the other cells.
with conn:
    cursor.execute(update_query)


In [12]:
# Peek at the first few distinct State values now stored in 'orders'.
pd.read_sql_query(
    sql="SELECT DISTINCT State FROM orders",
    con=conn,
).head()


Unnamed: 0,State
0,Kentucky
1,California
2,Florida
3,North Carolina
4,Washington


In [13]:
# Remove every row whose Sales value is negative.
delete_query = '''
DELETE FROM orders
WHERE Sales < 0;
'''

# Using the connection as a context manager commits when the DELETE succeeds
# and rolls back if it raises, so a failed run never leaves a transaction
# open on the connection shared by the other cells.
with conn:
    cursor.execute(delete_query)


In [14]:
# List any rows that still have negative Sales.
pd.read_sql_query(
    sql="SELECT * FROM orders WHERE Sales < 0",
    con=conn,
)


Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit


In [16]:
# Merge in pandas

# Simulated 'customers' table: distinct (Customer ID, Customer Name) pairs,
# with the columns renamed to snake_case.
customers_df = (
    df[['Customer ID', 'Customer Name']]
    .drop_duplicates()
    .rename(columns={
        'Customer ID': 'customer_id',
        'Customer Name': 'customer_name',
    })
)

# Simulated 'orders' table: every row of df, restricted to the order-related
# columns and renamed to snake_case.
orders_df = df[['Order ID', 'Customer ID', 'Order Date', 'Sales', 'Profit']].rename(
    columns={
        'Order ID': 'order_id',
        'Customer ID': 'customer_id',
        'Order Date': 'order_date',
        'Sales': 'sales',
        'Profit': 'profit',
    }
)


In [17]:
# Inner join: keep only rows whose customer_id appears in both frames.
merged_df = customers_df.merge(orders_df, how='inner', on='customer_id')
print(merged_df.head())


  customer_id customer_name        order_id  order_date    sales    profit
0    CG-12520   Claire Gute  CA-2016-152156   11/8/2016  261.960   41.9136
1    CG-12520   Claire Gute  CA-2016-152156   11/8/2016  731.940  219.5820
2    CG-12520   Claire Gute  CA-2017-164098   1/26/2017   18.160    1.8160
3    CG-12520   Claire Gute  US-2015-123918  10/15/2015  131.376  -95.2476
4    CG-12520   Claire Gute  US-2015-123918  10/15/2015    5.344    1.8704
