# Writing data to and reading data from a Database using Python

## Libraries and settings

In [1]:
# Libraries
import os
import sqlite3
import fnmatch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

## Create sqlite data base

In [2]:
# Get current working directory
print(os.getcwd())

# Create data base
conn = sqlite3.connect('supermarkets_data.db') 
cursor = conn.cursor()

# Show dbs in the directory
flist = fnmatch.filter(os.listdir('.'), '*.db')
for i in flist:
    print(i)

C:\Workspacezhaw\data_analytics\Woche 2
apartements_database.db
apartment_data.db
apartment_database.db
supermarkets_data.db
supermarkets_database.db


## Create SQL-table in the database

In [3]:
cursor.execute('''CREATE TABLE IF NOT EXISTS supermarkets_table (Id VARCHAR(50),
                                                                Brand VARCHAR(50),
                                                                Shop VARCHAR(50),
                                                                City VARCHAR(50),
                                                                Postcode DECIMAL(8,2))''')
# Confirm changes to the table
conn.commit()

## Read data from file to data frame

In [10]:
df = pd.read_csv('supermarkets_data_prepared.csv', 
                  sep=',', 
                  encoding='utf-8') [['brand', 'shop', 'addr:city', 'addr:postcode']]
print(df.shape)
df.head(5)

(3260, 4)


Unnamed: 0,brand,shop,addr:city,addr:postcode
0,Spar,supermarket,,
1,Migros,supermarket,Uznach,8730.0
2,Coop,supermarket,Uznach,8730.0
3,Coop,supermarket,Zürich,8001.0
4,Migros,supermarket,Zürich,8004.0


## Write data to the SQL-table in data base

In [5]:
df.to_sql(name = 'supermarkets_table',
          con = conn,
          index = False,
          if_exists = 'replace')

3260

## Query the SQL-table

![image.png](attachment:image.png)

In [6]:
# Query the SQL-table
cursor.execute('''SELECT * FROM supermarkets_table 
                 WHERE "addr:city" == 'Winterthur' ''')

df = pd.DataFrame(cursor.fetchall())
  
print(df)

                0            1           2       3
0          Migros  supermarket  Winterthur  8406.0
1          Migros  supermarket  Winterthur  8400.0
2            None  supermarket  Winterthur  8400.0
3          Migros  supermarket  Winterthur  8400.0
4            None  supermarket  Winterthur  8404.0
5            None  supermarket  Winterthur  8400.0
6            None  supermarket  Winterthur  8400.0
7          Migros  supermarket  Winterthur  8405.0
8          Denner  supermarket  Winterthur  8405.0
9          Migros  supermarket  Winterthur  8400.0
10         Migros  supermarket  Winterthur  8400.0
11           None  supermarket  Winterthur  8404.0
12           ALDI  supermarket  Winterthur  8400.0
13           None  supermarket  Winterthur  8400.0
14           None  supermarket  Winterthur  8400.0
15           None  supermarket  Winterthur  8406.0
16           None  supermarket  Winterthur  8406.0
17       Alnatura  supermarket  Winterthur  8400.0
18         Migros  supermarket 

### Additional SQL-queries

In [12]:
# Write an SQL-query to filter all apartments with >= 4.0 rooms and where the area
# is >= 100m2. An example SQL query can be found in the Jupyter notebook.

# Query the SQL-table
cursor.execute('''SELECT * FROM apartments_table 
                  WHERE Rooms >= 4.0 
                  AND Area > 100''')

df = pd.DataFrame(cursor.fetchall())
   
print(df)


                   0    1      2       3
0     1662023742-807  5.5  115.0  2860.0
1     1662023739-771  5.5  195.0  6900.0
2     1662023727-685  4.5  124.0  4460.0
3     1662023689-402  4.5  109.0  2380.0
4    1662023777-1079  4.5  114.0  2910.0
..               ...  ...    ...     ...
166  1662023813-1358  4.5  115.0  2580.0
167  1662023795-1213  4.5  117.0  2180.0
168  1662023786-1151  5.5  162.0  2210.0
169   1662023692-428  4.5  127.0  2820.0
170   1662023749-858  5.5  123.0  2550.0

[171 rows x 4 columns]


In [13]:
# Write an SQL-query to calculate the average price per room size
cursor.execute('''SELECT rooms, AVG(price)
FROM apartments_table
GROUP BY rooms''')

df = pd.DataFrame(cursor.fetchall())
   
print(df)


      0            1
0   NaN  1950.300000
1   1.0  1489.607843
2   1.5  1898.509804
3   2.0  1649.785714
4   2.5  2276.822485
5   3.0  1803.171053
6   3.5  2347.770833
7   4.0  2245.927273
8   4.5  2663.706468
9   5.0  3077.125000
10  5.5  2933.085714
11  6.0  4240.000000
12  6.5  4436.666667
13  7.0  4220.000000
14  9.0  4000.000000


In [14]:
# Write an SQL-query to calculate the average area per room size.
cursor.execute('''SELECT rooms, AVG(area)
FROM apartments_table
GROUP BY rooms''')

df = pd.DataFrame(cursor.fetchall())
   
print(df)

      0           1
0   NaN         NaN
1   1.0   41.936170
2   1.5   61.173913
3   2.0   56.054054
4   2.5   69.006250
5   3.0   68.597015
6   3.5   87.100358
7   4.0   86.750000
8   4.5  111.284946
9   5.0  124.769231
10  5.5  139.258065
11  6.0  153.000000
12  6.5  194.000000
13  7.0         NaN
14  9.0  300.000000


### Jupyter notebook --footer info-- (please always provide this at the end of each submitted notebook)

In [11]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

print('-----------------------------------')
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
print('Python Version:', python_version())
print('-----------------------------------')

-----------------------------------
NT
Windows | 10
Datetime: 2023-01-21 17:01:16
Python Version: 3.9.7
-----------------------------------
